In [1]:
import pandas as pd
import numpy as np

# Load the dataset
data = pd.read_csv('youth_smoking_drug_data_10000_rows_expanded.csv')

print("Dataset Overview:")
print(data.head())


Dataset Overview:
   Year Age_Group  Gender  Smoking_Prevalence  Drug_Experimentation  \
0  2024     15-19    Both               18.85                 32.40   
1  2024     10-14  Female               34.88                 41.57   
2  2023     10-14    Both               42.00                 56.80   
3  2024     40-49    Both               33.75                 42.90   
4  2023     15-19    Male               47.90                 39.62   

  Socioeconomic_Status  Peer_Influence School_Programs  Family_Background  \
0                 High               5             Yes                  1   
1                 High               6             Yes                 10   
2                 High               6             Yes                  2   
3               Middle              10              No                  9   
4                 High               1              No                  2   

   Mental_Health Access_to_Counseling  Parental_Supervision  \
0              5             

In [2]:
print("The Initial Shape of the Dataset:", data.shape)


The Initial Shape of the Dataset: (10000, 15)


In [3]:
class Sigmoid:
    def forward(self,x):
        # Computed the sigmoind function
        # The result is stored in self.output for use during the backward pass
        self.output = 1 / (1 + np.exp(-x))
        return self.output
    
    def backward(self, grad):
        return grad * self.output * (1 - self.output)


In [4]:
class ReLU:
    def forward(self, x):
        # Save the input for use in the backward pass
        self.input = x  
        relu_output = np.maximum(0, x)
        return np.maximum(0, x)  # Apply ReLU activation

    def backward(self, grad):
        # Create a copy of grad to avoid modifying it directly
        grad_input = np.array(grad, copy=True)
        # Set the gradient to 0 for all input values where x <= 0
        grad_input[self.input <= 0] = 0
        return grad_input  # Return the modified gradient


In [5]:
class Softmax:
    def forward(self, x):
        # Subtract the max value for numerical stability
        exp_values = np.exp(x - np.max(x, axis=1, keepdims=True))
        # Normalize by dividing by the sum of exponentials
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        return self.output

    def backward(self, grad):
        # Gradient w.r.t. the input is simply grad passed from the loss
        # Cross-entropy loss simplifies this calculation
        return grad


In [6]:
class Dropout:
    def __init__(self, dropout_rate):
        
        self.dropout_rate = dropout_rate
        self.mask = None

    def forward(self, inputs, training=True):
        
        if training:
            # Generate the dropout mask (1 for active neurons, 0 for dropped neurons)
            self.mask = np.random.binomial(1, 1 - self.dropout_rate, size=inputs.shape)
            # Apply the mask and scale the active neurons
            return inputs * self.mask / (1 - self.dropout_rate)
        else:
            # At test time, just return the inputs unchanged
            return inputs

    def backward(self, grad_output):
       
        # Propagate gradients only through active neurons
        return grad_output * self.mask / (1 - self.dropout_rate)


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder

class FullyConnectedNN:
    #initializing neural netwroks weights, biases and regularization
    def __init__(self, layer_sizes, activations, dropout_rates=None, l1=0.01, l2=0.01):
        self.layer_sizes = layer_sizes #number of neurons per layer
        self.activations = activations #which acitvation function is used for each layer
        self.dropout_rates = dropout_rates or [0.0] * (len(layer_sizes) - 1) #dropout rate for each layer
        self.l1 = l1
        self.l2 = l2

        self.weights = []
        self.biases = []
        for i in range(len(layer_sizes) - 1):
            self.weights.append(np.random.randn(layer_sizes[i], layer_sizes[i + 1]) * 0.01)
            self.biases.append(np.zeros((1, layer_sizes[i + 1])))

    def _activation_forward(self, x, activation):
        #forward pass for activation functions we have coded 
        if activation == 'relu':
            return np.maximum(0, x)
        elif activation == 'sigmoid':
            return 1 / (1 + np.exp(-x))
        elif activation == 'softmax':
            exp_values = np.exp(x - np.max(x, axis=1, keepdims=True))
            return exp_values / np.sum(exp_values, axis=1, keepdims=True)
        else:
            raise ValueError(f"Unknown activation function: {activation}")

    def _activation_backward(self, grad, activation, output):
        #backward pass for activation functions we have coded
        if activation == 'relu':
            grad[output <= 0] = 0
            return grad
        elif activation == 'sigmoid':
            return grad * output * (1 - output)
        elif activation == 'softmax':
            return grad
        else:
            raise ValueError(f"Unknown activation function: {activation}")

    def forward(self, X, training=True):
        self.activations_cache = [X]
        self.z_cache = []
        for i in range(len(self.weights)):
            z = np.dot(self.activations_cache[-1], self.weights[i]) + self.biases[i]
            self.z_cache.append(z)
            a = self._activation_forward(z, self.activations[i])
            if training and self.dropout_rates[i] > 0.0:
                dropout_mask = np.random.binomial(1, 1 - self.dropout_rates[i], size=a.shape)
                a *= dropout_mask / (1 - self.dropout_rates[i])
            self.activations_cache.append(a)
        return self.activations_cache[-1]

    def backward(self, X, y, lr, batch_size):
        m = X.shape[0]
        grad_w = [np.zeros_like(w) for w in self.weights]
        grad_b = [np.zeros_like(b) for b in self.biases]

        output_error = self.activations_cache[-1] - y  
        for i in reversed(range(len(self.weights))):
            grad_w[i] = np.dot(self.activations_cache[i].T, output_error) / batch_size
            grad_b[i] = np.sum(output_error, axis=0, keepdims=True) / batch_size

            grad_w[i] += self.l1 * np.sign(self.weights[i]) + self.l2 * self.weights[i]

            if i > 0:
                output_error = np.dot(output_error, self.weights[i].T)
                output_error = self._activation_backward(output_error, self.activations[i - 1],
                                                         self.activations_cache[i])
        for i in range(len(self.weights)):
            self.weights[i] -= lr * grad_w[i]
            self.biases[i] -= lr * grad_b[i]

    def loss(self, y_true, y_pred):
        #cross-entropy loss for multi-class classification 
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

    def fit(self, X_train, y_train, lr, epochs, batch_size):
        #training the network
        for epoch in range(epochs):
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)
            X_train = X_train[indices]
            y_train = y_train[indices]

            for batch_start in range(0, X_train.shape[0], batch_size):
                X_batch = X_train[batch_start:batch_start + batch_size]
                y_batch = y_train[batch_start:batch_start + batch_size]

                y_pred = self.forward(X_batch, training=True)
                self.backward(X_batch, y_batch, lr, batch_size)

            if epoch % 10 == 0:
                y_pred = self.forward(X_train, training=False)
                loss = self.loss(y_train, y_pred)
                print(f"Epoch {epoch}, Loss: {loss}")

    def predict(self, X):
        return self.forward(X, training=False)


# Load and preprocess the dataset
data = pd.read_csv('youth_smoking_drug_data_10000_rows_expanded.csv')

selected_features = [
    "Age_Group", 
    "Smoking_Prevalence",
    "Drug_Experimentation",
    "Socioeconomic_Status",
    "Parental_Supervision",
    "Community_Support",
]
X = data[selected_features].values

# Encode categorical features
from sklearn.preprocessing import LabelEncoder, StandardScaler

for col in ["Age_Group", "Socioeconomic_Status"]:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])

# Select and normalize features
X = data[selected_features].values
scaler = StandardScaler()
X = scaler.fit_transform(X)

# One-hot encode target
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(sparse_output=False)
y = encoder.fit_transform(data[["Access_to_Counseling"]].values)


# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define network parameters
layer_sizes = [X_train.shape[1], 32, 16, 2]  
activations = ['relu', 'relu', 'softmax']  
dropout_rates = [0.1, 0.1, 0.0]

# Initialize and train the network
nn = FullyConnectedNN(
    layer_sizes=layer_sizes,
    activations=activations,
    dropout_rates=dropout_rates,
    l1=0.01,
    l2=0.01
)

nn.fit(X_train, y_train, lr=0.01, epochs=200, batch_size=16)

y_train_pred = nn.predict(X_train)
train_loss = nn.loss(y_train, y_train_pred)
print(f"Training Loss: {train_loss}")

y_test_pred = nn.predict(X_test)
test_loss = nn.loss(y_test, y_test_pred)
print(f"Test Loss: {test_loss}")


# Evaluate the model
accuracy = np.mean(np.argmax(y_test_pred, axis=1) == np.argmax(y_test, axis=1))
print(f"Test Accuracy: {accuracy}")


Epoch 0, Loss: 0.6930474705917861
Epoch 10, Loss: 0.6930762532003921
Epoch 20, Loss: 0.6930514530592932
Epoch 30, Loss: 0.6930475482643893
Epoch 40, Loss: 0.6930800190979515
Epoch 50, Loss: 0.6930962975076013
Epoch 60, Loss: 0.6931142022016307
Epoch 70, Loss: 0.6931185556748413
Epoch 80, Loss: 0.693078058037064
Epoch 90, Loss: 0.6930729429847132
Epoch 100, Loss: 0.6930659579025633
Epoch 110, Loss: 0.6931427861733764
Epoch 120, Loss: 0.6930469609429798
Epoch 130, Loss: 0.6930957287355047
Epoch 140, Loss: 0.6930759238195155
Epoch 150, Loss: 0.6930465273219636
Epoch 160, Loss: 0.693054560016322
Epoch 170, Loss: 0.6931483466839354
Epoch 180, Loss: 0.6930467414987997
Epoch 190, Loss: 0.6930891431435964
Training Loss: 0.6930580446551555
Test Loss: 0.6927936140926708
Test Accuracy: 0.514
