Import all Modules

In [37]:
import pandas as pd
import numpy as np 

##Import all SKlearn modules
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# import all pytorch modules
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

Displaying Data

In [38]:
df = pd.read_csv("Student_performance_data.csv")
df.head()

Unnamed: 0,StudentID,Age,Gender,Ethnicity,ParentalEducation,StudyTimeWeekly,Absences,Tutoring,ParentalSupport,Extracurricular,Sports,Music,Volunteering,GPA,GradeClass
0,1001,17,1,0,2,19.833723,7,1,2,0,0,1,0,2.929196,2.0
1,1002,18,0,0,1,15.408756,0,0,1,0,0,0,0,3.042915,1.0
2,1003,15,0,2,3,4.21057,26,0,2,0,0,0,0,0.112602,4.0
3,1004,17,1,0,3,10.028829,14,0,3,1,0,0,0,2.054218,3.0
4,1005,17,1,0,2,4.672495,17,1,3,0,0,0,0,1.288061,4.0


Start Modeling

In [39]:
# Separate features and target variable
X = df.drop("GradeClass", axis=1)
y = df["GradeClass"]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Encode the target variable
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_encoded, dtype=torch.long) # Use torch.long for classification targets
y_test_tensor = torch.tensor(y_test_encoded, dtype=torch.long)

# Create TensorDatasets and DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 32  # You can adjust this
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # Shuffle training data
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) # No need to shuffle test data

Define the Neural Network Model

In [40]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(64, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, num_classes)  # Output layer (5 classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        return out

input_size = X_train_scaled.shape[1]  # Number of features
num_classes = len(np.unique(y_train_encoded)) # Number of unique classes

model = SimpleNN(input_size, num_classes)

Define Loss Function

In [41]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adjust learning rate if needed

Train the Model

In [42]:
num_epochs = 50  # You can adjust this

for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 1 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/50], Loss: 1.3053
Epoch [2/50], Loss: 0.8409
Epoch [3/50], Loss: 0.5257
Epoch [4/50], Loss: 0.6115
Epoch [5/50], Loss: 0.6709
Epoch [6/50], Loss: 0.4349
Epoch [7/50], Loss: 0.5824
Epoch [8/50], Loss: 0.3213
Epoch [9/50], Loss: 0.6130
Epoch [10/50], Loss: 0.4139
Epoch [11/50], Loss: 0.6013
Epoch [12/50], Loss: 0.7923
Epoch [13/50], Loss: 0.3978
Epoch [14/50], Loss: 0.5103
Epoch [15/50], Loss: 0.2881
Epoch [16/50], Loss: 0.4874
Epoch [17/50], Loss: 0.2304
Epoch [18/50], Loss: 0.3399
Epoch [19/50], Loss: 0.2348
Epoch [20/50], Loss: 0.1839
Epoch [21/50], Loss: 0.1904
Epoch [22/50], Loss: 0.4947
Epoch [23/50], Loss: 0.2975
Epoch [24/50], Loss: 0.4880
Epoch [25/50], Loss: 0.7085
Epoch [26/50], Loss: 0.3220
Epoch [27/50], Loss: 0.3954
Epoch [28/50], Loss: 0.2704
Epoch [29/50], Loss: 0.2653
Epoch [30/50], Loss: 0.1726
Epoch [31/50], Loss: 0.1994
Epoch [32/50], Loss: 0.5420
Epoch [33/50], Loss: 0.3329
Epoch [34/50], Loss: 0.3038
Epoch [35/50], Loss: 0.1845
Epoch [36/50], Loss: 0.1241
E

Evaluate the Model

In [43]:
model.eval()
with torch.no_grad():
    y_pred = []
    y_true = []
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        y_pred.extend(predicted.cpu().numpy())
        y_true.extend(labels.cpu().numpy())

    y_pred = np.array(y_pred, dtype=np.int64)
    y_true = np.array(y_true, dtype=np.int64)

    print("y_true data type:", y_true.dtype)
    print("y_pred data type:", y_pred.dtype)
    print("Sample y_true:", y_true[:10])
    print("Sample y_pred:", y_pred[:10])

    print("label_encoder.classes_ type:", type(label_encoder.classes_))
    print("label_encoder.classes_:", label_encoder.classes_)
    print("First element type:", type(label_encoder.classes_[0]))

    target_names = [str(i) for i in label_encoder.classes_]  # Convert to strings
    print("Target Names: ", target_names)
    print(classification_report(y_true, y_pred, target_names=target_names))

y_true data type: int64
y_pred data type: int64
Sample y_true: [4 1 2 0 4 2 3 3 3 4]
Sample y_pred: [4 1 0 1 4 2 4 3 3 4]
label_encoder.classes_ type: <class 'numpy.ndarray'>
label_encoder.classes_: [0. 1. 2. 3. 4.]
First element type: <class 'numpy.float64'>
Target Names:  ['0.0', '1.0', '2.0', '3.0', '4.0']
              precision    recall  f1-score   support

         0.0       0.60      0.27      0.38        22
         1.0       0.59      0.90      0.72        49
         2.0       0.94      0.73      0.82        85
         3.0       0.79      0.79      0.79        86
         4.0       0.92      0.95      0.93       237

    accuracy                           0.84       479
   macro avg       0.77      0.73      0.73       479
weighted avg       0.85      0.84      0.84       479

