In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score, mean_absolute_error

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Load the student-performance table; the path is resolved relative to the
# current working directory.
data = pd.read_csv(filepath_or_buffer='Student_Performance.csv')

In [None]:
# Per-column count of missing values (`isna` is the same check as `isnull`).
print(data.isna().sum())

Hours Studied                       0
Previous Scores                     0
Extracurricular Activities          0
Sleep Hours                         0
Sample Question Papers Practiced    0
Performance Index                   0
dtype: int64


In [None]:
# Flag every row that exactly repeats an earlier row, then report how many there are.
duplicate_mask = data.duplicated()
print(duplicate_mask.sum())

127


In [None]:
# Keep only the first occurrence of each fully identical row; the original
# index labels of the surviving rows are preserved.
data = data[~data.duplicated(keep='first')]

In [None]:
# Separate the regression target from the predictors, then hold out 20% of the
# rows for testing with a fixed seed so the split is repeatable.
target_column = 'Performance Index'
X = data.drop(columns=[target_column])
y = data[target_column].values.reshape(-1, 1)  # column vector, one row per sample
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Column groups handled by each transformer.
categorical_columns = ['Extracurricular Activities']
numerical_columns = [
    'Hours Studied',
    'Previous Scores',
    'Sleep Hours',
    'Sample Question Papers Practiced',
]

# One-hot encode the categorical column and standardize the numeric ones.
# The output columns follow the order of the transformers listed here.
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(), categorical_columns),
        ('numerical', StandardScaler(), numerical_columns),
    ]
)

In [None]:
# Learn the encoding/scaling statistics from the training split only, then
# apply the same fitted transform to both splits so no test-set information
# leaks into preprocessing.
preprocessor.fit(X_train)
X_train_processed = preprocessor.transform(X_train)
X_test_processed = preprocessor.transform(X_test)

In [None]:
def to_tensor(x):
    """Convert an array-like into a float32 tensor placed on the global `device`.

    Objects exposing a ``toarray`` method (for example sparse matrices) are
    densified first; anything else is passed to ``torch.tensor`` unchanged.
    """
    dense = x.toarray() if hasattr(x, 'toarray') else x
    return torch.tensor(dense, dtype=torch.float32).to(device)

In [None]:
# Move features and targets for both splits onto the selected device as
# float32 tensors (train pair first, then test pair).
X_train_tensor, y_train_tensor = to_tensor(X_train_processed), to_tensor(y_train)
X_test_tensor, y_test_tensor = to_tensor(X_test_processed), to_tensor(y_test)

In [None]:
class MLP_regression(nn.Module):
    """Fully connected regressor: a Linear+ReLU hidden stack and one linear output.

    Args:
        input_dim: Number of input features per sample.
        hidden_dims: Widths of the hidden layers, in order. The default
            ``(128, 64, 32)`` reproduces the original fixed architecture; an
            empty sequence yields a single linear layer.
    """

    def __init__(self, input_dim, hidden_dims=(128, 64, 32)):
        super().__init__()
        layers = []
        in_features = input_dim
        # Layers are created in forward order so parameter initialization
        # consumes the RNG exactly as the hand-written Sequential did, and the
        # ``net.<index>`` state_dict keys stay the same for the default widths.
        for width in hidden_dims:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        layers.append(nn.Linear(in_features, 1))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return one prediction per input row (last dimension has size 1)."""
        return self.net(x)

In [None]:
# Seed PyTorch's RNG right before the network is built so the initial weights,
# and therefore the training curve and reported metrics, are the same on every
# Restart & Run All. The value matches the random_state used for the split.
torch.manual_seed(42)

# The input width is taken from the preprocessed feature matrix so the model
# always matches whatever columns the preprocessor produced.
model = MLP_regression(X_train_tensor.shape[1]).to(device)

In [None]:
# Mean-squared-error objective (averaged over all elements), minimized with
# Adam at a learning rate of 1e-3.
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Full-batch training: every epoch is a single optimizer step over the whole
# training set.
epochs = 1000
for epoch in range(epochs):
    # Clear gradients left over from the previous step before computing new ones.
    optimizer.zero_grad()

    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    loss.backward()
    optimizer.step()

    # Report the training loss only on every 100th epoch.
    if (epoch + 1) % 100 != 0:
        continue
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 79.6065
Epoch [200/1000], Loss: 31.7279
Epoch [300/1000], Loss: 16.9677
Epoch [400/1000], Loss: 7.7043
Epoch [500/1000], Loss: 4.9273
Epoch [600/1000], Loss: 4.4829
Epoch [700/1000], Loss: 4.3814
Epoch [800/1000], Loss: 4.3242
Epoch [900/1000], Loss: 4.2820
Epoch [1000/1000], Loss: 4.2503


In [None]:
# Switch to evaluation mode and score the held-out split without building an
# autograd graph.
model.eval()
with torch.no_grad():
    test_preds = model(X_test_tensor)
    test_loss = criterion(test_preds, y_test_tensor)

# Convert to NumPy arrays (moved to the CPU first) for the scikit-learn metrics.
y_test_np = y_test_tensor.cpu().numpy()
test_preds_np = test_preds.cpu().numpy()

# Compute metrics: MSE comes from the loss already evaluated above, RMSE is
# its square root, MAE and R² come from scikit-learn.
mse = test_loss.item()
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_np, test_preds_np)
r2 = r2_score(y_test_np, test_preds_np)

print("\nEvaluation Metrics:")
print(f"MAE: {mae:.4f}")
print(f"R² Score: {r2:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")


Evaluation Metrics:
MAE: 1.6858
R² Score: 0.9879
MSE: 4.4999
RMSE: 2.1213
