In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [5]:
# Load the numeric-labeled dataset
df = pd.read_csv("combined_labeled_data_numeric.csv")

# Features and labels
X = df[["col1", "col2", "col3"]]  # independent variables
y = df["label"]                  # target variable

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(3, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        return self.layers(x)

model = MLP()
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train
for epoch in range(100):
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()

# Predict
with torch.no_grad():
    preds = model(X_test_tensor).round().numpy()
    

y_true = y_test.values  # Assuming y_test is a pandas Series

# Evaluate
print("Accuracy:", accuracy_score(y_true, preds))
print("Classification Report:\n", classification_report(y_true, preds))
print("Confusion Matrix:\n", confusion_matrix(y_true, preds))


Accuracy: 0.8490679412768863
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00     44208
           1       0.85      1.00      0.92    248692

    accuracy                           0.85    292900
   macro avg       0.42      0.50      0.46    292900
weighted avg       0.72      0.85      0.78    292900

Confusion Matrix:
 [[     0  44208]
 [     0 248692]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
