<a href="https://colab.research.google.com/github/Aakriti555/Nammi-assignment/blob/main/NAAMI_Task_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, recall_score, f1_score, confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Read the training CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer="/content/train_set.csv")

In [None]:
# Step 2: Preprocessing
# Drop the ID column. `df` is rebound (instead of dropping in place) and
# errors="ignore" is passed so this cell is idempotent: re-running it after
# "ID" has already been removed is a no-op rather than a KeyError.
df = df.drop(columns=["ID"], errors="ignore")

In [None]:
# Split the frame into the target vector (CLASS) and the feature matrix
# (every remaining column). Working on a copy leaves `df` itself untouched.
X = df.copy()
y = X.pop("CLASS")

In [None]:
# Discard every sample whose feature vector contains a non-finite entry:
# +/-inf is first mapped to NaN so a single dropna() handles both cases.
X = X.replace([np.inf, -np.inf], np.nan).dropna()
# Keep only the targets whose rows survived the filter above.
y = y.loc[X.index]

In [None]:
# Preview the first five rows of the DataFrame

df.head(n=5)

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,...,Feature_3230,Feature_3231,Feature_3232,Feature_3233,Feature_3234,Feature_3235,Feature_3236,Feature_3237,Feature_3238,CLASS
0,18281.541667,18432.0,9409.650391,0.514708,0.0113,0.045369,2.803803,0.356658,1.803803,564.93625,...,382.968383,2214.0,1.0,136.625113,0.06171,0.0,28.154838,4.174959,0.06171,0
1,20010.083333,20100.0,8303.049072,0.417707,0.014959,0.080294,2.338398,0.429532,1.338398,31.291507,...,452.986164,2548.5,1.0,232.564022,0.090548,0.0,27.934229,3.93195,0.090548,1
2,27260.125,27437.0,12189.649414,0.44716,0.011428,0.046402,2.782842,0.359345,1.782842,11.965643,...,419.781765,3400.0,1.0,233.593529,0.068704,0.0,27.904807,4.085035,0.068704,1
3,41938.125,42138.0,17866.433594,0.426019,0.009908,0.034878,3.060655,0.326727,2.060655,8.966286,...,439.023968,5424.0,1.0,427.429572,0.078803,0.0,27.870588,4.011726,0.078803,0
4,41274.125,41439.0,14315.041992,0.346828,0.013596,0.06568,2.478506,0.403469,1.478506,34.898671,...,485.209184,5096.0,1.0,726.731554,0.142608,0.0,28.846909,3.571352,0.142608,0


In [None]:
# Train-validation split, then standardization.
# The split happens FIRST so the scaler's mean/variance are estimated from the
# training rows only. Fitting the scaler on all rows before splitting would let
# validation statistics leak into the features the model is trained on and
# bias the validation metrics.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize features: fit on the training split, then apply that same
# fitted transform to the validation split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)

In [None]:
# Convert to PyTorch tensors
# Features become float32 matrices; targets become float32 column vectors of
# shape (n_samples, 1).
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32).reshape(-1, 1)
y_val_tensor = torch.tensor(y_val.to_numpy(), dtype=torch.float32).reshape(-1, 1)

In [None]:
# Wrap each split in a TensorDataset, then batch it with a DataLoader
train_dataset, val_dataset = (
    TensorDataset(features, targets)
    for features, targets in (
        (X_train_tensor, y_train_tensor),
        (X_val_tensor, y_val_tensor),
    )
)
# Only the training batches are reshuffled each epoch; validation order is fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

In [None]:
# Step 3: Define Logistic Regression Model
class LogisticRegressionModel(nn.Module):
    """Logistic regression: one linear unit followed by a sigmoid."""

    def __init__(self, input_dim):
        """Create the single linear layer mapping ``input_dim`` features to one output."""
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for each row of ``x``."""
        logits = self.linear(x)
        return logits.sigmoid()

# Seed PyTorch's global RNG before the model is built so that the initial
# weights -- and the shuffle order the training DataLoader draws afterwards --
# are the same on every Restart & Run All. 42 matches the random_state used
# for the train/validation split.
torch.manual_seed(42)
model = LogisticRegressionModel(X.shape[1])


In [None]:
# Step 4: Train the Model
# Binary cross-entropy on the model's sigmoid outputs, optimised with Adam.
epochs = 50
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
for epoch in range(epochs):
    model.train()
    # Accumulate a sample-weighted loss so the logged value is the mean over
    # the whole epoch, not just whichever mini-batch happened to come last.
    running_loss = 0.0
    n_samples = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        # The criterion returns a per-batch mean; weight it by the batch size
        # because the final batch may be smaller than the others.
        running_loss += loss.item() * batch_X.size(0)
        n_samples += batch_X.size(0)
    if (epoch + 1) % 10 == 0:
        # An empty loader leaves n_samples at 0; report NaN instead of
        # dividing by zero (or hitting an undefined `loss`).
        epoch_loss = running_loss / n_samples if n_samples else float("nan")
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")

Epoch [10/50], Loss: 0.4679
Epoch [20/50], Loss: 0.3103
Epoch [30/50], Loss: 0.2695
Epoch [40/50], Loss: 0.1987
Epoch [50/50], Loss: 0.1653


In [None]:
# Step 5: Evaluate the Model
def evaluate(model, loader):
    """Score a binary classifier on every batch produced by ``loader``.

    Args:
        model: Module whose forward pass returns one probability per sample.
        loader: Iterable of ``(features, labels)`` tensor batches; labels are
            expected to be 0/1.

    Returns:
        Tuple ``(accuracy, auroc, recall, specificity, f1)``. Probabilities are
        thresholded at 0.5 for every metric except AUROC, which is computed
        from the raw probabilities. Specificity is NaN when the data contain
        no negative samples.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    model.eval()
    batch_probs = []
    batch_labels = []
    with torch.no_grad():
        for batch_X, batch_y in loader:
            # .cpu() is a no-op for CPU tensors and keeps .numpy() legal if
            # the tensors ever live on another device.
            batch_probs.append(model(batch_X).cpu())
            batch_labels.append(batch_y.cpu())
    if not batch_probs:
        raise ValueError("loader yielded no batches; nothing to evaluate")

    # One concatenation per tensor list instead of extending a Python list
    # row by row.
    preds = torch.cat(batch_probs).numpy().ravel()
    labels = torch.cat(batch_labels).numpy().ravel().astype(int)
    pred_labels = (preds >= 0.5).astype(int)

    acc = accuracy_score(labels, pred_labels)
    auc = roc_auc_score(labels, preds)
    recall = recall_score(labels, pred_labels)
    f1 = f1_score(labels, pred_labels)

    # Pin the label set so the matrix is always 2x2, even when only one class
    # appears among the labels and predictions (otherwise it can collapse to
    # 1x1 and indexing the false-positive cell raises IndexError).
    tn, fp, _, _ = confusion_matrix(labels, pred_labels, labels=[0, 1]).ravel()
    negatives = tn + fp
    # With no negative samples specificity is undefined; return NaN explicitly
    # rather than triggering a 0/0 runtime warning.
    specificity = tn / negatives if negatives else float("nan")

    return acc, auc, recall, specificity, f1

In [None]:
# Score the trained model on the validation batches
acc, auc, recall, specificity, f1 = evaluate(model=model, loader=val_loader)

In [None]:
# Emit the header and the five metrics, one per line, in a single print call
print(
    "\nModel Evaluation on Validation Set:",
    f"Accuracy: {acc:.4f}",
    f"AUROC: {auc:.4f}",
    f"Sensitivity (Recall/TPR): {recall:.4f}",
    f"Specificity (TNR): {specificity:.4f}",
    f"F1 Score: {f1:.4f}",
    sep="\n",
)



Model Evaluation on Validation Set:
Accuracy: 0.5750
AUROC: 0.5238
Sensitivity (Recall/TPR): 0.6316
Specificity (TNR): 0.5238
F1 Score: 0.5854
