In [13]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

In [2]:
# Read the labelled training set and the unlabelled test set from the working directory.
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

In [3]:
# Peek at the first two training rows.
train_df.iloc[:2]

Unnamed: 0,id,Mean_Integrated,SD,EK,Skewness,Mean_DMSNR_Curve,SD_DMSNR_Curve,EK_DMSNR_Curve,Skewness_DMSNR_Curve,Class
0,0,133.171875,59.716081,0.043133,-0.703383,54.917224,70.084438,0.749798,-0.649512,0
1,1,87.09375,36.257973,0.435469,2.266057,3.417224,21.865069,7.03933,52.686251,0


In [4]:
# Peek at the first two test rows.
test_df.iloc[:2]

Unnamed: 0,id,Mean_Integrated,SD,EK,Skewness,Mean_DMSNR_Curve,SD_DMSNR_Curve,EK_DMSNR_Curve,Skewness_DMSNR_Curve
0,117564,140.046875,54.5078,0.058862,-0.567263,2.337793,14.868335,9.59176,117.988781
1,117565,107.828125,51.578965,0.284368,-0.33843,1.574415,12.501437,11.694968,182.704822


In [5]:
# Pick the compute device: CUDA first, then MPS, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [6]:
# Display which compute device was selected.
device

'cuda'

In [7]:
# Feature matrices as NumPy arrays: the identifier column is dropped from both
# frames, and the target column is additionally dropped from the training frame.
X_train_val = train_df.drop(columns=["id", "Class"]).to_numpy()
X_test = test_df.drop(columns=["id"]).to_numpy()

In [8]:
# Target vector taken from the "Class" column of the training frame.
y_train_val = train_df["Class"].to_numpy()

In [9]:
# Standardise every feature using statistics learned from the labelled data,
# then apply the same transform to the test features.
# NOTE(review): the scaler is fitted before the train/validation split, so the
# validation rows contribute to the scaling statistics - confirm this is intended.
sc = StandardScaler().fit(X_train_val)
X_train_val, X_test = sc.transform(X_train_val), sc.transform(X_test)

In [10]:
# Hold out 10% of the labelled rows for validation; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.10,
    random_state=42,
)

In [11]:
# Baseline model: XGBoost classifier with its default hyper-parameters.
# (XGBClassifier is imported in the notebook's import cell.)
xg_model = XGBClassifier()
xg_model.fit(X_train, y_train)

# Accuracy on the held-out validation split, displayed as the cell output.
xg_pre_val = xg_model.predict(X_val)
accuracy_score(y_val, xg_pre_val)

0.9915794845623883

In [None]:
# Convert the splits to tensors on `device`. Features are cast to float32;
# labels keep the integer dtype of the source array.
# NOTE: this rebinds X_train / X_val / X_test / y_train / y_val from NumPy
# arrays to tensors, so any later code that needs NumPy input has to convert
# them back (e.g. `.cpu().numpy()`).
# torch.as_tensor (rather than torch.tensor) makes the cell safe to re-run:
# inputs that are already tensors are reused instead of triggering a
# copy-construct UserWarning.
X_train = torch.as_tensor(X_train, dtype=torch.float32, device=device)
X_val = torch.as_tensor(X_val, dtype=torch.float32, device=device)
X_test = torch.as_tensor(X_test, dtype=torch.float32, device=device)
y_train = torch.as_tensor(y_train, device=device)
y_val = torch.as_tensor(y_val, device=device)

In [None]:
# Sanity-check the shapes of each split (features alongside labels where available).
for features, labels in ((X_train, y_train), (X_val, y_val)):
    print(features.shape, labels.shape)
print(X_test.shape)

In [None]:
class NeuralNet(nn.Module):
    """Fully connected classifier that returns raw class logits.

    Each hidden layer is ``Linear -> BatchNorm1d -> ReLU``; a final ``Linear``
    maps to ``num_classes`` outputs. No softmax is applied.

    Args:
        in_features: Number of input features per sample.
        hidden_sizes: Width of each hidden layer, in order.
        num_classes: Number of output logits.

    With the default arguments the network is 8 -> 64 -> 128 -> 2 and the
    sub-modules sit at the same ``layers.<index>`` positions as before, so
    existing state dicts still load.
    """

    def __init__(self, in_features=8, hidden_sizes=(64, 128), num_classes=2):
        super().__init__()
        modules = []
        width = in_features
        for hidden in hidden_sizes:
            modules.extend([
                nn.Linear(width, hidden),
                nn.BatchNorm1d(hidden),
                nn.ReLU(),
            ])
            width = hidden
        # Output head: one logit per class.
        modules.append(nn.Linear(width, num_classes))
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for input ``(batch, in_features)``."""
        return self.layers(x)

In [None]:
# Seed PyTorch's RNG before building the network so the random weight
# initialisation (and therefore the training run) is repeatable.
torch.manual_seed(42)
model = NeuralNet().to(device)

# Cross-entropy over the raw logits the network returns.
loss_function = nn.CrossEntropyLoss()

In [None]:
# Adam over every parameter of `model`, with a fixed learning rate of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Per-epoch training losses; the training loop appends one entry per epoch.
loss_track = list()

In [None]:
# Full-batch training: every epoch runs one forward/backward pass over all of X_train.
epochs = 1000
model.train()  # make sure BatchNorm uses batch statistics while fitting
for i in range(epochs):

    # forward pass (call the module, not .forward, so registered hooks run)
    y_pred = model(X_train)
    loss = loss_function(y_pred, y_train)

    # loss tracking: store a detached tensor so the history does not keep
    # a reference to each epoch's autograd graph
    loss_track.append(loss.detach())

    # backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # progress report every 100 epochs, numbered from 1
    if (i+1) % 100 == 0:
        print(f"epoch: {i+1}, loss={loss.item()}")

In [20]:
# Training-loss curve. float() accepts both scalar tensors (on any device) and
# plain Python numbers, so the cell works whichever form loss_track holds.
fig, ax = plt.subplots()
ax.plot([float(t) for t in loss_track])
ax.set(title="Training loss", xlabel="epoch", ylabel="cross-entropy loss");

In [22]:
# Validation predictions from the XGBoost model.
# To score the neural net instead: predicated_val_ans = torch.argmax(model(X_val), 1)
# X_val may have been rebound to a tensor (possibly on the GPU) by the
# tensor-conversion cell, so hand XGBoost a NumPy array on the CPU.
X_val_np = X_val.detach().cpu().numpy() if torch.is_tensor(X_val) else X_val
predicated_val_ans = xg_model.predict(X_val_np)
print(predicated_val_ans)
print(y_val)

[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]


In [23]:
# Validation accuracy. scikit-learn needs CPU array-likes, so convert either
# operand first if it is a torch tensor (covers both the XGBoost and the
# neural-net prediction paths).
y_val_np = y_val.detach().cpu().numpy() if torch.is_tensor(y_val) else y_val
val_pred_np = (
    predicated_val_ans.detach().cpu().numpy()
    if torch.is_tensor(predicated_val_ans)
    else predicated_val_ans
)
val_accuracy = accuracy_score(y_val_np, val_pred_np)
print("Validation accuracy is %.2f%%" % (val_accuracy*100))

Validation accuracy is 99.16%


In [24]:
# Test-set predictions from the XGBoost model.
# To use the neural net instead: predicated_test_ans = torch.argmax(model(X_test), 1)
# X_test may have been rebound to a tensor (possibly on the GPU) by the
# tensor-conversion cell, so hand XGBoost a NumPy array on the CPU.
X_test_np = X_test.detach().cpu().numpy() if torch.is_tensor(X_test) else X_test
predicated_test_ans = xg_model.predict(X_test_np)
print(predicated_test_ans)

[0 0 0 ... 0 0 1]


In [25]:
# Build the submission frame (test ids alongside predicted classes) and write it to disk.
# If the predictions come from the neural net, pass predicated_test_ans.cpu() instead.
submission_path = Path("outputs/ans15.csv")
# Create the output directory on first use so to_csv does not fail on a fresh checkout.
submission_path.parent.mkdir(parents=True, exist_ok=True)
ans_df = pd.DataFrame({"id": test_df.id, "Class": predicated_test_ans})
ans_df.to_csv(submission_path, index=False)