I am learning PyTorch, so I am revisiting older datasets and building a model for each one with PyTorch.

In [241]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

In [242]:
# Column names: the same four summary statistics for the 'Profile' and 'DM'
# groups, followed by the target label.
_stat_names = ('mean', 'stdev', 'skewness', 'kurtosis')
column_names = [
    f'{group}_{stat}' for group in ('Profile', 'DM') for stat in _stat_names
] + ['Class']

# header=None: the first row of the file is treated as data, so the names
# above are supplied explicitly.
df = pd.read_csv('HTRU_2.csv', header=None, names=column_names)
df.head()

Unnamed: 0,Profile_mean,Profile_stdev,Profile_skewness,Profile_kurtosis,DM_mean,DM_stdev,DM_skewness,DM_kurtosis,Class
0,140.5625,55.683782,-0.234571,-0.699648,3.199833,19.110426,7.975532,74.242225,0
1,102.507812,58.88243,0.465318,-0.515088,1.677258,14.860146,10.576487,127.39358,0
2,103.015625,39.341649,0.323328,1.051164,3.121237,21.744669,7.735822,63.171909,0
3,136.75,57.178449,-0.068415,-0.636238,3.642977,20.95928,6.896499,53.593661,0
4,88.726562,40.672225,0.600866,1.123492,1.17893,11.46872,14.269573,252.567306,0


In [243]:
# Count of missing values in each column.
df.isna().sum()

Profile_mean        0
Profile_stdev       0
Profile_skewness    0
Profile_kurtosis    0
DM_mean             0
DM_stdev            0
DM_skewness         0
DM_kurtosis         0
Class               0
dtype: int64

In [244]:
# Number of rows per label value, to see how balanced the classes are.
df.loc[:, 'Class'].value_counts()

Class
0    16259
1     1639
Name: count, dtype: int64

In [245]:
# Features are every column except the label; the label is the 'Class' column.
X = df.drop(columns='Class')
y = df['Class']

In [246]:
# Hold out 20% of the rows for testing. stratify=y keeps the label proportions
# the same in both splits; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=62,
    stratify=y,
)

for split_name, split in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{split_name} shape: {split.shape}")

X_train shape: (14318, 8)
X_test shape: (3580, 8)
y_train shape: (14318,)
y_test shape: (3580,)


In [247]:
# Fit the scaler on the training split only, then apply those same statistics
# to both splits so no information from the test rows enters the fit.
sc = StandardScaler().fit(X_train)
X_train_scaled = sc.transform(X_train)
X_test_scaled = sc.transform(X_test)

In [248]:
# Scaled feature matrices as float32 tensors.
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train_scaled, X_test_scaled)
)

# Labels as float32 column vectors of shape (N, 1).
y_train_tensor, y_test_tensor = (
    torch.tensor(labels.to_numpy(), dtype=torch.float32).unsqueeze(1)
    for labels in (y_train, y_test)
)

In [249]:
BATCH_SIZE = 32

# Pair each feature row with its label.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training batches are shuffled; the test loader keeps dataset order.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

In [250]:
class LinearModel(nn.Module):
    """Small fully connected network that returns raw (un-activated) outputs.

    With the default arguments the architecture is
    ``Linear(8, 64) -> ReLU -> Linear(64, 32) -> ReLU -> Linear(32, 1)``.
    No sigmoid is applied in ``forward``, so the output is a logit.

    Args:
        in_features: Number of input features per sample.
        hidden_sizes: Width of each hidden layer, in order. Every hidden
            ``Linear`` layer is followed by a ``ReLU``.
        out_features: Number of output values per sample.
    """

    def __init__(self, in_features=8, hidden_sizes=(64, 32), out_features=1):
        super().__init__()
        layers = []
        width = in_features
        for hidden in hidden_sizes:
            layers.append(nn.Linear(width, hidden))
            layers.append(nn.ReLU())
            width = hidden
        layers.append(nn.Linear(width, out_features))
        # Keep everything in one Sequential named `net` so parameter names
        # stay of the form net.<index>.weight / net.<index>.bias.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the raw outputs."""
        return self.net(x)

In [251]:
def binary_accuracy(preds, targets):
    """Fraction of predictions that match the 0/1 targets.

    Args:
        preds: Tensor of raw logits. A sigmoid is applied and values with
            probability >= 0.5 are counted as class 1.
        targets: Tensor of 0/1 labels with the same number of elements as
            ``preds``. Its shape may differ (e.g. ``(N,)`` vs ``(N, 1)``).

    Returns:
        A 0-dim float tensor holding the fraction of matching elements.

    Raises:
        ValueError: If ``preds`` and ``targets`` hold a different number of
            elements.
    """
    # Flatten both sides before comparing. Without this, comparing an (N, 1)
    # tensor with an (N,) tensor broadcasts to (N, N) and gives a wrong count.
    probs = torch.sigmoid(preds).reshape(-1)
    targets = targets.reshape(-1)
    if probs.shape != targets.shape:
        raise ValueError(
            f"preds and targets must have the same number of elements, "
            f"got {probs.numel()} and {targets.numel()}"
        )
    preds_class = (probs >= 0.5).float()
    return (preds_class == targets).float().mean()


In [252]:
# Seed torch's global RNG so the weight initialisation and the DataLoader's
# shuffle order are the same on every Restart & Run All.
SEED = 62
torch.manual_seed(SEED)

model = LinearModel()

# The model returns raw logits; BCEWithLogitsLoss applies the sigmoid itself.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for i in range(epochs):
    model.train()
    total_loss = 0.0
    total_correct = 0.0
    total_samples = 0
    for xb, yb in train_dataloader:
        batch_size = xb.size(0)

        # Clear gradients left from the previous step before the new backward pass.
        optimizer.zero_grad()
        pred = model(xb)
        loss = loss_fn(pred, yb)
        loss.backward()
        optimizer.step()

        # Both loss and accuracy are per-batch means. Weight them by the batch
        # size so a smaller final batch does not skew the epoch average; the
        # evaluation cell aggregates accuracy the same way.
        total_loss += loss.item() * batch_size
        total_correct += binary_accuracy(pred.detach(), yb).item() * batch_size
        total_samples += batch_size
    avg_loss = total_loss / total_samples
    avg_acc = total_correct / total_samples
    print(f"Epoch {i+1} — Loss: {avg_loss:.4f}, Accuracy: {avg_acc:.4f}")


Epoch 1 — Loss: 0.1218, Accuracy: 0.9734
Epoch 2 — Loss: 0.0709, Accuracy: 0.9790
Epoch 3 — Loss: 0.0688, Accuracy: 0.9798
Epoch 4 — Loss: 0.0676, Accuracy: 0.9801
Epoch 5 — Loss: 0.0668, Accuracy: 0.9801
Epoch 6 — Loss: 0.0664, Accuracy: 0.9802
Epoch 7 — Loss: 0.0657, Accuracy: 0.9804
Epoch 8 — Loss: 0.0649, Accuracy: 0.9802
Epoch 9 — Loss: 0.0649, Accuracy: 0.9807
Epoch 10 — Loss: 0.0645, Accuracy: 0.9806


In [253]:
# Switch the model to evaluation mode before scoring the held-out split.
model.eval()
total_samples = 0
total_correct = 0

# No gradients are needed while scoring.
with torch.no_grad():
    for features, labels in test_dataloader:
        n_in_batch = features.size(0)
        batch_fraction = binary_accuracy(model(features), labels).item()
        # binary_accuracy returns a per-batch fraction; scale it back to a
        # count so batches of different sizes are weighted correctly.
        total_correct += batch_fraction * n_in_batch
        total_samples += n_in_batch

final_acc = total_correct / total_samples
print(f"Test Accuracy: {final_acc}")


Test Accuracy: 0.9784916201117319


97709