In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score, roc_curve, auc
from sklearn.preprocessing import StandardScaler, label_binarize
import matplotlib.pyplot as plt


In [3]:
lux_train = pd.read_csv('lux_train.csv')
lux_val = pd.read_csv('lux_val.csv')
lux_test = pd.read_csv('lux_test.csv')


In [5]:
print(len(lux_train))
print(len(lux_val))
print(len(lux_test))

2310000
826250
1620000


In [29]:
#splitting data,labels
y_train = lux_train["target"].values
lux_train.drop("target", axis=1, inplace=True)

y_valid = lux_val["target"].values
lux_val.drop("target", axis=1, inplace=True)

y_test = lux_test["target"].values
lux_test.drop("target", axis=1, inplace=True)

# Scaling data
scaler = StandardScaler()
x_train = scaler.fit_transform(lux_train)
x_valid = scaler.transform(lux_val)
x_test = scaler.transform(lux_test)

# Convert to PyTorch tensors
x_train = torch.tensor(x_train, dtype=torch.float32)
x_valid = torch.tensor(x_valid, dtype=torch.float32)
x_test = torch.tensor(x_test, dtype=torch.float32)

y_train = torch.tensor(y_train, dtype=torch.float32)  # changing dtype to float32
y_valid = torch.tensor(y_valid, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)


In [30]:
import torch.utils.data as data

# 1. Convert the data and labels into a dataset using TensorDataset
train_dataset = data.TensorDataset(x_train, y_train)
valid_dataset = data.TensorDataset(x_valid, y_valid)
test_dataset = data.TensorDataset(x_test, y_test)

# 2. Define the batch size 
batch_size = 100

# 3. Create the DataLoaders
train_loader = data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = data.DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


In [24]:
import torch
import torch.nn as nn
import torch.optim as optim

# 1. Define the device for training (CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 2. Neural Network Model Definition
class FullyConnectedNN(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super(FullyConnectedNN, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.l3 = nn.Linear(hidden_size2, num_classes)
        self.dropout1=nn.Dropout(0.5)
        self.dropout2=nn.Dropout(0.5)

    def forward(self, x):
        out = self.l1(x)
        out = self.relu1(out)
        out = self.dropout1(out)
        out = self.l2(out)
        out = self.relu2(out)
        out = self.dropout2(out)
        out = self.l3(out)
        return out

# 3. Model Initialization, Loss Function and Optimizer
input_size = x_train.shape[1]
hidden_size1 = 64
hidden_size2 = 64
num_classes = len(torch.unique(y_train))

model = FullyConnectedNN(input_size, hidden_size1, hidden_size2, num_classes).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 4. Model Training
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for i, (data, label) in enumerate(train_loader):
        data, label = data.to(device), label.to(device)
        
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, label.view(-1, 1))
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Print progress every 50 epochs
    if (epoch+1) % 50 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_loader)}")





Epoch 50/1000, Loss: 8.115271696473839e-10
Epoch 100/1000, Loss: 8.079025002368858e-10
Epoch 150/1000, Loss: 8.236530037704469e-10
Epoch 200/1000, Loss: 8.149564283026333e-10
Epoch 250/1000, Loss: 8.206725342835924e-10
Epoch 300/1000, Loss: 7.9526408877635e-10
Epoch 350/1000, Loss: 8.14594405971444e-10
Epoch 400/1000, Loss: 8.176671029090448e-10
Epoch 450/1000, Loss: 8.097368804552597e-10
Epoch 500/1000, Loss: 8.205342219389744e-10
Epoch 550/1000, Loss: 8.14445284469868e-10
Epoch 600/1000, Loss: 7.953339561389729e-10
Epoch 650/1000, Loss: 7.983752184958354e-10
Epoch 700/1000, Loss: 8.113086499743512e-10
Epoch 750/1000, Loss: 8.157993105211791e-10
Epoch 800/1000, Loss: 8.03848119150144e-10
Epoch 850/1000, Loss: 8.221331252486552e-10
Epoch 900/1000, Loss: 8.061027620544362e-10
Epoch 950/1000, Loss: 8.080135514395569e-10
Epoch 1000/1000, Loss: 7.985038242452119e-10


In [25]:
from sklearn.metrics import f1_score, balanced_accuracy_score, recall_score, precision_score, confusion_matrix

# 5. Model Evaluation

model.eval()  # set the model to evaluation mode

y_true = []
y_pred = []

with torch.no_grad():
    for data, label in valid_loader:
        data, label = data.to(device), label.to(device)
        
        outputs = model(data)
        
        # Convert outputs to predicted classes
        _, predicted = torch.max(outputs.data, 1)
        
        y_true.extend(label.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

# Compute the metrics
f1 = f1_score(y_true, y_pred, average='macro')
bal_accuracy = balanced_accuracy_score(y_true, y_pred)
recall = recall_score(y_true, y_pred, average='macro')
precision = precision_score(y_true, y_pred, average='macro')

print(f'F1 Score: {f1}')
print(f'Balanced Accuracy: {bal_accuracy}')
print(f'Recall: {recall}')
print(f'Precision: {precision}')


F1 Score: 0.3185567010309278
Balanced Accuracy: 0.5
Recall: 0.5
Precision: 0.23373676248108927


  _warn_prf(average, modifier, msg_start, len(result))
