In [None]:
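# One-time setup, kept commented out so "Run All" does not re-download the data.
# Uncomment on a fresh runtime to fetch the competition files read by the loading cell below.
# !pip install opendatasets
# import opendatasets as od
# od.download("https://www.kaggle.com/competitions/cpe-ai-hackathon-2025-medical-data-classification")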

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# Read the competition's train and test splits into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/cpe-ai-hackathon-2025-medical-data-classification/train.csv",
        "/content/cpe-ai-hackathon-2025-medical-data-classification/test.csv",
    )
)

In [None]:
# Strip the row identifier from the training frame; it is not a feature.
train = train.drop(columns=["id"])

# Keep the test ids aside (needed for the submission file), then remove the
# id column and the Class column from the test features in a single call.
id_test = test["id"]
test = test.drop(columns=["id", "Class"])

In [None]:
# Impute missing values with per-column medians computed on the TRAINING split only.
# The same training medians are applied to `test` so both splits are preprocessed
# with identical statistics (previously `test` was filled with its own medians,
# which differ from what the model sees during training).
# `fillna` with a Series fills column-by-column and ignores keys that are not
# columns of the target frame, so the extra 'Class' entry is harmless for `test`.
train_medians = train.median()
train = train.fillna(train_medians)
test = test.fillna(train_medians)

# Separate the features from the target column.
x_train = train.drop(columns=['Class'])
y_train = train.Class

In [None]:
# Convert the pandas objects to float32 NumPy arrays. The labels are reshaped
# into a column vector (N, 1).
x_train = x_train.values.astype(np.float32)
y_train = y_train.values.astype(np.float32).reshape(-1, 1)
test = test.values.astype(np.float32)

# Wrap each array in a torch tensor (torch.tensor copies the data).
tensor_x_train, tensor_y_train, tensor_test = (
    torch.tensor(array) for array in (x_train, y_train, test)
)

In [None]:
class neural_network(nn.Module):
    """Fully connected network that returns raw logits.

    Architecture: BatchNorm1d on the input, then for every entry of
    ``hidden_dims`` a ``Linear -> BatchNorm1d -> LeakyReLU -> Dropout`` stack,
    followed by a final ``Linear`` layer. No sigmoid/softmax is applied, so the
    output is meant for logit-based losses such as ``nn.BCEWithLogitsLoss``.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output logits per sample.
        hidden_dims: Widths of the hidden layers, in order. Any iterable of
            ints; the default is a tuple so it cannot be mutated between calls.
        dropout_rate: Drop probability used after every hidden activation.
        negative_slope: Negative slope of the LeakyReLU activations. The same
            value is passed to the Kaiming initialiser so the weight gain
            matches the activation actually used.
    """

    def __init__(self, input_dim, output_dim=1, hidden_dims=(256, 128, 64, 32), dropout_rate=0.25,
                 negative_slope=0.1):
        super().__init__()

        # Stored so _init_weights can use the same slope as the activations.
        self.negative_slope = negative_slope

        # Normalises the raw input features inside the model.
        self.input_bn = nn.BatchNorm1d(input_dim)

        layers = []
        in_dim = input_dim

        for h_dim in hidden_dims:
            layers.append(nn.Linear(in_dim, h_dim))
            layers.append(nn.BatchNorm1d(h_dim))
            layers.append(nn.LeakyReLU(negative_slope=negative_slope))
            layers.append(nn.Dropout(dropout_rate))
            in_dim = h_dim

        self.output_layer = nn.Linear(in_dim, output_dim)
        self.layers = nn.Sequential(*layers)
        # Re-initialise every Linear layer (hidden and output) with Kaiming normal.
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Kaiming-normal init for Linear weights, zeros for their biases."""
        if isinstance(m, nn.Linear):
            # `a` must be the LeakyReLU negative slope; leaving it at the
            # default 0 computes the gain for a plain ReLU instead.
            nn.init.kaiming_normal_(m.weight, a=self.negative_slope, nonlinearity='leaky_relu')
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return logits of shape (batch, output_dim) for input (batch, input_dim)."""
        x = self.input_bn(x)
        out = self.layers(x)
        out = self.output_layer(out)

        return out


In [None]:
# Seed torch before the model is built so weight initialisation and dropout
# masks are repeatable across "Restart & Run All".
SEED = 42
torch.manual_seed(SEED)

model = neural_network(tensor_x_train.shape[1])
# pos_weight scales the loss of positive samples (here by 3x) inside BCEWithLogitsLoss.
pos_weight = torch.tensor([3.0])
error = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)

# col_F1 collects the training F1 of every epoch; col_F1_val is kept for a
# validation curve but nothing in this cell fills it (no validation split here).
col_F1, col_F1_val = [], []
# epoch: number of full-batch passes; n: print the loss every n epochs.
epoch, n = 2000, 5

for i in range(0, epoch):
  model.train()
  y_hat = model(tensor_x_train)
  loss_BCE = error(y_hat, tensor_y_train)

  optimizer.zero_grad()
  loss_BCE.backward()
  optimizer.step()
  # ReduceLROnPlateau only lowers the learning rate when it is stepped with the
  # monitored metric; without this call the scheduler has no effect.
  scheduler.step(loss_BCE.item())

  with torch.no_grad():
    # The model returns logits, so map them to probabilities before applying
    # the 0.5 decision threshold.
    y_hat = (torch.sigmoid(y_hat) >= 0.5).float()
    # F1 = 2*TP / (predicted positives + actual positives); assumes 0/1 labels.
    true_pos = (y_hat * tensor_y_train).sum().item()
    denom = y_hat.sum().item() + tensor_y_train.sum().item()
    col_F1.append(2 * true_pos / denom if denom > 0 else 0.0)

  # Log every n-th epoch instead of every epoch to avoid flooding the output.
  if (i + 1) % n == 0:
    print(f"Epoch [{i+1}/{epoch}] | Loss(BCE): {loss_BCE.item():.4f}")


Epoch [5/2000] | Loss(BCE): 1.1065
Epoch [6/2000] | Loss(BCE): 1.0731
Epoch [7/2000] | Loss(BCE): 1.0375
Epoch [8/2000] | Loss(BCE): 0.9950
Epoch [9/2000] | Loss(BCE): 0.9727
Epoch [10/2000] | Loss(BCE): 0.9551
Epoch [11/2000] | Loss(BCE): 0.9406
Epoch [12/2000] | Loss(BCE): 0.9292
Epoch [13/2000] | Loss(BCE): 0.8964
Epoch [14/2000] | Loss(BCE): 0.8682
Epoch [15/2000] | Loss(BCE): 0.8681
Epoch [16/2000] | Loss(BCE): 0.8448
Epoch [17/2000] | Loss(BCE): 0.8210
Epoch [18/2000] | Loss(BCE): 0.7915
Epoch [19/2000] | Loss(BCE): 0.7911
Epoch [20/2000] | Loss(BCE): 0.7619
Epoch [21/2000] | Loss(BCE): 0.7622
Epoch [22/2000] | Loss(BCE): 0.7456
Epoch [23/2000] | Loss(BCE): 0.7267
Epoch [24/2000] | Loss(BCE): 0.7152
Epoch [25/2000] | Loss(BCE): 0.6951
Epoch [26/2000] | Loss(BCE): 0.6980
Epoch [27/2000] | Loss(BCE): 0.6799
Epoch [28/2000] | Loss(BCE): 0.6731
Epoch [29/2000] | Loss(BCE): 0.6621
Epoch [30/2000] | Loss(BCE): 0.6554
Epoch [31/2000] | Loss(BCE): 0.6467
Epoch [32/2000] | Loss(BCE): 0.63

In [None]:
# Switch to inference mode: disables Dropout and makes BatchNorm use its running
# statistics. Without this, test predictions are computed with random dropout
# masks and batch statistics of the test set.
model.eval()
with torch.no_grad():
  logits = model(tensor_test)
  # The model outputs logits; convert to probabilities before thresholding at 0.5.
  # reshape(-1) (rather than squeeze()) keeps a 1-D array even for a single row.
  y_hat = (torch.sigmoid(logits) > 0.5).long().reshape(-1).numpy()

# Build the submission frame positionally so it does not depend on the index of
# id_test lining up with a fresh RangeIndex.
summarize = pd.DataFrame({'id': id_test.to_numpy(), 'Class': y_hat})
summarize.to_csv('My_Answer_data.csv', index=False)
print(summarize['Class'].unique())
summarize


[0 1]


Unnamed: 0,id,Class
0,0,0
1,1,0
2,2,0
3,3,1
4,4,0
...,...,...
7483,7483,0
7484,7484,0
7485,7485,0
7486,7486,0
