In [1]:
import pandas as pd
from fancyimpute import KNN
import re
import xlsxwriter
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

from sklearn.impute import KNNImputer
from sklearn.preprocessing import LabelEncoder


# Defina o caminho para o arquivo train.csv
file_path = r'C:\Users\Kkk\3D Objects\HCT\df_imputed.csv'

# Leia o arquivo CSV
df = pd.read_csv(file_path)


# Step 1: Prepare the data
data = df.drop(columns=['ID'])
X = data.drop(columns=['efs', 'efs_time'])
y = data['efs']

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_val = torch.tensor(X_val.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_val = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

# Step 2: Define the neural network
class Net(nn.Module):
    def __init__(self, input_size):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        return x

input_size = X_train.shape[1]
model = Net(input_size)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Step 3: Train the model
num_epochs = 200
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Step 4: Evaluate the model
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    val_loss = criterion(outputs, y_val)
    print(f'Validation Loss: {val_loss.item():.4f}')

# Step 5: Predict and prepare the submission
X_test = torch.tensor(df.drop(columns=['ID', 'efs', 'efs_time']).values, dtype=torch.float32)

# Configure the DataLoader for batch processing
batch_size = 32
test_dataset = TensorDataset(X_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

predictions = []

# Iterate through batches
for batch in test_loader:
    x_var = batch[0]
    ypred_var = model(x_var).sigmoid()
    predictions.append(ypred_var.detach().numpy())

# Combine the predictions into a single array
predictions = np.concatenate(predictions).reshape(-1)

# Create the submission DataFrame
submission = pd.DataFrame({'ID': df.index[:len(predictions)], 'prediction': predictions})

# Save to CSV
submission.to_csv("submission.csv", index=False)
print(submission)

Epoch [10/200], Loss: 175.9313
Epoch [20/200], Loss: 42.2066
Epoch [30/200], Loss: 2.8472
Epoch [40/200], Loss: 0.3107
Epoch [50/200], Loss: 0.9102
Epoch [60/200], Loss: 0.6243
Epoch [70/200], Loss: 0.2927
Epoch [80/200], Loss: 0.2526
Epoch [90/200], Loss: 0.2638
Epoch [100/200], Loss: 0.2539
Epoch [110/200], Loss: 0.2491
Epoch [120/200], Loss: 0.2496
Epoch [130/200], Loss: 0.2493
Epoch [140/200], Loss: 0.2490
Epoch [150/200], Loss: 0.2490
Epoch [160/200], Loss: 0.2490
Epoch [170/200], Loss: 0.2489
Epoch [180/200], Loss: 0.2489
Epoch [190/200], Loss: 0.2488
Epoch [200/200], Loss: 0.2487
Validation Loss: 0.2472
          ID  prediction
0          0    0.618926
1          1    0.625085
2          2    0.637764
3          3    0.656802
4          4    0.616778
...      ...         ...
28795  28795    0.646096
28796  28796    0.635733
28797  28797    0.660191
28798  28798    0.613554
28799  28799    0.614569

[28800 rows x 2 columns]
