In [1]:
import math

import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

In [61]:
train_x = pd.read_csv('train_x.csv')
train_y = pd.read_csv('train_y.csv')
test_x = pd.read_csv('test_x.csv')

# Training files: column 0 is skipped in both frames (presumably a row id —
# TODO confirm against the CSV headers); the target is the second column of train_y.
X = train_x.iloc[:, 1:].values
y = train_y.iloc[:, 1].values

# Test file: here the LAST column is split off and kept as the submission id.
# NOTE(review): this is the opposite end from the training slice above — verify
# that test_x.csv really stores its id column last.
test_ids = test_x.iloc[:, -1]
test = test_x.iloc[:, :-1].values

# Fail fast if the slicing above leaves the arrays misaligned.
assert X.shape[0] == y.shape[0], "train_x and train_y have different row counts"
assert X.shape[1] == test.shape[1], "train and test feature counts differ"
assert len(test_ids) == test.shape[0], "test ids and test features have different row counts"

X.shape, y.shape, test_ids.shape, test.shape

((14000, 90), (14000,), (6000,), (6000, 90))

In [62]:
# Hold out 20% of the labelled rows for validation; the fixed random_state
# keeps the split identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((11200, 90), (2800, 90), (11200,), (2800,))

Преобразование данных в тензоры

In [63]:
# Feature matrices are copied into float32 tensors as-is.
X_train_tensor, X_val_tensor, X_test = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train, X_val, test)
)

# Targets get a trailing dimension of size 1 so they line up with the
# model's single-column output.
y_train_tensor, y_val_tensor = (
    torch.tensor(targets, dtype=torch.float32).unsqueeze(1)
    for targets in (y_train, y_val)
)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

# Only the training loader reshuffles; validation batches keep a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(val_dataset, batch_size=32)

In [69]:
class MyNN(nn.Module):
    """Fully connected regressor with a single output value per input row.

    The network is three ``Linear -> BatchNorm1d -> ReLU`` stages of widths
    128, 64 and 32, followed by a final ``Linear`` layer with one output.

    Args:
        in_features: Number of features in each input row. Defaults to 90,
            the width that was previously hard-coded.
    """

    def __init__(self, in_features=90):
        super().__init__()
        self.layer1 = nn.Linear(in_features, 128)
        self.bn1 = nn.BatchNorm1d(128)

        self.layer2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)

        self.layer3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)

        self.output_layer = nn.Linear(32, 1)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to a ``(batch, 1)`` tensor.

        No activation is applied after the output layer.
        """
        x = torch.relu(self.bn1(self.layer1(x)))
        x = torch.relu(self.bn2(self.layer2(x)))
        x = torch.relu(self.bn3(self.layer3(x)))
        x = self.output_layer(x)
        return x

In [78]:
def early_stopping(val_loss, best_loss, patience_counter, patience=5, min_delta=0):
    """Advance the early-stopping state by one epoch.

    Args:
        val_loss: Validation loss of the epoch that just finished.
        best_loss: Best validation loss seen so far, or ``None`` if there is
            no baseline yet.
        patience_counter: Number of consecutive epochs without improvement.
        patience: Stop once the counter reaches this value.
        min_delta: ``val_loss`` must be at most ``best_loss - min_delta`` to
            count as an improvement.

    Returns:
        A ``(should_stop, best_loss, patience_counter)`` tuple holding the
        updated state.
    """
    # A NaN loss (e.g. from diverged training) must never become the new best:
    # every later comparison against NaN is False, which previously reset the
    # counter on each epoch and disabled early stopping altogether.
    loss_is_valid = not math.isnan(val_loss)
    has_baseline = best_loss is not None and not math.isnan(best_loss)

    if loss_is_valid and (not has_baseline or val_loss <= best_loss - min_delta):
        # First usable loss, or an improvement: record it and reset patience.
        return False, val_loss, 0

    patience_counter += 1
    return patience_counter >= patience, best_loss, patience_counter

In [74]:
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=1000, patience=5, min_delta=0):
    """Train ``model`` with early stopping and return the loss history.

    Every epoch runs one optimisation pass over ``train_loader`` followed by a
    gradient-free evaluation pass over ``val_loader``. The epoch's validation
    loss is passed to ``scheduler.step`` and to ``early_stopping``. When
    training ends, the weights from the epoch with the best validation loss
    are loaded back into ``model`` instead of leaving the last epoch's weights
    in place.

    Args:
        model: Module to train; it is modified in place.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        criterion: Callable ``criterion(outputs, targets)`` returning the loss.
        optimizer: Optimizer that updates ``model``'s parameters.
        scheduler: Object whose ``step`` method accepts the validation loss.
        epochs: Maximum number of epochs.
        patience: Forwarded to ``early_stopping``.
        min_delta: Forwarded to ``early_stopping``.

    Returns:
        ``(train_losses, val_losses)``: two lists with one entry per completed
        epoch, each entry being the mean of that epoch's per-batch losses.
    """
    best_loss = None
    best_state = None
    patience_counter = 0
    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()

        # Average over the number of batches, not the number of samples.
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        train_losses.append(train_loss)
        val_losses.append(val_loss)

        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

        scheduler.step(val_loss)

        should_stop, best_loss, patience_counter = early_stopping(val_loss, best_loss, patience_counter, patience, min_delta)
        if patience_counter == 0:
            # early_stopping zeroes the counter only when it accepts this
            # epoch's loss as the new best. state_dict() returns references to
            # the live tensors, so clone them to freeze the snapshot.
            best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
        if should_stop:
            print(f"Early stopping at epoch {epoch+1}")
            break

    # Roll back to the best validation epoch; with a large patience the final
    # epoch can be far worse than the best one.
    if best_state is not None:
        model.load_state_dict(best_state)

    return train_losses, val_losses

In [75]:
# Seed PyTorch's global RNG so that weight initialisation and the shuffling
# done by the training DataLoader are repeatable across "Restart & Run All".
torch.manual_seed(42)

model = MyNN()
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.01)
# Halve the learning rate once the validation loss has not improved for 10 epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)
# Bind the call's result to a name so the cell never echoes a return value.
training_history = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=1000, patience=100)

Epoch 1/1000, Train Loss: 2889805.7207, Validation Loss: 847865.4759
Epoch 2/1000, Train Loss: 302289.9946, Validation Loss: 27520.2061
Epoch 3/1000, Train Loss: 5931.0154, Validation Loss: 2539.3799
Epoch 4/1000, Train Loss: 2067.3631, Validation Loss: 933.5361
Epoch 5/1000, Train Loss: 1810.0153, Validation Loss: 2672.4907
Epoch 6/1000, Train Loss: 1445.8799, Validation Loss: 3766.6726
Epoch 7/1000, Train Loss: 1288.8727, Validation Loss: 6661.4756
Epoch 8/1000, Train Loss: 1116.3130, Validation Loss: 1039.6281
Epoch 9/1000, Train Loss: 649.6243, Validation Loss: 1514.3322
Epoch 10/1000, Train Loss: 733.3752, Validation Loss: 630.1478
Epoch 11/1000, Train Loss: 759.1089, Validation Loss: 924.6052
Epoch 12/1000, Train Loss: 551.8028, Validation Loss: 786.1324
Epoch 13/1000, Train Loss: 500.1745, Validation Loss: 812.8589
Epoch 14/1000, Train Loss: 450.5180, Validation Loss: 1758.8065
Epoch 15/1000, Train Loss: 495.9458, Validation Loss: 1404.3554
Epoch 16/1000, Train Loss: 809.7069, V

In [76]:
# Inference only: put the model in eval mode and skip gradient tracking.
model.eval()
with torch.no_grad():
    # squeeze() drops the size-1 output dimension, leaving one value per test row.
    predictions = model(X_test).squeeze()

# Round every prediction to the nearest integer value (dtype stays floating point).
predicted_years = predictions.round().numpy()

# Pair each prediction with its test id and write the submission file.
output_df = pd.DataFrame(
    {
        'index': test_ids,
        'year': predicted_years,
    }
)
output_df.to_csv('submission.csv', index=False)