In [1]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import numpy as np



train = pd.read_csv("train_with_cos.csv")

In [3]:
train = train.select_dtypes(np.number).fillna(1e10)
X_train = train.drop(['target'], axis=1)
y_train = train['target'].values

# Обучение модели

In [3]:
X_tr, _, y_tr, _ = train_test_split(X_train, y_train, test_size=0.3, random_state=42)

In [4]:
for column in X_tr.columns:
    X_tr[column] = (X_tr[column] - X_tr[column].mean()) / X_tr[column].std()

In [5]:
X_train_tensor = torch.Tensor(X_tr.values)
y_train_tensor = torch.Tensor(y_tr)

In [6]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)

batch_size = 512
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers = 8)

In [4]:
# Определение нейронной сети для модели RankNet
class RankNet(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(RankNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(hidden_dim, 256)
        self.fc3 = nn.Linear(256, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

In [8]:

input_dim = 39 
hidden_dim = 512
model = RankNet(input_dim, hidden_dim)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

criterion
num_epochs = 15
for epoch in range(num_epochs):
    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs, targets
        targets = targets.view(-1, 1)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    print(f'{loss} - loss on epoch {epoch}')

0.6285024881362915 - loss on epoch 0
0.6265425086021423 - loss on epoch 1
0.6095401644706726 - loss on epoch 2
0.6218496561050415 - loss on epoch 3
0.6463740468025208 - loss on epoch 4
0.5962731838226318 - loss on epoch 5
0.6142375469207764 - loss on epoch 6
0.6490315198898315 - loss on epoch 7
0.5960783958435059 - loss on epoch 8
0.592505156993866 - loss on epoch 9
0.6552513837814331 - loss on epoch 10
0.637855052947998 - loss on epoch 11
0.6192449927330017 - loss on epoch 12
0.6272112131118774 - loss on epoch 13
0.6373885869979858 - loss on epoch 14


In [11]:
torch.save(model.state_dict(), "model")

# Загрузим модель и получим ответы

In [5]:
model = RankNet(39, 512)
model.load_state_dict(torch.load("model"))


<All keys matched successfully>

In [6]:
for column in X_train.columns:
    X_train[column] = (X_train[column] - X_train[column].mean()) / X_train[column].std()

In [7]:
X_train_tensor = torch.Tensor(X_train.values)
train_dataset = TensorDataset(X_train_tensor)

batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers = 8)

In [8]:
model.eval()
with torch.no_grad():
    pred = torch.tensor([], dtype=torch.float32)
    for batch, X in enumerate(train_loader):
        pred = torch.cat((pred, model(X[0])), dim=0)

In [9]:
print(pred)

tensor([[-0.5396],
        [-0.5435],
        [-0.4649],
        ...,
        [-0.9095],
        [-0.7821],
        [-0.3664]])


In [11]:
pred = pred.tolist()
df = pd.DataFrame(pred, columns=['nn_pred'])

df.to_csv("nn_pred.csv")