In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Определение модели RankNet
class RankNet(nn.Module):
    def __init__(self, input_size):
        super(RankNet, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.fc(x)

# Создание датасета
class CustomDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

# Предположим, у вас есть данные в формате numpy arrays: features и targets
train_df = pd.read_csv('train_df.csv')
features = train_df.drop(columns='target')
targets = train_df['target']
# Преобразование данных в тензоры PyTorch
features_tensor = torch.tensor(features, dtype=torch.float32)
targets_tensor = torch.tensor(targets, dtype=torch.float32)

# Создание экземпляра модели RankNet
model = RankNet(input_size=78)  # Предполагается 78 признаков

# Определение функции потерь и оптимизатора
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Создание DataLoader для обучения модели
dataset = CustomDataset(features_tensor, targets_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Обучение модели RankNet
num_epochs = 10
for epoch in range(num_epochs):
    for batch_features, batch_targets in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_features)
        loss = criterion(outputs.squeeze(), batch_targets)
        loss.backward()
        optimizer.step()

In [5]:
df = pd.read_csv('train_df.csv')
df.head()

Unnamed: 0,search_id,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,target
0,758,9,0,0,1,20,3,40,0,3,...,0.204682,0.271755,0.055623,0,0,0,0.38648,0.0,0.0,0
1,758,9,0,0,1,20,3,40,0,3,...,0.195531,0.188787,0.036914,0,0,0,0.10982,0.0,0.0,0
2,758,9,0,0,1,20,3,40,0,3,...,0.148609,0.186517,0.027718,0,0,0,0.03674,0.0,0.0,0
3,758,9,0,0,1,20,3,40,0,3,...,0.223748,0.229039,0.051247,0,0,0,0.0,0.0,0.0,0
4,758,9,0,0,1,20,3,40,0,3,...,0.170935,0.249031,0.042568,0,0,0,0.0,0.0,0.0,0


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Определение модели RankNet
class RankNet(nn.Module):
    def __init__(self, input_size):
        super(RankNet, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.fc(x)

# Создание датасета с учетом search_id
class CustomDataset(Dataset):
    def __init__(self, features, targets, search_ids):
        self.features = features
        self.targets = targets
        self.search_ids = search_ids

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx], self.search_ids[idx]

# Предположим, у вас есть данные в формате numpy arrays: features, targets и search_ids

# Преобразование данных в тензоры PyTorch
train_df = pd.read_csv('train_df.csv')
features = train_df.drop(columns=['target', 'search_id']).values
targets = train_df['target'].values
search_ids = train_df['search_id'].values
features_tensor = torch.tensor(features, dtype=torch.float32)
targets_tensor = torch.tensor(targets, dtype=torch.float32)
search_ids_tensor = torch.tensor(search_ids)

# Создание экземпляра модели RankNet
model = RankNet(input_size=79)  # Предполагается 78 признаков

# Определение функции потерь и оптимизатора
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Создание DataLoader для обучения модели с учетом search_id
dataset = CustomDataset(features_tensor, targets_tensor, search_ids_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Обучение модели RankNet с учетом search_id
num_epochs = 10
for epoch in range(num_epochs):
    for batch_features, batch_targets, batch_search_ids in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_features)
        loss = criterion(outputs.squeeze(), batch_targets)
        loss.backward()
        optimizer.step()

In [9]:
test_df = pd.read_csv('test_df.csv')
features = test_df.drop(columns=['target', 'search_id']).values
targets = test_df['target'].values
search_ids = test_df['search_id'].values
features_tensor = torch.tensor(features, dtype=torch.float32)
targets_tensor = torch.tensor(targets, dtype=torch.float32)
search_ids_tensor = torch.tensor(search_ids)

def predict_with_search_id(model, features, search_ids):
    model.eval()
    with torch.no_grad():
        outputs = model(features)
    # Дополнительная логика для учета search_id при предсказании
    # Например, можно использовать информацию из search_ids для дополнительной обработки предсказаний
    return outputs

# Получение предсказаний с учетом search_id
predictions = predict_with_search_id(model, features_tensor, search_ids_tensor)

In [15]:
predictions.numpy().reshape(1, -1)

(1, 1529)

In [16]:
targets_tensor

tensor([0., 0., 0.,  ..., 1., 1., 0.])

In [32]:
trues = []
preds = []
for i ,(true, pred) in enumerate(zip(targets, predictions.numpy())):
  trues.append(true), preds.append(*pred)

In [28]:
print(*predictions.numpy()[1])

-1.866051


In [33]:
df = pd.DataFrame({'true_y': trues, 'pred_y': preds})

In [34]:
df.to_csv('sol.csv')