In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# competition CSV files stored there can be read with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Load the competition files from the mounted Drive folder.
# The submission template uses its first CSV column as the index; the table
# preview further down shows that index is named `user_id`.
submission = pd.read_csv(
    '/content/drive/MyDrive/alcups2024/M/submission.csv',
    index_col=0,
)
# Observed (user_id, course_id) interaction pairs used for training.
train_data = pd.read_csv('/content/drive/MyDrive/alcups2024/M/train.csv')

In [None]:
# Preview the raw training interactions (one row per user_id / course_id pair).
train_data

Unnamed: 0,user_id,course_id
0,39972,34
1,56815,51
2,63734,20
3,17896,81
4,36961,64
...,...,...
10267,10520,10
10268,62231,40
10269,150361,148
10270,4182,7


In [None]:
# Preview the submission template: indexed by user_id, three course_id_* columns.
submission

Unnamed: 0_level_0,course_id_1,course_id_2,course_id_3
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
78,0,0,0
81,0,0,0
120,0,0,0
123,0,0,0
150,0,0,0
...,...,...,...
185864,0,0,0
186262,0,0,0
186691,0,0,0
186887,0,0,0


In [None]:
# Distinct raw ids in order of first appearance; the position of an id in
# these arrays is the dense index used for the embedding tables.
user_ids = train_data['user_id'].unique()
course_ids = train_data['course_id'].unique()

# raw id -> dense index (0 .. count - 1)
user_map = dict(zip(user_ids, range(len(user_ids))))
course_map = dict(zip(course_ids, range(len(course_ids))))

n_users, n_courses = len(user_map), len(course_map)

# Attach the dense indices to every interaction row (adds two columns to train_data).
train_data['course'] = train_data['course_id'].map(course_map)
train_data['user'] = train_data['user_id'].map(user_map)
# Keep the column order `user`, `course` regardless of assignment order above.
train_data = train_data[[c for c in train_data.columns if c not in ('user', 'course')] + ['user', 'course']]

In [None]:
# Preview the training frame again, now with the dense `user` / `course` index columns.
train_data

Unnamed: 0,user_id,course_id,user,course
0,39972,34,0,0
1,56815,51,1,1
2,63734,20,2,2
3,17896,81,3,3
4,36961,64,4,4
...,...,...,...,...
10267,10520,10,4859,10
10268,62231,40,2461,77
10269,150361,148,4860,60
10270,4182,7,2895,5


In [None]:
class Recommender(nn.Module):
    """Matrix-factorisation scorer for (user, course) pairs.

    Each user and each course gets a learned vector of length ``n_factors``;
    the score of a pair is the dot product of the two vectors.

    Args:
        n_users: number of rows in the user embedding table.
        n_courses: number of rows in the course embedding table.
        n_factors: length of each embedding vector (default 10).
    """

    def __init__(self, n_users, n_courses, n_factors=10):
        super().__init__()
        self.user_embedding = nn.Embedding(num_embeddings=n_users, embedding_dim=n_factors)
        self.course_embedding = nn.Embedding(num_embeddings=n_courses, embedding_dim=n_factors)

    def forward(self, user, course):
        """Return one raw score (logit) per (user, course) pair.

        ``user`` and ``course`` are index tensors of equal length; the
        result is reduced over dimension 1, the embedding dimension.
        """
        user_factors = self.user_embedding(user)
        course_factors = self.course_embedding(course)
        return torch.sum(torch.mul(user_factors, course_factors), dim=1)

In [None]:
# One embedding row per distinct training user / course; default factor count.
model = Recommender(n_users=n_users, n_courses=n_courses)

In [None]:
# Binary cross-entropy applied to raw scores (the sigmoid is part of the loss).
criterion = nn.BCEWithLogitsLoss()
# Adam over all embedding weights, library-default hyperparameters.
optimizer = optim.Adam(params=model.parameters())

In [None]:
# Full-batch training of the implicit-feedback matrix factorisation model.
#
# Every row of train_data is an observed (user, course) pair and is labelled 1.
# With positives only, the loss is minimised simply by pushing every score up,
# which gives no signal about which unseen courses to rank first. Each epoch
# therefore also scores one randomly drawn course per row, labelled 0.
user_input = torch.tensor(train_data['user'].values)      # loop-invariant, built once
course_input = torch.tensor(train_data['course'].values)
paired_users = torch.cat([user_input, user_input])        # positives first, then negatives
target = torch.cat([
    torch.ones(len(user_input)),
    torch.zeros(len(user_input)),
])
# Dedicated, seeded generator so the sampled negatives are reproducible.
negative_sampler = torch.Generator().manual_seed(0)

model.train()
for epoch in range(10):
    optimizer.zero_grad()
    # Fresh negatives every epoch. A draw can occasionally coincide with a
    # course the user really took; that is accepted as a small amount of label noise.
    negative_course = torch.randint(0, n_courses, course_input.shape, generator=negative_sampler)
    output = model(paired_users, torch.cat([course_input, negative_course]))
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    print(epoch)

0
1
2
3
4
5
6
7
8
9


In [None]:
# Inspect the user ids for which recommendations must be produced.
submission.index

Index([    78,     81,    120,    123,    150,    170,    171,    188,    246,
          259,
       ...
       184391, 184534, 185329, 185463, 185855, 185864, 186262, 186691, 186887,
       187206],
      dtype='int64', name='user_id', length=2900)

In [None]:
# Counter of submission users that do not appear in the training data.
n = 0

In [None]:
# Generate recommendations: three course ids per user in the submission index.
model.eval()
all_courses = torch.arange(n_courses)
top_k = min(3, n_courses)
# Courses each training user has already taken, keyed by dense user index.
# Computed once instead of re-filtering train_data for every user.
watched_by_user = train_data.groupby('user')['course'].apply(list).to_dict()

n = 0  # number of cold-start users; reset here so re-running the cell does not double count
recommendations = []
with torch.no_grad():  # inference only: no autograd bookkeeping needed
    for user in submission.index:
        if user in user_map:  # the user was seen during training
            user_id = user_map[user]
            # Score every course for this user in a single batched forward pass.
            scores = model(torch.full_like(all_courses, user_id), all_courses)
            # Already-taken courses are pushed to the bottom of the ranking. They are
            # only returned when fewer than three unseen courses exist, so every
            # user still gets a full row of recommendations.
            scores[torch.tensor(watched_by_user[user_id])] = float('-inf')
            top_courses = torch.topk(scores, top_k).indices.tolist()
            recommendations.append([course_ids[course] for course in top_courses])
        else:  # new user with no history: recommend random courses
            random_courses = pd.Series(course_ids).sample(3).values
            recommendations.append(random_courses)
            n += 1




In [None]:
# How many submission users had no training history and received random courses.
n

968

In [None]:
# Write the recommendations into the submission frame:
# column course_id_k receives the k-th recommended course of every user.
for rank in (1, 2, 3):
    submission[f'course_id_{rank}'] = [picks[rank - 1] for picks in recommendations]


In [None]:
# Preview the filled-in submission frame.
submission

Unnamed: 0_level_0,course_id_1,course_id_2,course_id_3
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
78,34,189,170
81,145,171,117
120,192,190,149
123,31,206,174
150,172,7,147
...,...,...,...
185864,71,150,43
186262,27,106,161
186691,222,26,97
186887,141,198,130


In [None]:
# Save the submission (index included) to ogo.csv in the current working directory.
submission.to_csv('ogo.csv')