# In [None]:  (Jupyter notebook cell marker left over from the export)
import numpy as np
from scipy.sparse import coo_matrix, csr_matrix
from tqdm.notebook import trange
from tqdm import tqdm
import pandas as pd
import torch
import torch.nn as nn
from implicit.als import AlternatingLeastSquares
import torch.optim as optim

# Load the four parquet files (train/test for the "zvuk" and "smm" datasets)
# in the same order as before; each result is bound to a module-level name.
train_zvuk, train_smm, test_smm, test_zvuk = map(
    pd.read_parquet,
    (
        'train_zvuk.parquet',
        'train_smm.parquet',
        'test_smm.parquet',
        'test_zvuk.parquet',
    ),
)


def pivot_csr(df):
    """Build a sparse user-by-item matrix from a long-format frame.

    Args:
        df: DataFrame with ``user_id``, ``item_id`` and ``rating`` columns.

    Returns:
        A ``csr_matrix`` whose row ``r`` / column ``c`` correspond to the
        ``r``-th distinct ``user_id`` / ``c``-th distinct ``item_id`` in order
        of first appearance in ``df``. Repeated (user, item) pairs are summed
        by the sparse constructor.
    """
    users = df['user_id']
    items = df['item_id']

    # factorize assigns integer codes in order of first appearance.
    user_codes, _ = pd.factorize(users)
    item_codes, _ = pd.factorize(items)

    n_users = len(users.unique())
    n_items = len(items.unique())

    # Assemble the CSR matrix from (value, (row, col)) triplets.
    return csr_matrix(
        (df['rating'], (user_codes, item_codes)),
        shape=(n_users, n_items),
    )
    
def model_1(test_zvuk, train_zvuk,train_smm, test_smm):
    """Train a two-output regressor on users present in both input frames.

    Only ``test_zvuk`` and ``test_smm`` are read; ``train_zvuk`` and
    ``train_smm`` are accepted for signature compatibility but unused.

    For every stored positive entry ``(user, item)`` of the zvuk user-item
    matrix (row-major order, at most 10000 samples) the feature vector is the
    concatenation of: the zvuk user row, the zvuk item column, row ``item`` of
    the transposed smm matrix, and the smm user row. The target is the pair
    ``(zvuk[user, item], smm[user, item])``, standardised with the global
    mean/std of all targets.

    Args:
        test_zvuk: DataFrame with ``user_id``, ``item_id``, ``rating``.
        train_zvuk: Unused.
        train_smm: Unused.
        test_smm: DataFrame with ``user_id``, ``item_id``, ``rating``.

    Returns:
        Tuple ``(model, RET_Z, RET_S, RET_Z_i, RET_S_i, user_factors_z,
        item_factors_z, user_factors_s, item_factors_s)``: the trained
        network, the distinct user ids / item ids of each filtered frame, and
        the sparse matrices (and their transposes) used as "factors".

    Raises:
        ValueError: if no training sample could be built.
    """
    max_samples = 10000
    num_epochs = 5
    batch_size = 32

    shared_users = set(test_smm.user_id.unique()).intersection(test_zvuk.user_id.unique())

    # Both frames are sorted by user_id (descending) so that, after
    # factorisation in pivot_csr, row ``u`` refers to the same user in both.
    train_z = test_zvuk[test_zvuk['user_id'].isin(shared_users)].sort_values(by='user_id', ascending=False)
    train_s = test_smm[test_smm['user_id'].isin(shared_users)].sort_values(by='user_id', ascending=False)
    RET_Z, RET_S = train_z.user_id.unique(), train_s.user_id.unique()
    RET_Z_i, RET_S_i = train_z.item_id.unique(), train_s.item_id.unique()

    train_z_csr = pivot_csr(train_z)
    train_s_csr = pivot_csr(train_s)
    user_factors_z = train_z_csr
    item_factors_z = train_z_csr.transpose()
    user_factors_s = train_s_csr
    item_factors_s = train_s_csr.transpose()

    # Walk only the stored entries (sorted by column within each row) instead
    # of probing every (user, item) cell; the visiting order is unchanged.
    z_rows = train_z_csr.sorted_indices()
    z_rows.sum_duplicates()

    X_train = []
    y_train = []
    for user in range(z_rows.shape[0]):
        if len(X_train) >= max_samples:
            break
        lo, hi = z_rows.indptr[user], z_rows.indptr[user + 1]
        if lo == hi:
            continue
        # The user vectors do not depend on the item: densify once per row.
        user_vec_z = user_factors_z[user].toarray()[0]
        user_vec_s = user_factors_s[user].toarray()[0]
        for item, rating in zip(z_rows.indices[lo:hi], z_rows.data[lo:hi]):
            if not rating > 0:
                continue
            item = int(item)
            # NOTE(review): the zvuk column index is reused to index smm
            # items (item_factors_s[item], train_s_csr[user, item]); this
            # raises IndexError if smm has fewer items -- confirm intent.
            X_train.append(np.concatenate((
                user_vec_z,
                item_factors_z[item].toarray()[0],
                item_factors_s[item].toarray()[0],
                user_vec_s,
            ), axis=0))
            y_train.append(np.array([rating, train_s_csr[user, item]]))
            if len(X_train) >= max_samples:
                break

    if not X_train:
        raise ValueError('model_1: no positive interactions found for users shared by both frames')

    X_train = torch.tensor(np.array(X_train)).float()
    y_train = torch.tensor(np.array(y_train)).float()

    # Standardise the targets; fall back to unit scale when the std is zero
    # or undefined so the targets do not become NaN/inf.
    mean = y_train.mean()
    std = y_train.std()
    if not std > 0:
        std = torch.tensor(1.0)
    y_train = (y_train - mean) / std

    in_features = X_train.shape[1]

    class FullyConnectedNN(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc1 = nn.Linear(in_features, 1000)
            # NOTE(review): fc2 is never used in forward(); kept so the
            # parameter set of the returned model is unchanged.
            self.fc2 = nn.Linear(1000, 1000)
            self.fc3 = nn.Linear(1000, 2)
            self.relu = nn.ReLU()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc3(x)  # output layer without activation

    # Model, loss function and optimiser.
    model_nn_z1 = FullyConnectedNN()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model_nn_z1.parameters(), lr=0.001)

    # Training loop over sequential (unshuffled) mini-batches.
    model_nn_z1.train()
    n_samples = len(y_train)
    for _ in range(num_epochs):
        for start in range(0, n_samples, batch_size):
            outputs = model_nn_z1(X_train[start:start + batch_size])
            loss = criterion(outputs, y_train[start:start + batch_size])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    return model_nn_z1, RET_Z, RET_S,RET_Z_i, RET_S_i, user_factors_z, item_factors_z, user_factors_s, item_factors_s

def model_2(test_zvuk, train_zvuk,train_smm, test_smm):
    """Train a single-output regressor on users found only in ``test_zvuk``.

    Only ``test_zvuk`` and ``test_smm`` are read (the latter just for its
    user ids); ``train_zvuk`` and ``train_smm`` are accepted for signature
    compatibility but unused.

    For every stored positive entry ``(user, item)`` of the user-item matrix
    (row-major order, at most 10000 samples) the feature vector is the user
    row concatenated with the item column, and the target is the rating,
    standardised with the mean/std of all targets.

    Args:
        test_zvuk: DataFrame with ``user_id``, ``item_id``, ``rating``.
        train_zvuk: Unused.
        train_smm: Unused.
        test_smm: DataFrame whose ``user_id`` values are excluded.

    Returns:
        Tuple ``(model, RET_Z, RET_Z_i, user_factors_z, item_factors_z)``:
        the trained network, the distinct user and item ids of the filtered
        frame, and the sparse user-item matrix with its transpose.

    Raises:
        ValueError: if no training sample could be built.
    """
    max_samples = 10000
    num_epochs = 5
    batch_size = 32

    exclusive_users = set(test_zvuk.user_id.unique()) - set(test_smm.user_id.unique())
    train_z = test_zvuk[test_zvuk['user_id'].isin(exclusive_users)].sort_values(by='user_id', ascending=False)
    train_z_csr = pivot_csr(train_z)
    user_factors_z = train_z_csr
    item_factors_z = train_z_csr.transpose()
    RET_Z, RET_Z_i = train_z.user_id.unique(), train_z.item_id.unique()

    # Walk only the stored entries (sorted by column within each row) instead
    # of probing every (user, item) cell; the visiting order is unchanged.
    z_rows = train_z_csr.sorted_indices()
    z_rows.sum_duplicates()

    X_train = []
    y_train = []
    for user in range(z_rows.shape[0]):
        if len(X_train) >= max_samples:
            break
        lo, hi = z_rows.indptr[user], z_rows.indptr[user + 1]
        if lo == hi:
            continue
        # The user vector does not depend on the item: densify once per row.
        user_vec = user_factors_z[user].toarray()[0]
        for item, rating in zip(z_rows.indices[lo:hi], z_rows.data[lo:hi]):
            if not rating > 0:  # keep only positive interactions
                continue
            X_train.append(np.concatenate((user_vec, item_factors_z[int(item)].toarray()[0]), axis=0))
            y_train.append(rating)
            if len(X_train) >= max_samples:
                break

    if not X_train:
        raise ValueError('model_2: no positive interactions found for users exclusive to the first frame')

    X_train = torch.tensor(np.array(X_train)).float()
    y_train = torch.tensor(np.array(y_train)).float()

    # Standardise the targets; fall back to unit scale when the std is zero
    # or undefined (e.g. a single sample) so the targets do not become NaN.
    mean = y_train.mean()
    std = y_train.std()
    if not std > 0:
        std = torch.tensor(1.0)
    y_train = (y_train - mean) / std

    in_features = X_train.shape[1]

    class FullyConnectedNN(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc1 = nn.Linear(in_features, 1000)
            # NOTE(review): fc2 is never used in forward(); kept so the
            # parameter set of the returned model is unchanged.
            self.fc2 = nn.Linear(1000, 1000)
            self.fc3 = nn.Linear(1000, 1)
            self.relu = nn.ReLU()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc3(x)  # output layer without activation

    # Model, loss function and optimiser.
    model_nn_z1 = FullyConnectedNN()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model_nn_z1.parameters(), lr=0.001)

    # Training loop over sequential (unshuffled) mini-batches.
    model_nn_z1.train()
    n_samples = len(y_train)
    for _ in range(num_epochs):
        for start in range(0, n_samples, batch_size):
            outputs = model_nn_z1(X_train[start:start + batch_size])
            loss = criterion(outputs.flatten(), y_train[start:start + batch_size])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    return model_nn_z1, RET_Z, RET_Z_i, user_factors_z, item_factors_z

def test_intersect(test_zvuk, train_zvuk,train_smm, test_smm, RET_Z, RET_S,RET_Z_i, RET_S_i, model_1, top_k=10, max_items=10000, batch_size=256):
    """Score items for each shared user and return the top-k per output.

    The feature rows are built exactly as in training: zvuk user row, row
    ``j`` of ``item_factors_z``, row ``j`` of ``item_factors_s``, smm user
    row. Output 0 of the model ranks zvuk items, output 1 ranks smm items.

    NOTE: ``user_factors_z``, ``item_factors_z``, ``user_factors_s`` and
    ``item_factors_s`` are read from module-level globals, not parameters.
    The frame arguments and ``RET_S`` are accepted but unused.

    Args:
        test_zvuk, train_zvuk, train_smm, test_smm: Unused.
        RET_Z: User ids; position ``i`` corresponds to row ``i`` of the
            user factor matrices.
        RET_S: Unused.
        RET_Z_i: Item ids used to translate output-0 item indices.
        RET_S_i: Item ids used to translate output-1 item indices.
        model_1: Callable mapping a float tensor ``(n, features)`` to
            ``(n, 2)`` scores.
        top_k: Number of items to keep per user (default 10).
        max_items: Upper bound on the number of item indices scored
            (default 10000); clamped to the number of available items.
        batch_size: Number of items scored per forward pass.

    Returns:
        ``(df_zvuk, df_smm)``: DataFrames with columns ``index``,
        ``user_id`` and ``item_ids`` (an array of item ids, best first).
    """
    zvuk_item_ids = np.asarray(RET_Z_i)
    smm_item_ids = np.asarray(RET_S_i)

    # NOTE(review): the same positional index j addresses both item factor
    # matrices and both id arrays, so only indices valid for all are scored.
    n_items = min(
        max_items,
        item_factors_z.shape[0],
        item_factors_s.shape[0],
        len(zvuk_item_ids),
        len(smm_item_ids),
    )
    k = min(top_k, n_items)

    prediction_zvuk = {}
    prediction_smm = {}
    for ii, user_id in enumerate(RET_Z):
        user_vec_z = user_factors_z[ii].toarray()  # shape (1, n_items_z)
        user_vec_s = user_factors_s[ii].toarray()  # shape (1, n_items_s)

        chunks = []
        with torch.no_grad():  # inference only: no autograd bookkeeping
            for start in range(0, n_items, batch_size):
                stop = min(start + batch_size, n_items)
                n_rows = stop - start
                features = np.hstack((
                    np.repeat(user_vec_z, n_rows, axis=0),
                    item_factors_z[start:stop].toarray(),
                    item_factors_s[start:stop].toarray(),
                    np.repeat(user_vec_s, n_rows, axis=0),
                ))
                # One forward pass yields both outputs for the whole chunk.
                chunks.append(model_1(torch.as_tensor(features).float()))
        scores = torch.cat(chunks) if chunks else torch.empty((0, 2))

        # Highest predicted score first.
        top_z = torch.topk(scores[:, 0], k).indices.numpy()
        top_s = torch.topk(scores[:, 1], k).indices.numpy()
        prediction_zvuk[user_id] = zvuk_item_ids[top_z]
        prediction_smm[user_id] = smm_item_ids[top_s]

    df1 = pd.DataFrame({
        'index': range(len(prediction_zvuk)),
        'user_id': list(prediction_zvuk.keys()),
        'item_ids': list(prediction_zvuk.values()),
    })
    df2 = pd.DataFrame({
        'index': range(len(prediction_smm)),
        'user_id': list(prediction_smm.keys()),
        'item_ids': list(prediction_smm.values()),
    })
    return df1, df2
    
def test_z(test_zvuk, train_zvuk,train_smm, test_smm, RET_Z, RET_S,RET_Z_i, RET_S_i, model_111, top_k=10, max_items=10000, batch_size=256):
    """Score items for each user with a single-output model; keep the top-k.

    The feature rows are built as in training: the user row of
    ``user_factors_z`` concatenated with row ``j`` of ``item_factors_z``.

    NOTE: ``user_factors_z`` and ``item_factors_z`` are read from
    module-level globals, not parameters. The frame arguments, ``RET_S`` and
    ``RET_S_i`` are accepted but unused.

    Args:
        test_zvuk, train_zvuk, train_smm, test_smm: Unused.
        RET_Z: User ids; position ``i`` corresponds to row ``i`` of
            ``user_factors_z``.
        RET_S, RET_S_i: Unused.
        RET_Z_i: Item ids used to translate item indices.
        model_111: Callable mapping a float tensor ``(n, features)`` to
            ``(n, 1)`` scores.
        top_k: Number of items to keep per user (default 10).
        max_items: Upper bound on the number of item indices scored
            (default 10000); clamped to the number of available items.
        batch_size: Number of items scored per forward pass.

    Returns:
        DataFrame with columns ``index``, ``user_id`` and ``item_ids`` (an
        array of item ids, best first).
    """
    item_ids = np.asarray(RET_Z_i)
    n_items = min(max_items, item_factors_z.shape[0], len(item_ids))
    k = min(top_k, n_items)

    prediction_zvuk = {}
    # Iterate over the user ids themselves (range() on the array is invalid).
    for ii, user_id in enumerate(RET_Z):
        user_vec = user_factors_z[ii].toarray()  # shape (1, n_items)

        chunks = []
        with torch.no_grad():  # inference only: no autograd bookkeeping
            for start in range(0, n_items, batch_size):
                stop = min(start + batch_size, n_items)
                features = np.hstack((
                    np.repeat(user_vec, stop - start, axis=0),
                    item_factors_z[start:stop].toarray(),
                ))
                # Flatten (n, 1) -> (n,) so ranking runs across items.
                chunks.append(model_111(torch.as_tensor(features).float()).reshape(-1))
        scores = torch.cat(chunks) if chunks else torch.empty(0)

        # Highest predicted score first.
        top = torch.topk(scores, k).indices.numpy()
        prediction_zvuk[user_id] = item_ids[top]

    df = pd.DataFrame({
        'index': range(len(prediction_zvuk)),
        'user_id': list(prediction_zvuk.keys()),
        'item_ids': list(prediction_zvuk.values()),
    })

    return df



# NOTE: test_intersect and test_z read user_factors_z / item_factors_z
# (and, for test_intersect, user_factors_s / item_factors_s) from module
# globals, so each model_* call below must run before its matching test_* call.

# 1) Users present in both datasets: one model with two outputs.
model_nn_z1, RET_Z, RET_S,RET_Z_i, RET_S_i, user_factors_z, item_factors_z, user_factors_s, item_factors_s = model_1(test_zvuk, train_zvuk,train_smm, test_smm)

z1, s1 =test_intersect(test_zvuk, train_zvuk,train_smm, test_smm, RET_Z, RET_S,RET_Z_i, RET_S_i, model_nn_z1)

# 2) Users present only in zvuk.
model_nn_z2, RET_Z, RET_Z_i, user_factors_z, item_factors_z =  model_2(test_zvuk, train_zvuk,train_smm, test_smm)

z2 = test_z(test_zvuk, train_zvuk,train_smm, test_smm, RET_Z, RET_S,RET_Z_i, RET_S_i, model_nn_z2)

# 3) Users present only in smm: same code path with the two frames swapped.
model_nn_z3, RET_Z, RET_Z_i, user_factors_z, item_factors_z =  model_2(test_smm, train_zvuk,train_smm, test_zvuk)

s2 = test_z(test_smm, train_zvuk,train_smm, test_zvuk, RET_Z, RET_S,RET_Z_i, RET_S_i, model_nn_z3)

# Merge the per-group predictions and renumber the 'index' column.
Z = pd.concat([z1,z2], ignore_index=True)
Z['index'] = Z.index
S = pd.concat([s1,s2], ignore_index = True)
S['index'] = S.index

# Write the results (the smm file name previously had a typo: "submussion").
Z.to_parquet('submission_zvuk.parquet', index = False)
S.to_parquet('submission_smm.parquet', index = False)