In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from surprise import Reader, Dataset, SVDpp

In [None]:
# Load the training interactions and turn the textual feedback into a numeric
# rating: the value "D" becomes -1, every other value becomes +1.
df_train = pd.read_csv('train.csv')
equals_d = df_train['feedback'] == "D"
df_train['feedback'] = equals_d.map({True: -1, False: 1})

print(df_train.shape)

df_train.head()

In [None]:
# The Reader is told the rating bounds up front; they are taken from the
# smallest and largest feedback values actually present in the training data.
feedback_col = df_train['feedback']
rating_bounds = (feedback_col.min(), feedback_col.max())
reader = Reader(rating_scale=rating_bounds)

# Columns are passed in the order (user, item, rating).
rating_columns = ['userId', 'topicId', 'feedback']
data = Dataset.load_from_df(df_train[rating_columns], reader)

In [None]:
# Hyperparameters of this run. The same three values are embedded in the
# names of the output files written further down.
n_factors, n_epochs, random_state = 32, 20, 2005

# Train on every available rating (no hold-out split).
full_trainset = data.build_full_trainset()
algo = SVDpp(n_factors=n_factors, n_epochs=n_epochs, random_state=random_state)

algo.fit(full_trainset)

In [None]:
# Load the pairs to score. reset_index() is called without drop=True, so the
# previous index is kept as an extra 'index' column.
df_test = pd.read_csv('test.csv').reset_index()
print(df_test.shape)
df_test.head()

In [None]:
# Score every (user, topic) pair of the test set with the fitted model.
#
# The two id columns are zipped directly instead of going through
# DataFrame.iterrows(): iterrows() builds one Series per row, which upcasts
# mixed columns to a common dtype (integer ids can silently become floats)
# and is also the slowest way to walk a frame. Iterating the columns hands
# algo.predict the ids with their own column dtype.
preds = [
    algo.predict(uid=user_id, iid=topic_id).est
    for user_id, topic_id in zip(df_test['userId'], df_test['topicId'])
]

# Hard labels: a negative estimate is labelled "D", anything else "L".
result = ["D" if est < 0 else "L" for est in preds]

In [None]:
# Persist this run: one label per line in a text file, plus the raw estimates
# as a float16 array. Both file names carry the run's hyperparameters.
labels_path = f'surprise_{n_factors}_{n_epochs}_{random_state}.txt'
labels_text = "\n".join(result)
with open(labels_path, 'w') as fp:
    fp.write(labels_text)

preds_path = f'surprise_{n_factors}_{n_epochs}_{random_state}_preds.npy'
preds_half = np.array(preds).astype(np.float16)
np.save(preds_path, preds_half)

In [None]:
# Load three previously saved prediction arrays. None of these file names
# matches the hyperparameters configured in this notebook, so the files must
# already exist on disk from other runs.
surprise_8_10_2005_preds = np.load('surprise_8_10_2005_preds.npy')
surprise_16_10_2005_preds = np.load('surprise_16_10_2005_preds.npy')
surprise_8_10_0_preds = np.load('surprise_8_10_0_preds.npy')

In [None]:
# Members of the ensemble; each one contributes equally to the average.
predictions = [
    surprise_8_10_2005_preds,
    surprise_16_10_2005_preds,
    surprise_8_10_0_preds,
]

In [None]:
# Ensemble by plain averaging of the member predictions.
#
# Every member is promoted to float64 before averaging: arrays written by the
# save cell of this notebook are float16, and dividing/accumulating in that
# precision adds rounding error right before a threshold decision.
# The result is a 1-D array, so the column is assigned directly instead of
# being seeded with an (n, 1) 2-D zero array.
stacked_preds = np.stack([np.asarray(member, dtype=np.float64) for member in predictions])

# Fail loudly if the saved predictions do not line up with the test rows.
if stacked_preds.shape[1:] != (len(df_test),):
    raise ValueError(
        f"prediction arrays have shape {stacked_preds.shape[1:]}, "
        f"expected ({len(df_test)},) to match df_test"
    )

df_test['feedback'] = stacked_preds.mean(axis=0)

In [None]:
# Turn the averaged score into a label: strictly greater than -0.1 gives 'L',
# everything else 'D'. The labels are written one per line, with no index and
# no header.
above_threshold = df_test['feedback'] > -0.1
predict = above_threshold.map({True: 'L', False: 'D'})
predict.to_csv('surprise_top3_01_v3.csv', index=False, header=False)