### Global params

In [None]:
%load_ext autoreload
import pandas as pd

### Pre-process

In [None]:
%%time
%autoreload 2
from ratingmatrix import rating_matrix
from user_keywords import load_content_data, load_events_data, merge_on_docID, get_user_counts, get_user_profile

from pathlib import Path

# Directory that receives the pickled matrices. It is created up front so a
# missing folder is not discovered only after the pipeline below has run:
# DataFrame.to_pickle does not create parent directories.
matrix_dir = Path('data/matrix-df')
matrix_dir.mkdir(parents=True, exist_ok=True)

# Article metadata; its "id" column is renamed to "documentId"
# (presumably the key merge_on_docID joins on -- confirm in user_keywords).
df_content = load_content_data('data/content_refine')
df_content = df_content.rename(columns={"id": "documentId"})

# TODO: pass a value for num_days to load fewer events while testing.
# NOTE(review): None presumably means "no limit" -- confirm in user_keywords.
df_events = load_events_data('data/active1000', num_days=None)
df_events = df_events.drop(columns=['title', 'publishtime'])

# Events joined with content -> per-user counts -> user profiles.
df_merged = merge_on_docID(df_events, df_content)
df_user_frequencies = get_user_counts(df_merged)
df_user_profile = get_user_profile(df_user_frequencies)
df_user_profile.to_pickle(matrix_dir / 'user_profiles.pkl')

# The rating matrix is built from the events frame, not the merged one.
df_user_item = rating_matrix(df_events)
df_user_item.to_pickle(matrix_dir / 'user_item.pkl')




### Load preprocessed matrices + train_test

In [None]:
%autoreload 2
from ratingmatrix import rating_matrix_train_test_split

# Reload the matrices that the pre-processing cell pickled.
# NOTE: read_pickle must only be used on trusted files (these are produced
# locally by this notebook).
df_user_profile = pd.read_pickle('data/matrix-df/user_profiles.pkl')
df_user_item = pd.read_pickle('data/matrix-df/user_item.pkl')

# A notebook only renders the LAST expression of a cell, so a bare
# `.head()` in the middle of the cell is evaluated and thrown away.
# Print the previews explicitly so they actually show up.
print(df_user_profile.head())
print(df_user_item.head())

# The split is handed a bare NumPy array, so remember the row/column labels
# in order to rebuild labelled DataFrames afterwards.
index = df_user_item.index
columns = df_user_item.columns
train, test = rating_matrix_train_test_split(df_user_item.to_numpy(), fraction=0.2)

# Original author's note: train and test are disjoint in terms of their
# 1-values (not verifiable from this cell -- see ratingmatrix).
train = pd.DataFrame(data=train, index=index, columns=columns)
test = pd.DataFrame(data=test, index=index, columns=columns)


## Method


In [None]:
%%time
from recommender_methods import collaborative_filtering_user_based

# Upper bound on how many users (taken from the top of the training
# matrix) get recommendations in this run.
NUM_USERS = 100

# One result per user. Slicing the index clamps to the number of users that
# actually exist, so a matrix with fewer than NUM_USERS rows no longer
# raises IndexError the way positional `train.index[i]` lookups did.
# Each result is later read through its 'user_id' and 'articles' keys.
recommendations = [
    collaborative_filtering_user_based(df_user_profile, train, user_id)
    for user_id in train.index[:NUM_USERS]
]

# Prediction matrix with the same labels as `test`: all zeros, then 1.0 for
# every (user, article) pair that was recommended.
predict = test.copy(deep=True)
predict[:] = 0.0
for recommendation in recommendations:
    for article in recommendation['articles']:
        predict.at[recommendation['user_id'], article] = 1.0

# Last expression of the cell, so the notebook renders this preview.
predict.head()

# print(recommendations)