# Random recommender
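The simplest possible recommendation system: every user is recommended items picked uniformly at random. It is useful only as a baseline.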

Accuracy: 0.00016

### Import libraries

In [65]:
import pandas as pd
import numpy as np

### Read data

In [66]:
# Directory that holds every CSV file used in this notebook.
base_path = "data"

# interactions_and_impressions.csv
# Training set: the implicit preferences expressed by the users.
#   user_id         : identifier of the user
#   item_id         : identifier of the item (a TV series)
#   impression_list : string listing the items that were on screen when the
#                     user interacted with item_id; not every interaction
#                     comes with an impression list
#   data            : "0" if the user watched the item, "1" if the user
#                     opened the item's details page
interactions_df_path = f"{base_path}/interactions_and_impressions.csv"

# data_ICM_length.csv
# Number of episodes of each item (a TV series may have several episodes).
#   item_id    : identifier of the item
#   feature_id : identifier of the feature; always 0, because this ICM only
#                carries the "length" feature
#   data       : number of episodes; may be 0 where the data is incomplete
items_length_df_path = f"{base_path}/data_ICM_length.csv"

# data_ICM_type.csv
# Type of each item. An item has exactly one type; types are anonymized and
# identified only by a number.
#   item_id    : identifier of the item
#   feature_id : identifier of the type
#   data       : "1" if the item is described by the type
items_type_df_path = f"{base_path}/data_ICM_type.csv"

# data_target_users_test.csv
# Ids of the users that must appear in the submission file; the submission
# has to contain all of these users and no others.
users_df_path = f"{base_path}/data_target_users_test.csv"

In [78]:
# Explicit column types for the interactions file.
# NOTE(review): the integer keys are presumably column positions (user_id,
# item_id, impression_list, data, in that order) -- confirm against the CSV header.
dtype = {0: int, 1: int, 2: str, 3: int}

# Load the training interactions, the two item-content matrices (ICMs) and
# the list of target users into DataFrames.
interactions_df = pd.read_csv(interactions_df_path, dtype=dtype)
items_length_pf = pd.read_csv(items_length_df_path)
items_types_df = pd.read_csv(items_type_df_path)
users_df = pd.read_csv(users_df_path)

### Metrics

In [68]:
# Precision: how many of the recommended items are relevant
def precision(recommended_items, relevant_items):
    """Return the fraction of recommended items that are relevant.

    Args:
        recommended_items: array-like of recommended item ids.
        relevant_items: array-like of item ids relevant to the user.

    Returns:
        Number of relevant recommendations divided by the number of
        recommendations, or 0.0 when nothing was recommended.
    """
    # np.isin makes no uniqueness assumption, so repeated ids in either
    # input are still tested correctly (assume_unique=True is only valid
    # when both inputs are guaranteed to be duplicate-free).
    is_relevant = np.isin(recommended_items, relevant_items).ravel()

    # Guard against 0 / 0 when the recommendation list is empty.
    if is_relevant.size == 0:
        return 0.0

    precision_score = np.sum(is_relevant, dtype=np.float32) / is_relevant.size

    return precision_score

In [69]:
# Recall: how many of the relevant items I was able to recommend
def recall(recommended_items, relevant_items):
    """Return the number of relevant recommendations over the number of relevant items.

    Args:
        recommended_items: array-like of recommended item ids.
        relevant_items: array-like of item ids relevant to the user.

    Returns:
        Count of recommended items that are relevant divided by the number
        of relevant items, or 0.0 when there are no relevant items.
    """
    # Accept plain lists as well as arrays; .shape is needed below.
    relevant_items = np.asarray(relevant_items)

    # np.isin makes no uniqueness assumption, so repeated ids in either
    # input are still tested correctly.
    is_relevant = np.isin(recommended_items, relevant_items).ravel()

    # Guard against division by zero when the user has no relevant items.
    n_relevant = relevant_items.shape[0]
    if n_relevant == 0:
        return 0.0

    recall_score = np.sum(is_relevant, dtype=np.float32) / n_relevant

    return recall_score

In [70]:
# Average Precision
def AP(recommended_items, relevant_items):
    """Return the average precision of a ranked recommendation list.

    Precision-at-k is accumulated only at the positions that hold a relevant
    item, and the sum is divided by the smaller of the number of relevant
    items and the number of recommendations.

    Args:
        recommended_items: array-like of recommended item ids, in rank order.
        relevant_items: array-like of item ids relevant to the user.

    Returns:
        The average precision, or 0.0 when either input is empty.
    """
    # Accept plain lists as well as arrays; .shape is needed below.
    relevant_items = np.asarray(relevant_items)

    # np.isin makes no uniqueness assumption, so repeated ids in either
    # input are still tested correctly.
    is_relevant = np.isin(recommended_items, relevant_items).ravel()
    n_recommended = is_relevant.shape[0]

    # Guard against 0 / 0 when there is nothing to rank or nothing relevant.
    normalizer = min(relevant_items.shape[0], n_recommended)
    if normalizer == 0:
        return 0.0

    # Cumulative sum: precision at 1, at 2, at 3 ...; multiplying by the
    # boolean mask zeroes the positions holding a non-relevant item.
    ranks = 1 + np.arange(n_recommended)
    p_at_k = is_relevant * np.cumsum(is_relevant, dtype=np.float32) / ranks

    ap_score = np.sum(p_at_k) / normalizer

    return ap_score

### Random recommender

In [71]:
class RandomRecommender(object):
    """Baseline recommender that suggests items drawn uniformly at random."""

    def fit(self, n_items):
        """Remember how many items exist.

        Args:
            n_items: number of items (an int); recommendations are drawn
                from the integer ids ``0 .. n_items - 1``.
        """
        self.n_items = n_items

    def recommend(self, user_id, at=5):
        """Return random, distinct item ids for a user.

        Args:
            user_id: identifier of the user; unused, since every user gets
                an independent random draw.
            at: number of items to recommend.

        Returns:
            NumPy array of distinct item ids. Its length is ``at``, or
            ``n_items`` when fewer than ``at`` items exist.
        """
        # Sampling without replacement cannot return more items than exist.
        n_recommendations = min(at, self.n_items)

        # replace=False: a recommendation list must not repeat an item.
        recommended_items = np.random.choice(
            self.n_items, n_recommendations, replace=False
        )

        return recommended_items

In [101]:
# Fit the random baseline.
# NOTE(review): the row count of the type ICM is used as the number of items,
# so recommendations are integers in [0, row count) -- confirm that this range
# matches the set of valid item ids.
randomRecommender = RandomRecommender()
randomRecommender.fit(items_types_df.shape[0])

# One row per target user: the user id and its 10 recommended item ids joined
# by single spaces. All rows are collected first and the frame is built once;
# concatenating inside the loop would copy the growing frame on every
# iteration, which is quadratic in the number of users.
submission_rows = [
    (user_id, " ".join(map(str, randomRecommender.recommend(user_id, at=10))))
    for user_id in users_df["user_id"]
]
out_df = pd.DataFrame(submission_rows, columns=["user_id", "item_list"])

### Save result

In [102]:
# Write the submission file: only the two listed columns, without the index.
out_df.to_csv(
    path_or_buf="submission.csv",
    index=False,
    columns=["user_id", "item_list"],
)