In [13]:
# Run the shared helper notebook before anything else in this notebook.
# NOTE(review): the cells below use pd, np, subset_name, load_train, load_test,
# print_score and save_preds without defining them, so they presumably come
# from helper.ipynb — confirm.
%run helper.ipynb

In [14]:
from sklearn.metrics.pairwise import cosine_similarity

In [15]:
class UserCollabWeightedAverageModel:
    """User-based collaborative filter using a similarity-weighted average.

    A reviewer-by-product rating matrix is built from the training data and
    pairwise reviewer similarities are computed on a zero-filled version of
    it. The rating for a (reviewer, product) pair is predicted as the
    similarity-weighted mean of the training ratings given to that product.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Training rows; must provide ``reviewerID`` and ``asin`` columns.
        The frame is not modified.
    y_train : array-like
        Ratings for the rows of ``X_train``.
    similarity : {'cosine', 'pearson'}, default 'cosine'
        Similarity measure computed between reviewer rating vectors.

    Raises
    ------
    ValueError
        If ``similarity`` is not one of the supported names.
    """

    # Fallback returned when no usable weighted average can be computed
    # (unknown reviewer/product, zero total weight, or a non-finite result).
    DEFAULT_RATING = 2.5

    def __init__(self, X_train, y_train, similarity='cosine'):
        # Validate first so a bad option fails before any work is done.
        if similarity not in ('cosine', 'pearson'):
            raise ValueError(
                f"Unsupported similarity {similarity!r}; expected 'cosine' or 'pearson'"
            )

        # assign() returns a new frame, so the caller's X_train is left untouched.
        ratings = X_train.assign(overall=y_train)
        # Duplicate (reviewer, product) pairs are collapsed to their highest rating.
        self._user_product_matrix = pd.crosstab(
            ratings.reviewerID, ratings.asin, ratings.overall, aggfunc='max'
        )

        # Missing ratings count as 0 for the similarity computation only;
        # the stored matrix keeps NaN so unrated entries stay distinguishable.
        filled = self._user_product_matrix.fillna(0)
        if similarity == 'cosine':
            scores = cosine_similarity(filled)
        else:
            scores = np.corrcoef(filled)

        users = self._user_product_matrix.index
        self._similarity = pd.DataFrame(scores, index=users, columns=users)

    def predict(self, X):
        """Predict a rating for every row of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Rows with ``reviewerID`` and ``asin`` columns.

        Returns
        -------
        numpy.ndarray
            One float prediction per row, in row order.
        """
        # Collect in a list and convert once; np.append in a loop re-copies
        # the whole array on every iteration.
        return np.array([self._predict(row) for _, row in X.iterrows()], dtype=float)

    def _predict(self, X):
        """Predict the rating for a single row holding ``reviewerID`` and ``asin``.

        Returns ``DEFAULT_RATING`` when the reviewer or the product was not
        seen in training, when the similarity weights sum to zero, or when
        the weighted average is not a finite number.
        """
        user, item = X['reviewerID'], X['asin']
        if item in self._user_product_matrix.columns and user in self._user_product_matrix.index:
            # Keep only the reviewers who actually rated this product.
            ratings = self._user_product_matrix[item].dropna()
            weights = self._similarity[user].loc[ratings.index]

            total_weight = weights.sum()
            if total_weight != 0:
                estimate = np.dot(ratings, weights) / total_weight
                # Series.sum() skips NaN weights but np.dot does not, so a NaN
                # similarity would otherwise leak out as a NaN prediction.
                if np.isfinite(estimate):
                    return estimate

        return self.DEFAULT_RATING

In [16]:
# Load the train and test splits for `subset_name`.
# NOTE(review): load_train, load_test and subset_name are not defined in this
# notebook; presumably they come from helper.ipynb — confirm.
X_train, y_train = load_train(subset_name)
X_test, y_test = load_test(subset_name)

In [17]:
# Fit the user-based model (cosine similarity is the class default, spelled
# out here so the configuration is visible), predict the test split and
# report the error metrics.
model = UserCollabWeightedAverageModel(X_train, y_train, similarity='cosine')
y_pred = model.predict(X_test)
print_score(y_test, y_pred)

RMSE: 1.0733666399413633
MAE: 0.7475806347608352



In [18]:
# The model above is built with the default cosine similarity, so the saved
# predictions are labelled 'cosine' rather than 'pearson'.
# NOTE(review): assumes the last argument of save_preds is a label for the
# similarity variant — confirm against helper.ipynb.
save_preds(model, subset_name, y_pred, 'cosine')