# Data Mining Project

In [1]:
import copy

from dataLoad import *
from dataProcessing import *
from evaluation import *

### Loading User Set and Utility Matrix

In [2]:
# Load the set of user IDs via the project helper.
userIDs = loadUserSet()
# Load the query IDs, the utility matrix and `average` in one call.
# `average` is later indexed by user (`average[user]`) as the fallback
# rating — presumably a per-user mean rating; confirm in loadUtilityMatrix.
queryIDs, utilityMatrix, average = loadUtilityMatrix()

### Getting Liked and Disliked Queries
In `userQueryLikedDict` and `userQueryDislikedDict` the key is the User ID and the value is a list of Query IDs that the user liked / disliked.
In `queryUserLikedDict` and `queryUserDislikedDict` the key is the Query ID and the value is a list of User IDs who liked / disliked the query.

In [3]:
# Build all four liked/disliked lookups in a single call, then unpack them in
# the order the helper returns them: user->queries (liked, disliked) followed
# by query->users (liked, disliked).
likedDislikedDicts = generateLikedDislikedDictionary(
    userIDs, queryIDs, utilityMatrix, average
)
(userQueryLikedDict, userQueryDislikedDict,
 queryUserLikedDict, queryUserDislikedDict) = likedDislikedDicts

### Calculating the Similarity of Users and Queries with Jaccard Similarity

In [4]:
# User-to-user similarity, computed from each user's liked / disliked query lists.
# NOTE(review): the structure of the returned similarity object is defined in
# jaccardSimilarity — confirm there before indexing it directly.
userSimilarity = jaccardSimilarity(userQueryLikedDict, userQueryDislikedDict, userIDs)
# Query-to-query similarity, computed from each query's liking / disliking user lists.
querySimilarity = jaccardSimilarity(queryUserLikedDict, queryUserDislikedDict, queryIDs)

### Getting 20% of the already rated queries for evaluation

In [5]:
# Hold-out settings, named so the sampled share and the seed are easy to tune.
evaluationPercentage = 0.2
evaluationSeed = 100

# NOTE(review): with removeFromUtilityMatrix=False the sampled ratings
# presumably remain in utilityMatrix — confirm the CF predictors do not read
# the very rating they are asked to predict.
evaluation = getQueriesForEvaluation(
    utilityMatrix,
    removeFromUtilityMatrix=False,
    percentage=evaluationPercentage,
    seed=evaluationSeed,
)

# Convert the sampled ratings into the input form the predictors take.
queriesToEvaluate = prepareQueriesForPrediction(evaluation)

### Preparing a function to fill predictions
This function fills in a prediction for every query whose rating could not be
predicted. In this example, the user's average rating is used as the fallback.

In [6]:
def fillWithAvg(user, query, average):
    """Return the fallback rating for a (user, query) pair: the user's average.

    Args:
        user: Key (or index) identifying the user inside ``average``.
        query: Query identifier. Accepted so every fill strategy shares the
            same signature; it is not used here.
        average: Subscriptable object mapping a user to their average rating.

    Returns:
        ``average[user]``.

    Raises:
        Whatever ``average[user]`` raises when the user is missing
        (``KeyError`` for a dict, ``IndexError`` for a list).
    """
    return average[user]


def fillPredictions(user, query):
    """Two-argument fill callback passed as ``fillNotPredictable``.

    Delegates to ``fillWithAvg`` using the notebook-level ``average``.

    Args:
        user: Key identifying the user inside ``average``.
        query: Query identifier, forwarded unchanged.

    Returns:
        The user's average rating.
    """
    # ``average`` is resolved at call time, so re-running the loading cell
    # is picked up without redefining this function.
    return fillWithAvg(user, query, average)

### Make Predictions with Query Based Collaborative Filtering

In [7]:
# Neighbourhood size handed to queryBasedCF — presumably the number of
# most-similar queries consulted per prediction; confirm in queryBasedCF.
topNQueries = 4

# Ratings the predictor cannot produce are filled by fillPredictions.
query_cf_predictions = queryBasedCF(
    utilityMatrix,
    queriesToEvaluate,
    querySimilarity,
    topNQueries,
    fillNotPredictable=fillPredictions,
)

Not able to predict 0 ratings


### Make Predictions with User Based Collaborative Filtering

In [8]:
# Neighbourhood size handed to userBasedCF — presumably the number of
# most-similar users consulted per prediction; confirm in userBasedCF.
topNUsers = 4

# Ratings the predictor cannot produce are filled by fillPredictions.
user_cf_predictions = userBasedCF(
    utilityMatrix,
    queriesToEvaluate,
    userSimilarity,
    topNUsers,
    fillNotPredictable=fillPredictions,
)

Not able to predict 0 ratings


### Flattening the Predictions and Actual Values

In [9]:
# Pair each model's predictions with the held-out ratings. Both calls share
# the same `evaluation` set, so the two models are scored on identical targets.
(flat_user_cf_predictions, actual_user_cf) = getPredictionsAndActualRatings(
    user_cf_predictions, evaluation
)
(flat_query_cf_predictions, actual_query_cf) = getPredictionsAndActualRatings(
    query_cf_predictions, evaluation
)

### Calculating RMSE

In [10]:
# Score each model against the held-out ratings.
user_cf_rmse = rootMeanSquaredError(flat_user_cf_predictions, actual_user_cf)
query_cf_rmse = rootMeanSquaredError(flat_query_cf_predictions, actual_query_cf)

# One print call; sep="\n" keeps each score on its own line.
print(
    f"RMSE of User Based Collaborative Filtering: {user_cf_rmse}",
    f"RMSE of Query Based Collaborative Filtering: {query_cf_rmse}",
    sep="\n",
)

RMSE of User Based Collaborative Filtering: 29.644252428959824
RMSE of Query Based Collaborative Filtering: 8.584055110897747
