# Import Data


## Import and Install

In [53]:
%pip install pandas

import pandas as pd
import os
import urllib.request
import zipfile

Note: you may need to restart the kernel to use updated packages.


## Get Data

In [54]:
# Where the MovieLens archive is fetched from and where it is stored locally.
DATA_URL = 'https://files.grouplens.org/datasets/movielens/ml-latest.zip'
DATA_DIR = '../data'
DATA_FILE = 'ml-latest.zip'

data_path = os.path.join(DATA_DIR, DATA_FILE)

# NOTE(review): load_data() reads its CSV files from an 'ml-latest-small' folder,
# while the archive downloaded here is 'ml-latest.zip' -- confirm that the folder
# produced by the extraction matches what load_data() expects.

# urlretrieve() does not create missing parent directories, so make sure the
# target directory exists before downloading into it.
os.makedirs(DATA_DIR, exist_ok=True)

# The archive on disk acts as the "already downloaded" marker: download and
# extract only when it is missing.
if not os.path.exists(data_path):
    # Download under a temporary name and rename afterwards, so an interrupted
    # transfer never leaves a truncated archive at data_path (which would make
    # every later run skip both the download and the extraction).
    partial_path = data_path + '.part'
    urllib.request.urlretrieve(DATA_URL, partial_path)
    os.replace(partial_path, data_path)
    with zipfile.ZipFile(data_path, 'r') as zip_ref:
        zip_ref.extractall(DATA_DIR)

## Load Data

In [55]:
def load_data(data_dir=None, dataset='ml-latest-small'):
    """Load the movies, ratings, tags and links CSV files into DataFrames.

    Args:
        data_dir: Directory that contains the extracted dataset folder.
            When None (the default), the notebook-level ``DATA_DIR`` is used.
        dataset: Name of the dataset folder inside ``data_dir``.
            Defaults to 'ml-latest-small'.

    Returns:
        A tuple ``(movies_df, ratings_df, tags_df, links_df)`` of pandas
        DataFrames read from ``movies.csv``, ``ratings.csv``, ``tags.csv``
        and ``links.csv`` respectively.

    Raises:
        FileNotFoundError: If one of the CSV files does not exist
            (raised by ``pd.read_csv``).
    """
    # Resolve DATA_DIR at call time so the default follows the notebook config.
    base_dir = os.path.join(DATA_DIR if data_dir is None else data_dir, dataset)

    # The order of this tuple defines the order of the returned DataFrames.
    table_names = ('movies', 'ratings', 'tags', 'links')
    return tuple(
        pd.read_csv(os.path.join(base_dir, f'{table}.csv'))
        for table in table_names
    )

# Data Exploration
What data exploration methods do we need?

# Train Models

## Content-Based
This function trains a content-based recommendation model. It is currently a placeholder that only prints a status message.



In [56]:
def train_content_based_model(movies, ratings, tags, links):
    """Placeholder trainer for the content-based recommender.

    Only announces the training step on stdout for now: the four arguments
    are accepted but not used, and nothing is returned.
    """
    status_message = "Training content-based model"
    print(status_message)

## Collaborative Filtering - Neighborhood


In [57]:
def train_neighborhood_model(movies, ratings, tags, links):
    """Placeholder trainer for the neighborhood collaborative-filtering model.

    Only announces the training step on stdout for now: the four arguments
    are accepted but not used, and nothing is returned.
    """
    status_message = "Training neighborhood model"
    print(status_message)

## Collaborative Filtering - Matrix Factorization


In [58]:
def train_matrix_factorization_model(movies, ratings, tags, links):
    """Placeholder trainer for the matrix-factorization collaborative-filtering model.

    Only announces the training step on stdout for now: the four arguments
    are accepted but not used, and nothing is returned.
    """
    status_message = "Training matrix factorization model"
    print(status_message)
    

## Train all Models

In [59]:
def train_models(movies, ratings, tags, links):
    """Run all three model trainers, one after another, on the same inputs.

    The trainers are called in this order: content-based, neighborhood,
    matrix factorization. Each receives the four arguments unchanged.
    Nothing is returned.
    """
    datasets = (movies, ratings, tags, links)
    train_content_based_model(*datasets)
    train_neighborhood_model(*datasets)
    train_matrix_factorization_model(*datasets)

# Read the four tables from disk, then pass them to every trainer.
movies_df, ratings_df, tags_df, links_df = load_data()
train_models(movies_df, ratings_df, tags_df, links_df)

Training content-based model
Training neighborhood model
Training matrix factorization model


# Use Models

## Content-Based

In [60]:
def make_content_based_recommendations(user, model) -> list:
    """Return content-based recommendations for ``user``.

    Intended result: a list of recommended items with their scores, e.g.
    ``[{'movieId': 1, 'score': 0.5}, {'movieId': 2, 'score': 0.4}, {'movieId': 3, 'score': 0.3}]``.

    Not implemented yet: ``user`` and ``model`` are ignored and an empty
    list is always returned.
    """
    return []

## Collaborative Filtering - Neighborhood


In [61]:
def make_neighborhood_recommendations(user, model) -> list:
    """Return neighborhood collaborative-filtering recommendations for ``user``.

    Intended result: a list of recommended items with their scores, e.g.
    ``[{'movieId': 1, 'score': 0.5}, {'movieId': 2, 'score': 0.4}, {'movieId': 3, 'score': 0.3}]``.

    Not implemented yet: ``user`` and ``model`` are ignored and an empty
    list is always returned.
    """
    return []

## Collaborative Filtering - Matrix Factorization


In [62]:
def make_matrix_factorization_recommendations(user, model) -> list:
    """Return matrix-factorization recommendations for ``user``.

    Intended result: a list of recommended items with their scores, e.g.
    ``[{'movieId': 1, 'score': 0.5}, {'movieId': 2, 'score': 0.4}, {'movieId': 3, 'score': 0.3}]``.

    Not implemented yet: ``user`` and ``model`` are ignored and an empty
    list is always returned.
    """
    # TODO: add the user to the model
    # TODO: get the recommendations for the user
    return []

## Hybrid Recommendations

In [63]:
def make_recommendations(user, content_model, collab_model1, collab_model2) -> list:
    """Combine the recommendations of the three models into one ranked list.

    Each model-specific function is expected to return a list of dicts of the
    form ``{'movieId': ..., 'score': ...}``. Scores are summed per ``movieId``
    and divided by the number of models, so a movie that a model did not
    recommend contributes 0 for that model (the same averaging that
    ``recommendations_from_list`` applies).

    Args:
        user: Passed through unchanged to each model-specific function.
        content_model: Model handed to ``make_content_based_recommendations``.
        collab_model1: Model handed to ``make_neighborhood_recommendations``.
        collab_model2: Model handed to
            ``make_matrix_factorization_recommendations``.

    Returns:
        A list of ``{'movieId': ..., 'score': ...}`` dicts sorted by score,
        best first. Empty when no model returns any recommendation.
    """
    per_model = (
        make_content_based_recommendations(user, content_model),
        make_neighborhood_recommendations(user, collab_model1),
        make_matrix_factorization_recommendations(user, collab_model2),
    )

    # Combine the recommendations from the three models
    totals = {}
    for recommendations in per_model:
        # Treat a missing result (None) like an empty recommendation list.
        for item in recommendations or []:
            movie_id = item['movieId']
            totals[movie_id] = totals.get(movie_id, 0) + item['score']

    combined = [
        {'movieId': movie_id, 'score': total / len(per_model)}
        for movie_id, total in totals.items()
    ]
    combined.sort(key=lambda item: item['score'], reverse=True)
    return combined

### Use the trained models to estimate a score for each candidate movie

In [64]:
def _estimate_or_zero(model, user, movie):
    """Return ``model.estimate(user, movie)``, or 0 if that call raises an ``Exception``."""
    try:
        return model.estimate(user, movie)
    except Exception:
        # Best-effort scoring: a model that cannot score this pair counts as 0.
        # Only Exception is caught, so KeyboardInterrupt / SystemExit still
        # stop the run instead of being silently turned into a score.
        return 0


def recommendations_from_list(user, movie_list, content_model, collab_model1, collab_model2):
    """Score every movie in ``movie_list`` for ``user`` and rank the results.

    For each movie, every model's ``estimate(user, movie)`` is called and the
    three results are averaged. A model whose ``estimate`` call raises an
    exception contributes 0 for that movie.

    Args:
        user: Information about the user; passed unchanged to each model's
            ``estimate``. Details are still to be decided -- probably a list
            of movies the user has rated and the ratings given.
        movie_list: Iterable of movie ids to score.
        content_model: Object exposing ``estimate(user, movie)``.
        collab_model1: Object exposing ``estimate(user, movie)``.
        collab_model2: Object exposing ``estimate(user, movie)``.

    Returns:
        A list of ``(movie, combined_score)`` tuples sorted by score, best
        recommendations first. A movie id that appears more than once in
        ``movie_list`` is listed once.
    """
    models = (content_model, collab_model1, collab_model2)

    scores = {}
    for movie in movie_list:
        # Create a combined score: the plain average of the three estimates.
        total = sum(_estimate_or_zero(model, user, movie) for model in models)
        scores[movie] = total / len(models)

    # Sort the scores with best recommendations first
    return sorted(scores.items(), key=lambda entry: entry[1], reverse=True)