In [1]:
import pandas as pd

# Class Definition

In [169]:
class HybridRecommendation:
    """
    Blend the prediction matrices of several recommendation models into a
    single weighted hybrid prediction matrix.

    Every model is supplied as a CSV file that has a 'userId' column; all
    other columns are treated as per-movie predicted ratings.

    Attributes:
    ----------
    csv_paths : list of str
        List of file paths to CSV files containing the model predictions.
    weights : list of float
        List of weights, one per entry of ``csv_paths``, used in the weighted sum.
    fallback_index : int or None
        Position (in ``csv_paths``) of the model whose prediction is used
        as-is for cells where every other model predicts exactly 0.
        ``None`` disables the fallback.
    models : list of DataFrames
        The model prediction DataFrames exactly as read from disk.
    sorted_models : list of DataFrames
        The same DataFrames with rows sorted by 'userId', a fresh 0..n-1
        index, and columns ordered as 'userId' followed by the sorted
        remaining column names.
    final_df : DataFrame or None
        The combined DataFrame ('userId' plus hybrid ratings); ``None`` until
        ``combine_csvs()`` has run.

    Methods:
    -------
    load_and_sort_data():
        Loads the CSV files and stores raw and sorted copies.
    _sort_csv(df):
        Sorts a DataFrame by 'userId' and orders its columns by name.
    _validate_structure():
        Checks that all sorted models share columns and 'userId' order.
    _fallback_position(n_models):
        Resolves ``fallback_index`` to a non-negative position (or None).
    combine_csvs():
        Builds the weighted hybrid matrix and stores it in ``final_df``.
    save_final_csv(output_path):
        Saves ``final_df`` to a CSV file.
    """

    def __init__(self, csv_paths, weights, fallback_index=-1):
        """
        Initializes the HybridRecommendation class with the given CSV paths and weights.

        Parameters:
        -----------
        csv_paths : list of str
            List of file paths to CSV files containing the model predictions.
        weights : list of float
            List of weights corresponding to each model to be used in the final combination.
        fallback_index : int or None, optional
            Position of the fallback model inside ``csv_paths`` (negative
            values count from the end). Defaults to -1, i.e. the last model;
            for a four-model list this is index 3. Pass ``None`` to disable
            the fallback.
        """
        self.csv_paths = csv_paths
        self.weights = weights
        self.fallback_index = fallback_index
        self.models = []
        self.sorted_models = []
        self.final_df = None

    # load and sort data
    def load_and_sort_data(self):
        """
        Loads the CSV files, sorts each one by 'userId' and by column name,
        and stores the raw frames in ``models`` and the sorted frames in
        ``sorted_models``.
        """
        # Start from empty lists so that calling this method (or
        # combine_csvs) more than once does not accumulate duplicate models.
        self.models = []
        self.sorted_models = []
        for path in self.csv_paths:
            df = pd.read_csv(path)
            self.models.append(df)
            self.sorted_models.append(self._sort_csv(df))

    # sort the csv
    def _sort_csv(self, df):
        """
        Sorts a given DataFrame by 'userId' and orders the remaining columns by name.

        Parameters:
        -----------
        df : DataFrame
            The DataFrame to be sorted. Must contain a 'userId' column.

        Returns:
        --------
        DataFrame
            A new DataFrame with rows ordered by 'userId', a fresh 0..n-1
            index, and columns ordered as 'userId' + sorted other columns.
        """
        rating_cols = sorted(col for col in df.columns if col != 'userId')
        # reset_index is essential: pandas arithmetic and concat align on
        # index labels, not on row position. Without it, files whose rows are
        # stored in different orders would have different users added together.
        df_sorted = df.sort_values(by='userId').reset_index(drop=True)
        return df_sorted[['userId'] + rating_cols]

    # validate csvs before combining
    def _validate_structure(self):
        """
        Validates that all sorted models have the same columns (names and
        order) and the same sequence of 'userId' values.

        Raises:
        -------
        ValueError
            If no model has been loaded, or if column names or the 'userId'
            sequence differ between models.
        """
        if not self.sorted_models:
            raise ValueError("No models loaded. Provide at least one CSV path.")

        reference = self.sorted_models[0]
        base_columns = list(reference.columns)
        base_users = list(reference['userId'])

        for model in self.sorted_models[1:]:
            if list(model.columns) != base_columns:
                raise ValueError("Column names or order do not match across models.")
            if list(model['userId']) != base_users:
                raise ValueError("UserId order does not match across models.")

    def _fallback_position(self, n_models):
        """
        Resolves ``fallback_index`` to a non-negative list position.

        Parameters:
        -----------
        n_models : int
            Number of models being combined.

        Returns:
        --------
        int or None
            The position of the fallback model, or None when the fallback is
            disabled or there is no "other" model to compare against.

        Raises:
        -------
        ValueError
            If ``fallback_index`` does not address one of the models.
        """
        if self.fallback_index is None or n_models < 2:
            return None
        if not -n_models <= self.fallback_index < n_models:
            raise ValueError(
                f"fallback_index {self.fallback_index} is out of range "
                f"for {n_models} models.")
        return self.fallback_index % n_models

    # combine csvs
    def combine_csvs(self):
        """
        Loads all models, validates them, and stores the weighted combination
        in ``final_df``.

        Each cell of the result is ``sum(weight_i * prediction_i)``. Cells for
        which every model other than the fallback model predicts exactly 0
        are replaced by the fallback model's own prediction, unscaled.

        Raises:
        -------
        ValueError
            If there are no CSV paths, the number of weights differs from the
            number of CSV paths, ``fallback_index`` is out of range, or the
            loaded models do not share the same structure.
        """
        n_models = len(self.csv_paths)
        if n_models == 0:
            raise ValueError("No models loaded. Provide at least one CSV path.")
        if len(self.weights) != n_models:
            # zip() would silently drop the surplus models or weights.
            raise ValueError(
                f"Expected one weight per model: got {len(self.weights)} "
                f"weights for {n_models} CSV paths.")
        fallback_pos = self._fallback_position(n_models)

        self.load_and_sort_data()
        self._validate_structure()

        rating_matrices = [model.drop(columns='userId') for model in self.sorted_models]

        combined_ratings = sum(
            weight * matrix for matrix, weight in zip(rating_matrices, self.weights))

        if fallback_pos is not None:
            # True where all models except the fallback predict exactly 0.
            others_all_zero = None
            for position, matrix in enumerate(rating_matrices):
                if position == fallback_pos:
                    continue
                is_zero = matrix == 0
                others_all_zero = (
                    is_zero if others_all_zero is None else others_all_zero & is_zero)

            # Keep the weighted sum where at least one other model contributes;
            # elsewhere take the fallback score unscaled so the value stays on
            # the models' own rating scale.
            combined_ratings = combined_ratings.where(
                ~others_all_zero, rating_matrices[fallback_pos])

        user_ids = self.sorted_models[0]['userId']
        self.final_df = pd.concat([user_ids, combined_ratings], axis=1)

    # save final dataframe
    def save_final_csv(self, output_path):
        """
        Saves the final combined DataFrame to a CSV file (without the index).

        Parameters:
        -----------
        output_path : str
            The path to save the CSV file.

        Raises:
        -------
        ValueError
            If ``combine_csvs()`` has not produced ``final_df`` yet.
        """
        if self.final_df is None:
            raise ValueError("Final DataFrame not created yet. Run combine_csvs() first.")
        self.final_df.to_csv(output_path, index=False)

# Extracting and loading predictions of each model

In [None]:
# -o overwrites already-extracted files without prompting, so re-running this cell never blocks on input.
!unzip -o /content/item_matrix_finalll.zip

Archive:  /content/item_matrix_finalll.zip
  inflating: item_matrix_finalll.csv  


In [102]:
# -o overwrites already-extracted files without prompting, so re-running this cell never blocks on input.
!unzip -o /content/user_movie_prediction_from_SVD_model.zip

Archive:  /content/user_movie_prediction_from_SVD_model.zip
replace user_movie_prediction_from_SVD_model.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: user_movie_prediction_from_SVD_model.csv  


In [103]:
# -o overwrites already-extracted files without prompting, so re-running this cell never blocks on input.
!unzip -o /content/Content_based_predictions.zip

Archive:  /content/Content_based_predictions.zip
replace Content based predictions/Content_based_predictions_matrix.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: Content based predictions/Content_based_predictions_matrix.csv  
replace Content based predictions/Mapped_Content_based_predictions_matrix.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: Content based predictions/Mapped_Content_based_predictions_matrix.csv  


# Viewing each model output

In [124]:
# Paths to the prediction CSV of each recommendation model (one file per model).
# NOTE: `item_baesd` is a misspelling of "item_based"; the name is kept because later cells reference it.
item_baesd = '/content/item_matrix_finalll.csv'
# NOTE(review): this file is not produced by any unzip cell shown in this notebook —
# presumably uploaded separately; confirm it exists before running.
user_based = '/content/Users_Movies_Predictions_Processed.csv'
matrix_factorization = '/content/user_movie_prediction_from_SVD_model.csv'
Content_based = '/content/Content based predictions/Mapped_Content_based_predictions_matrix.csv'

In [122]:
# Preview the first rows of the item-based model's prediction matrix.
pd.read_csv(item_baesd).head()

Unnamed: 0,userId,Toy Story (1995),Jumanji (1995),Grumpier Old Men (1995),Waiting to Exhale (1995),Father of the Bride Part II (1995),Heat (1995),Sabrina (1995),Tom and Huck (1995),Sudden Death (1995),...,Gintama: The Movie (2010),anohana: The Flower We Saw That Day - The Movie (2013),Silver Spoon (2014),Love Live! The School Idol Movie (2015),Jon Stewart Has Left the Building (2015),Black Butler: Book of the Atlantic (2017),No Game No Life: Zero (2017),Flint (2017),Bungo Stray Dogs: Dead Apple (2018),Andrew Dice Clay: Dice Rules (1991)
0,1,0.0,4.3,0.0,4.7,4.8,0.0,4.2,4.0,4.2,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,0,5.0,4.44
1,2,3.9,4.0,3.9,3.5,4.4,4.15,3.85,4.15,4.13,...,4.0,4.0,4.0,4.0,4.0,4.0,4.0,0,4.0,3.83
2,3,1.21,2.11,0.5,0.86,2.4,1.41,0.95,0.95,3.33,...,3.5,3.5,3.5,3.5,3.5,3.5,3.5,0,3.5,3.5
3,4,3.5,3.4,3.8,4.1,3.4,3.3,3.5,3.0,2.9,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0,2.0,3.38
4,5,0.0,3.6,3.9,3.6,4.0,3.71,4.0,3.1,3.7,...,3.5,3.5,3.5,3.5,3.5,3.5,3.5,0,3.5,4.0


In [125]:
# Preview the first rows of the user-based model's prediction matrix.
pd.read_csv(user_based).head()

Unnamed: 0,userId,Toy Story (1995),Jumanji (1995),Grumpier Old Men (1995),Waiting to Exhale (1995),Father of the Bride Part II (1995),Heat (1995),Sabrina (1995),Tom and Huck (1995),Sudden Death (1995),...,Gintama: The Movie (2010),anohana: The Flower We Saw That Day - The Movie (2013),Silver Spoon (2014),Love Live! The School Idol Movie (2015),Jon Stewart Has Left the Building (2015),Black Butler: Book of the Atlantic (2017),No Game No Life: Zero (2017),Flint (2017),Bungo Stray Dogs: Dead Apple (2018),Andrew Dice Clay: Dice Rules (1991)
0,1,0.0,2.5913,0.0,2.4772,2.571,0.0,2.6539,2.5771,2.2804,...,2.774,2.4921,3.0559,3.0559,2.774,3.0559,2.774,2.774,2.774,3.0277
1,2,2.6169,2.6303,2.6407,1.9087,2.2891,2.6709,2.301,2.2019,2.3925,...,2.1119,1.8433,2.3806,2.3806,2.1119,2.3806,2.1119,2.1119,2.1119,3.097
2,3,2.6682,2.6968,2.5023,2.8474,2.686,2.8299,2.5227,2.3996,2.6444,...,2.75,2.75,2.75,2.75,2.75,2.75,2.75,2.75,2.75,2.75
3,4,2.6927,2.6519,2.4369,2.1782,2.5013,2.5845,2.4085,2.2479,2.4116,...,3.0147,3.2794,2.75,2.75,3.0147,2.75,3.0147,3.0147,3.0147,2.4191
4,5,0.0,2.6211,2.478,2.0989,2.492,2.757,2.5952,2.6771,2.077,...,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.8914


In [126]:
# Preview the first rows of the matrix-factorization (SVD) model's prediction matrix.
pd.read_csv(matrix_factorization).head()

Unnamed: 0,userId,Toy Story (1995),Jumanji (1995),Grumpier Old Men (1995),Waiting to Exhale (1995),Father of the Bride Part II (1995),Heat (1995),Sabrina (1995),Tom and Huck (1995),Sudden Death (1995),...,Gintama: The Movie (2010),anohana: The Flower We Saw That Day - The Movie (2013),Silver Spoon (2014),Love Live! The School Idol Movie (2015),Jon Stewart Has Left the Building (2015),Black Butler: Book of the Atlantic (2017),No Game No Life: Zero (2017),Flint (2017),Bungo Stray Dogs: Dead Apple (2018),Andrew Dice Clay: Dice Rules (1991)
0,1,0.0,4.01,0.0,3.49,3.77,0.0,3.82,3.84,3.64,...,4.01,4.06,4.32,4.44,4.1,4.19,4.19,4.19,4.26,4.46
1,2,3.88,3.47,3.45,3.01,3.21,3.82,3.12,3.27,3.15,...,3.34,3.42,3.69,3.54,3.38,3.57,3.57,3.57,3.39,3.5
2,3,2.44,2.31,1.85,1.78,2.01,2.54,1.8,2.12,1.93,...,1.95,2.19,2.47,2.14,2.54,2.3,2.3,2.3,2.44,2.22
3,4,3.83,3.36,2.96,2.82,2.63,3.66,3.14,3.18,2.89,...,3.18,3.44,3.7,3.38,3.68,3.42,3.42,3.42,3.68,3.62
4,5,0.0,3.28,3.22,2.6,2.79,3.55,2.99,3.07,2.65,...,3.33,3.33,3.47,3.57,3.35,3.39,3.39,3.39,3.42,3.71


In [127]:
# Preview the first rows of the content-based model's prediction matrix.
pd.read_csv(Content_based).head()

Unnamed: 0,userId,Toy Story (1995),Jumanji (1995),Grumpier Old Men (1995),Waiting to Exhale (1995),Father of the Bride Part II (1995),Heat (1995),Sabrina (1995),Tom and Huck (1995),Sudden Death (1995),...,Gintama: The Movie (2010),anohana: The Flower We Saw That Day - The Movie (2013),Silver Spoon (2014),Love Live! The School Idol Movie (2015),Jon Stewart Has Left the Building (2015),Black Butler: Book of the Atlantic (2017),No Game No Life: Zero (2017),Flint (2017),Bungo Stray Dogs: Dead Apple (2018),Andrew Dice Clay: Dice Rules (1991)
0,1,0.0,1.219871,0.0,1.261826,1.242525,0.0,1.539146,1.193359,1.261826,...,1.052152,1.050919,1.050522,1.050132,1.249903,1.049372,1.049372,1.248771,1.342651,1.264351
1,2,3.80551,3.712612,3.837187,3.80551,3.80551,3.844348,3.697028,3.80551,3.80551,...,3.689955,3.688391,3.687887,3.687392,4.052942,3.686424,3.686425,3.794751,3.644321,3.807616
2,3,2.878805,2.715783,2.804403,2.875632,2.933616,2.907951,2.665687,2.933616,2.881303,...,3.704271,3.699941,3.698545,3.697173,3.110815,3.694495,3.694495,2.966801,3.315335,2.92716
3,4,3.503755,3.296314,3.419446,3.667907,3.781866,3.612372,3.617601,3.667907,3.667907,...,2.251389,2.264601,2.268862,2.273054,3.676732,2.281238,2.281238,3.618048,3.250817,3.677623
4,5,0.0,2.870044,2.681221,2.540873,2.540873,2.812615,2.942619,2.540873,2.540873,...,2.937004,2.924092,2.919937,2.915854,2.535971,2.907895,2.907895,2.466601,2.663657,2.541918


# Get the final predictions

In [162]:
# Equal weights for the four models. The order of csv_paths matters: combine_csvs()
# uses the entry at index 3 (here the content-based matrix) for its zero-prediction fallback.
hybrid_recommender = HybridRecommendation(
    csv_paths=[item_baesd, user_based, matrix_factorization, Content_based],
    weights=[0.25, 0.25, 0.25, 0.25])

In [163]:
# Load the four CSVs, check that they share the same columns and userId order,
# and store the weighted blend in hybrid_recommender.final_df.
hybrid_recommender.combine_csvs()

In [164]:
# Write final_df to disk; the class saves it without the pandas index column.
hybrid_recommender.save_final_csv('/content/hybrid_final.csv')

In [165]:
# Read the saved file back as a sanity check of what was written.
pd.read_csv('/content/hybrid_final.csv')

Unnamed: 0,userId,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
0,1,3.294472,3.240305,3.197963,3.103213,3.033445,2.699750,3.010636,3.023194,3.248401,...,2.864696,3.317470,3.240200,3.162152,3.150755,3.208102,2.805379,2.605448,0.000000,3.877549
1,2,3.363154,3.690786,3.419679,3.493639,3.429519,3.546806,3.602372,3.652905,3.473866,...,3.182691,3.634563,3.400983,3.155320,3.237923,3.221948,3.278840,3.182263,3.516860,2.892456
2,3,2.796531,2.516913,2.281994,2.396991,2.380473,2.427008,2.235847,2.183198,2.219931,...,2.493817,2.728380,2.528986,2.368700,3.034985,2.574819,2.189815,1.822693,2.315095,2.175919
3,4,3.081347,3.239426,3.207531,3.739799,3.710482,3.193705,3.129227,3.175968,3.214055,...,3.021923,3.244848,3.093524,3.072533,2.746000,3.151072,2.825881,3.239717,3.088406,3.670773
4,5,3.064409,3.212151,3.137499,3.255269,3.129265,3.219337,2.928802,3.030839,3.193549,...,2.684060,3.474620,3.337114,3.415690,3.109948,3.203882,2.950122,2.604740,3.183478,3.913594
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,606,2.954311,2.886799,2.859725,3.064418,2.848903,2.836366,2.610033,2.698372,2.880579,...,2.559561,2.961563,2.801870,2.788780,3.086850,3.081880,2.407537,2.024241,2.545163,3.836417
606,607,3.097953,3.212842,2.959754,2.976491,2.995616,3.136592,2.929983,3.065432,3.224057,...,2.946788,3.263304,3.395350,3.087119,2.644890,3.109511,2.834610,2.673129,3.096597,2.834220
607,608,2.890220,2.864873,2.647680,2.937473,2.541929,2.141328,2.282259,2.652363,2.537108,...,2.498756,2.854181,2.889379,2.789654,2.574363,0.000000,0.000000,2.241969,2.464432,2.259509
608,609,2.859113,2.782745,2.642905,2.963148,2.729383,2.203613,2.549019,2.510184,2.809980,...,2.462397,3.016489,3.048530,3.202950,2.676619,2.900442,2.604920,2.337832,2.649787,2.912024


In [167]:
# Reload with the first CSV column (userId) as the row index, leaving only movie columns.
df = pd.read_csv('/content/hybrid_final.csv' , index_col=0)
# Drop the index label so the displayed table has no 'userId' header above the index.
df.index.name = None
df.head()

Unnamed: 0,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
1,3.294472,3.240305,3.197963,3.103213,3.033445,2.69975,3.010636,3.023194,3.248401,3.103585,...,2.864696,3.31747,3.2402,3.162152,3.150755,3.208102,2.805379,2.605448,0.0,3.877549
2,3.363154,3.690786,3.419679,3.493639,3.429519,3.546806,3.602372,3.652905,3.473866,3.457843,...,3.182691,3.634563,3.400983,3.15532,3.237923,3.221948,3.27884,3.182263,3.51686,2.892456
3,2.796531,2.516913,2.281994,2.396991,2.380473,2.427008,2.235847,2.183198,2.219931,2.346061,...,2.493817,2.72838,2.528986,2.3687,3.034985,2.574819,2.189815,1.822693,2.315095,2.175919
4,3.081347,3.239426,3.207531,3.739799,3.710482,3.193705,3.129227,3.175968,3.214055,3.158904,...,3.021923,3.244848,3.093524,3.072533,2.746,3.151072,2.825881,3.239717,3.088406,3.670773
5,3.064409,3.212151,3.137499,3.255269,3.129265,3.219337,2.928802,3.030839,3.193549,3.176091,...,2.68406,3.47462,3.337114,3.41569,3.109948,3.203882,2.950122,2.60474,3.183478,3.913594


In [168]:
# List the movie columns of the hybrid matrix; they come out ordered by column name
# because the class sorts the columns before combining.
df.columns

Index([''71 (2014)', ''Hellboy': The Seeds of Creation (2004)',
       ''Round Midnight (1986)', ''Salem's Lot (2004)',
       ''Til There Was You (1997)', ''Tis the Season for Love (2015)',
       ''burbs, The (1989)', ''night Mother (1986)',
       '(500) Days of Summer (2009)', '*batteries not included (1987)',
       ...
       'Zulu (2013)', '[REC] (2007)', '[REC]² (2009)',
       '[REC]³ 3 Génesis (2012)',
       'anohana: The Flower We Saw That Day - The Movie (2013)',
       'eXistenZ (1999)', 'xXx (2002)', 'xXx: State of the Union (2005)',
       '¡Three Amigos! (1986)', 'À nous la liberté (Freedom for Us) (1931)'],
      dtype='object', length=9742)