In [481]:
import pandas as pd
import numpy as np
import kagglehub
import os
import matplotlib.pyplot as plt
import seaborn as sns
import random
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

In [482]:
path = kagglehub.dataset_download("senapatirajesh/netflix-tv-shows-and-movies")

print("Path to dataset files:", path)
print("Path to dataset files:", path)
csv_file = None
for file in os.listdir(path):
    if file.endswith('.csv'):
        csv_file = os.path.join(path, file)
        break

if csv_file is None:
    print("No CSV file found in the directory.")
else:
    print(f"Found CSV file: {csv_file}")


Path to dataset files: C:\Users\keert\.cache\kagglehub\datasets\senapatirajesh\netflix-tv-shows-and-movies\versions\1
Path to dataset files: C:\Users\keert\.cache\kagglehub\datasets\senapatirajesh\netflix-tv-shows-and-movies\versions\1
Found CSV file: C:\Users\keert\.cache\kagglehub\datasets\senapatirajesh\netflix-tv-shows-and-movies\versions\1\NetFlix.csv


In [483]:
df = pd.read_csv(csv_file, delimiter=',')
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,genres,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,14-Aug-20,2020,TV-MA,4,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s10,Movie,1920,Vikram Bhatt,"Rajneesh Duggal, Adah Sharma, Indraneil Sengup...",India,15-Dec-17,2008,TV-MA,143,"Horror Movies, International Movies, Thrillers",An architect and his wife move into a castle t...
2,s100,Movie,3 Heroines,Iman Brotoseno,"Reza Rahadian, Bunga Citra Lestari, Tara Basro...",Indonesia,05-Jan-19,2016,TV-PG,124,"Dramas, International Movies, Sports Movies",Three Indonesian women break records by becomi...
3,s1000,Movie,Blue Mountain State: The Rise of Thadland,Lev L. Spiro,"Alan Ritchson, Darin Brooks, James Cade, Rob R...",United States,01-Mar-16,2016,R,90,Comedies,New NFL star Thad buys his old teammates' belo...
4,s1001,TV Show,Blue Planet II,,David Attenborough,United Kingdom,03-Dec-18,2017,TV-G,1,"British TV Shows, Docuseries, Science & Nature TV",This sequel to the award-winning nature series...


In [484]:
df.drop('description',axis=1,inplace=True)

In [485]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7787 entries, 0 to 7786
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       7787 non-null   object
 1   type          7787 non-null   object
 2   title         7787 non-null   object
 3   director      5398 non-null   object
 4   cast          7069 non-null   object
 5   country       7280 non-null   object
 6   date_added    7777 non-null   object
 7   release_year  7787 non-null   int64 
 8   rating        7780 non-null   object
 9   duration      7787 non-null   int64 
 10  genres        7787 non-null   object
dtypes: int64(2), object(9)
memory usage: 669.3+ KB


In [486]:
df.isnull().sum()

show_id            0
type               0
title              0
director        2389
cast             718
country          507
date_added        10
release_year       0
rating             7
duration           0
genres             0
dtype: int64

<h1>Handling Null Values</h1> Replaced ['director','cast','country'] Null values with Unknown,Replace 'date_added' with 'Not Available',Replace 'rating' with mode

In [487]:
df['director']= df['director'].fillna('Unknown')
df['cast']= df['cast'].fillna('Unknown')
df['country'] = df['country'].fillna('Unknown')

In [488]:
df['date_added']=df['date_added'].fillna('Not Available')
df['rating']=df['rating'].fillna(df['rating'].mode()[0])

In [489]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7787 entries, 0 to 7786
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       7787 non-null   object
 1   type          7787 non-null   object
 2   title         7787 non-null   object
 3   director      7787 non-null   object
 4   cast          7787 non-null   object
 5   country       7787 non-null   object
 6   date_added    7787 non-null   object
 7   release_year  7787 non-null   int64 
 8   rating        7787 non-null   object
 9   duration      7787 non-null   int64 
 10  genres        7787 non-null   object
dtypes: int64(2), object(9)
memory usage: 669.3+ KB


In [490]:
df['show_id'].value_counts()

show_id
s999     1
s1       1
s10      1
s100     1
s1000    1
        ..
s1010    1
s1011    1
s1012    1
s1013    1
s1014    1
Name: count, Length: 7787, dtype: int64

In [491]:
df['show_id']= df['show_id'].str.replace('s', '').astype(int)

In [492]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,genres
0,1,TV Show,3%,Unknown,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,14-Aug-20,2020,TV-MA,4,"International TV Shows, TV Dramas, TV Sci-Fi &..."
1,10,Movie,1920,Vikram Bhatt,"Rajneesh Duggal, Adah Sharma, Indraneil Sengup...",India,15-Dec-17,2008,TV-MA,143,"Horror Movies, International Movies, Thrillers"
2,100,Movie,3 Heroines,Iman Brotoseno,"Reza Rahadian, Bunga Citra Lestari, Tara Basro...",Indonesia,05-Jan-19,2016,TV-PG,124,"Dramas, International Movies, Sports Movies"
3,1000,Movie,Blue Mountain State: The Rise of Thadland,Lev L. Spiro,"Alan Ritchson, Darin Brooks, James Cade, Rob R...",United States,01-Mar-16,2016,R,90,Comedies
4,1001,TV Show,Blue Planet II,Unknown,David Attenborough,United Kingdom,03-Dec-18,2017,TV-G,1,"British TV Shows, Docuseries, Science & Nature TV"


<h1>Generating Synthetic Dataset with customer_id column</h1> Duplicated 10,000 Rows randomly and added customer_id column randomly from range 1 -50

In [493]:
# Create a list of indices to duplicate
indices_to_duplicate = np.random.randint(0, 7787, 2000)

# Duplicate the rows
df_duplicated = df.iloc[indices_to_duplicate]

# Concatenate the original DataFrame and the duplicated rows
df_expanded = pd.concat([df, df_duplicated])
df_expanded.shape

(9787, 11)

In [494]:
# Generate random customer IDs between 1 and 50
customer_ids = np.random.randint(1, 51, size=len(df_expanded))

# Add the 'customer_id' column to the DataFrame
df_expanded['customer_id'] = customer_ids
df_expanded = df_expanded.sort_values(by=['customer_id','show_id'])
df_expanded.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,genres,customer_id
201,118,Movie,5 Cowok Jagoan,Anggy Umbara,"Ario Bayu, Arifin Putra, Dwi Sasono, Muhadkly ...",Unknown,05-Jan-19,2017,TV-MA,106,"Action & Adventure, Comedies, International Mo...",1
357,132,Movie,68 Kill,Trent Haaga,"Matthew Gray Gubler, AnnaLynne McCord, Alisha ...",United States,01-Sep-19,2017,R,96,"Comedies, Independent Movies, Thrillers",1
557,150,Movie,A Beautiful Life,Andrew Lau Wai-Keung,"Qi Shu, Liu Ye, Anthony Wong Chau-Sang, Liang ...","China, Hong Kong",01-Dec-18,2011,TV-14,122,"Dramas, International Movies, Romantic Movies",1
1089,198,TV Show,A Little Help with Carol Burnett,Unknown,"Carol Burnett, Russell Peters, Mark Cuban, Tar...",United States,04-May-18,2018,TV-G,1,"Stand-Up Comedy & Talk Shows, TV Comedies",1
1756,258,Movie,A Yellow Bird,K. Rajagopal,"Sivakumar Palakrishnan, Huang Lu, Seema Biswas...","Singapore, France",01-May-17,2016,TV-MA,111,"Dramas, Independent Movies, International Movies",1


<h1>Creating User_Show_Matrix</h1> Rows indicate the customer_id and columns indicate each show_id and the value 1 at particular show_id indicates that particular customer has watched that show and 0 indicates that particular customer hasn't watched that show

In [495]:
customer_show = df_expanded.groupby(['customer_id','show_id'])['show_id'].count().reset_index(name ='Count')
customer_show

Unnamed: 0,customer_id,show_id,Count
0,1,118,1
1,1,132,1
2,1,150,1
3,1,198,1
4,1,258,1
...,...,...,...
9748,50,7714,1
9749,50,7742,1
9750,50,7750,1
9751,50,7780,1


In [496]:
my_show = customer_show.pivot_table(index='customer_id', columns='show_id', values='Count', aggfunc='sum').fillna(0)
# making a function which returns 0 or 1
# 0 means item was not in that transaction, 1 means item present in that transaction

def encode(x):
    if x<=0:
        return 0
    if x>=1:
        return 1

# applying the function to the dataset
my_show_sets = my_show.map(encode)
my_show_sets.head()

show_id,1,2,3,4,5,6,7,8,9,10,...,7778,7779,7780,7781,7782,7783,7784,7785,7786,7787
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,0


<h1>Identifying Frequently Watched Show Combinations</h1>  The apriori algorithm is used to discover these frequent itemsets, and the results are sorted by support to highlight the most commonly watched combinations.

In [497]:
frequent_itemsets = apriori(my_show_sets, min_support=0.03, use_colnames=True)  # Adjust min_support as needed
frequent_itemsets.sort_values(by='support',ascending=False)



Unnamed: 0,support,itemsets
16,0.10,(97)
1273,0.10,(5577)
1100,0.08,(4869)
1531,0.08,(6771)
1177,0.08,(5171)
...,...,...
5613,0.04,"(1928, 685, 6195, 852, 4471, 7262, 4607)"
5614,0.04,"(3404, 685, 6195, 852, 4471, 7262, 4607)"
5615,0.04,"(1928, 3404, 685, 6195, 4471, 7262, 4607)"
5616,0.04,"(1928, 3404, 6195, 852, 4471, 7262, 4607)"


In [498]:
df_expanded['show_id'].value_counts().sort_values(ascending=False)

show_id
5577    5
97      5
3920    4
82      4
4869    4
       ..
7645    1
7705    1
7714    1
7742    1
5822    1
Name: count, Length: 7787, dtype: int64

<h1>Discovering Strong Associations Between Shows</h1>  Generate association rules between frequently watched shows

In [499]:
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)
rules.sort_values('confidence', ascending = False, inplace = True)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
27119,(4607),"(1928, 3404, 685, 6195, 852, 4471, 7262)",0.04,0.04,0.04,1.0,25.0,0.0384,inf,1.0
0,(5),(846),0.04,0.04,0.04,1.0,25.0,0.0384,inf,1.0
1,(846),(5),0.04,0.04,0.04,1.0,25.0,0.0384,inf,1.0
2,(9),(418),0.04,0.04,0.04,1.0,25.0,0.0384,inf,1.0
3,(418),(9),0.04,0.04,0.04,1.0,25.0,0.0384,inf,1.0


<h1>Content Based Recommendation</h1> Extracts recommended shows from the rules and returns the top N

In [500]:
def recommend_shows(show_id, rules, user_item_matrix, top_n=5):
  """
  Recommends shows based on a given show ID.

  Args:
      show_id (int): The ID of the show to use for recommendations.
      rules (pandas.DataFrame): The association rules generated from Apriori.
      user_item_matrix (pandas.DataFrame): The user-item matrix.
      top_n (int, optional): The number of recommendations to return. Defaults to 5.

  Returns:
      list: A list of recommended show IDs.
  """
  itemsets = rules[rules['antecedents'] == frozenset([show_id])]
  #print(itemsets)
  recommendations = []
  for _, row in itemsets.iterrows():
      recommendations.extend(list(row['consequents']))
  #print(recommendations)
  recommendations = list(set(recommendations) - set([show_id]))[:top_n]
  return recommendations

<h1>Top 5 Content Recommendations</h1>

In [514]:
content_recommendation=recommend_shows(4607, rules, my_show_sets, top_n=10)
print("Top 5 Recommendation Show based on your watch")
content_recommendation

Top 5 Recommendation Show based on your watch


[1928, 3404, 685, 6195, 852, 4471, 7262]

In [515]:
# Assuming you have a DataFrame `df` with 'show_id' and other relevant columns

# ... (rest of your code)

# Create a new DataFrame with the recommended show IDs
recommended_df = pd.DataFrame({'show_id': content_recommendation})

# You can now print or use this DataFrame as needed
print(recommended_df)
# Merge the recommended DataFrame with the original DataFrame
merged_df = pd.merge(recommended_df, df_expanded, on='show_id')

merged_df

   show_id
0     1928
1     3404
2      685
3     6195
4      852
5     4471
6     7262


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,genres,customer_id
0,1928,Movie,Ee Nagaraniki Emaindi,Tharun Bhascker,"Vishwaksen Naidu, Sushanth Reddy, Abhinav Goma...",India,06-Sep-19,2018,TV-14,133,"Comedies, International Movies",4
1,1928,Movie,Ee Nagaraniki Emaindi,Tharun Bhascker,"Vishwaksen Naidu, Sushanth Reddy, Abhinav Goma...",India,06-Sep-19,2018,TV-14,133,"Comedies, International Movies",8
2,1928,Movie,Ee Nagaraniki Emaindi,Tharun Bhascker,"Vishwaksen Naidu, Sushanth Reddy, Abhinav Goma...",India,06-Sep-19,2018,TV-14,133,"Comedies, International Movies",10
3,3404,TV Show,Kingdoms of the Sky,Steve Greenwood,Unknown,Unknown,01-Oct-18,2018,TV-PG,1,"British TV Shows, Docuseries, Science & Nature TV",8
4,3404,TV Show,Kingdoms of the Sky,Steve Greenwood,Unknown,Unknown,01-Oct-18,2018,TV-PG,1,"British TV Shows, Docuseries, Science & Nature TV",10
5,685,Movie,Backtrack,Michael Petroni,"Adrien Brody, Sam Neill, Robin McLeavy, Bruce ...","Australia, United Kingdom, United Arab Emirate...",25-Dec-18,2015,R,90,"Dramas, Thrillers",8
6,685,Movie,Backtrack,Michael Petroni,"Adrien Brody, Sam Neill, Robin McLeavy, Bruce ...","Australia, United Kingdom, United Arab Emirate...",25-Dec-18,2015,R,90,"Dramas, Thrillers",10
7,6195,Movie,The Cakemaker,Ofir Raul Graizer,"Sarah Adler, Tim Kalkhof, Roy Miller, Zohar Sh...","Israel, Germany",04-Dec-18,2017,TV-14,109,"Dramas, Independent Movies, International Movies",8
8,6195,Movie,The Cakemaker,Ofir Raul Graizer,"Sarah Adler, Tim Kalkhof, Roy Miller, Zohar Sh...","Israel, Germany",04-Dec-18,2017,TV-14,109,"Dramas, Independent Movies, International Movies",10
9,6195,Movie,The Cakemaker,Ofir Raul Graizer,"Sarah Adler, Tim Kalkhof, Roy Miller, Zohar Sh...","Israel, Germany",04-Dec-18,2017,TV-14,109,"Dramas, Independent Movies, International Movies",40


In [512]:
df_expanded[df_expanded['show_id']==4607]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,genres,customer_id
4009,4607,Movie,Olmo & the Seagull,"Petra Costa, Lea Glob","Olivia Corsini, Sergio Nicolai, Arman Saribeky...","Denmark, Brazil, France, Portugal, Sweden",01-Sep-19,2014,TV-MA,82,"Dramas, Independent Movies, International Movies",8
4009,4607,Movie,Olmo & the Seagull,"Petra Costa, Lea Glob","Olivia Corsini, Sergio Nicolai, Arman Saribeky...","Denmark, Brazil, France, Portugal, Sweden",01-Sep-19,2014,TV-MA,82,"Dramas, Independent Movies, International Movies",10


<h1>Collaborative Recommendations</h1>  computes the pairwise cosine similarity between users 

In [504]:
# Calculate cosine similarity
user_similarity = cosine_similarity(my_show_sets)
print(user_similarity.shape)
user_similarity

(50, 50)


array([[1.        , 0.01092961, 0.01030764, ..., 0.01123755, 0.00532284,
        0.00557123],
       [0.01092961, 1.        , 0.0050978 , ..., 0.0166731 , 0.01052996,
        0.00551067],
       [0.01030764, 0.0050978 , 1.        , ..., 0.01048285, 0.00496536,
        0.00519707],
       ...,
       [0.01123755, 0.0166731 , 0.01048285, ..., 1.        , 0.01623996,
        0.00566593],
       [0.00532284, 0.01052996, 0.00496536, ..., 0.01623996, 1.        ,
        0.00536751],
       [0.00557123, 0.00551067, 0.00519707, ..., 0.00566593, 0.00536751,
        1.        ]])

<p><b>Identifies top similar users and recommends shows they've watched, excluding those already seen by the target user</b></p>

In [505]:
def recommend_similar_user_shows(user_id, user_item_matrix, user_similarity, top_n=10):
    # Get the similarity scores of the target user with all other users
    similar_users = user_similarity[user_id]

    # Sort the users by similarity score in descending order
    similar_users = similar_users.argsort()[::-1]
    print( similar_users)
    # Get the top N similar users
    top_users = similar_users[1:top_n+1]

    # Get the shows watched by the top N similar users
    recommended_shows = []
    for user_id in top_users:
        recommended_shows.extend(user_item_matrix.columns[user_item_matrix.loc[user_id] == 1])

    # Remove shows already watched by the target user
    recommended_shows = list(set(recommended_shows) - set(user_item_matrix.columns[user_item_matrix.loc[user_id] == 1]))

    return recommended_shows[:top_n]

<h1>Top 5 Collabarative Recommendations</h1>

In [506]:
collabarative_recommendation=recommend_similar_user_shows(1,my_show_sets,user_similarity)
print("Top 5 Recommendation Show based on Similar User")
collabarative_recommendation

[ 1 27 41 28 43 47 34 39 46 38  3 36 19 17 10  0 44 29 37 21  4 40  7 48
 18  5 26 12 32 45  9 13 14 25 24 31 49 20 42 23 30 15  2 35 33 22 16 11
  8  6]
Top 5 Recommendation Show based on Similar User


[4097, 4100, 7, 8, 4103, 4106, 12, 13, 14, 4112]

In [507]:
# Assuming you have a DataFrame `df` with 'show_id' and other relevant columns

# ... (rest of your code)

# Create a new DataFrame with the recommended show IDs
recommended_df = pd.DataFrame({'show_id': collabarative_recommendation})

# You can now print or use this DataFrame as needed
print(recommended_df)
# Merge the recommended DataFrame with the original DataFrame
merged_df = pd.merge(recommended_df, df_expanded, on='show_id')

merged_df

   show_id
0     4097
1     4100
2        7
3        8
4     4103
5     4106
6       12
7       13
8       14
9     4112


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,genres,customer_id
0,4097,Movie,Milada,David Mrnka,"Ayelet Zurer, Robert Gant, Vica Kerekes, Aňa G...","Czech Republic, United States",12-Jan-18,2017,TV-14,124,"Dramas, International Movies",41
1,4100,Movie,Miles Davis: Birth of the Cool,Stanley Nelson,"Miles Davis, Carl Lumbly",United States,06-Mar-20,2019,TV-MA,115,"Documentaries, Music & Musicals",39
2,7,Movie,122,Yasir Al Yasiri,"Amina Khalil, Ahmed Dawood, Tarek Lotfy, Ahmed...",Egypt,01-Jun-20,2019,TV-MA,95,"Horror Movies, International Movies",39
3,8,Movie,187,Kevin Reynolds,"Samuel L. Jackson, John Heard, Kelly Rowan, Cl...",United States,01-Nov-19,1997,R,119,Dramas,28
4,4103,TV Show,Millennials,Unknown,"Nicolás Riera, Laura Laprida, Juan Manuel Guil...",Argentina,05-Nov-19,2019,TV-MA,2,"International TV Shows, Romantic TV Shows, Spa...",46
5,4106,TV Show,Million Yen Women,Unknown,"Yojiro Noda, Rila Fukushima, Rena Matsui, Miwa...",Japan,15-Aug-17,2017,TV-MA,1,"Crime TV Shows, International TV Shows, TV Dramas",43
6,12,TV Show,1983,Unknown,"Robert Więckiewicz, Maciej Musiał, Michalina O...","Poland, United States",30-Nov-18,2018,TV-MA,1,"Crime TV Shows, International TV Shows, TV Dramas",46
7,13,TV Show,1994,Diego Enrique Osorno,Unknown,Mexico,17-May-19,2019,TV-MA,1,"Crime TV Shows, Docuseries, International TV S...",18
8,13,TV Show,1994,Diego Enrique Osorno,Unknown,Mexico,17-May-19,2019,TV-MA,1,"Crime TV Shows, Docuseries, International TV S...",34
9,14,Movie,2215,Nottapon Boonprakob,Artiwara Kongmalai,Thailand,01-Mar-19,2018,TV-MA,89,"Documentaries, International Movies, Sports Mo...",46


In [508]:
df_expanded[df_expanded['customer_id']==1]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,genres,customer_id
201,118,Movie,5 Cowok Jagoan,Anggy Umbara,"Ario Bayu, Arifin Putra, Dwi Sasono, Muhadkly ...",Unknown,05-Jan-19,2017,TV-MA,106,"Action & Adventure, Comedies, International Mo...",1
357,132,Movie,68 Kill,Trent Haaga,"Matthew Gray Gubler, AnnaLynne McCord, Alisha ...",United States,01-Sep-19,2017,R,96,"Comedies, Independent Movies, Thrillers",1
557,150,Movie,A Beautiful Life,Andrew Lau Wai-Keung,"Qi Shu, Liu Ye, Anthony Wong Chau-Sang, Liang ...","China, Hong Kong",01-Dec-18,2011,TV-14,122,"Dramas, International Movies, Romantic Movies",1
1089,198,TV Show,A Little Help with Carol Burnett,Unknown,"Carol Burnett, Russell Peters, Mark Cuban, Tar...",United States,04-May-18,2018,TV-G,1,"Stand-Up Comedy & Talk Shows, TV Comedies",1
1756,258,Movie,A Yellow Bird,K. Rajagopal,"Sivakumar Palakrishnan, Huang Lu, Seema Biswas...","Singapore, France",01-May-17,2016,TV-MA,111,"Dramas, Independent Movies, International Movies",1
...,...,...,...,...,...,...,...,...,...,...,...,...
7387,7648,Movie,Without Gorky,Cosima Spender,Unknown,United Kingdom,31-May-17,2011,TV-14,80,Documentaries,1
7410,7669,TV Show,World War II in Colour,Unknown,Robert Powell,United Kingdom,01-Aug-17,2009,TV-MA,1,"British TV Shows, Docuseries, International TV...",1
7414,7672,Movie,World's Weirdest Homes,Storm Theunissen,Charlie Luxton,Unknown,01-Feb-19,2015,TV-PG,49,Movies,1
7509,7758,Movie,Zach Galifianakis: Live at the Purple Onion,Michael Blieden,"Zach Galifianakis, Brian Unger",United States,26-Feb-19,2006,TV-MA,61,Stand-Up Comedy,1


<h1>Hybrid Model based on weightage, both collabarative and content based Recommendation</h1>

In [516]:
print("Total Recommendations based on both Collaborative and Content based Recommendations")
content_wt=6
print(content_recommendation)
print(collabarative_recommendation)
wt_content_recommendation=content_recommendation[0:content_wt]
wt_content_recommendation.extend(collabarative_recommendation[0:(10-content_wt)])
print(wt_content_recommendation)

Total Recommendations based on both Collaborative and Content based Recommendations
[1928, 3404, 685, 6195, 852, 4471, 7262]
[4097, 4100, 7, 8, 4103, 4106, 12, 13, 14, 4112]
[1928, 3404, 685, 6195, 852, 4471, 4097, 4100, 7, 8]
