In [None]:
# Mount Google Drive at /content/drive; the dataset read further down lives
# under that mount point. `google.colab` is only importable inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import csr_matrix
from sklearn.metrics import davies_bouldin_score

## Read Dataset

In [None]:
# Load the usage table and, in the same expression, discard every column
# whose name starts with "Unnamed" (the regex is anchored with ^).
df = (
    pd.read_csv('/content/drive/MyDrive/Recommendation/dfn3share.csv')
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
)
df.head()

Unnamed: 0,user,service,count,subcat,gender
0,3646,92,127,15,1
1,3646,94,18,15,1
2,3646,93,19,15,1
3,3646,95,2,17,1
4,3646,114,1,15,1


## Hybrid Model

In [None]:
def Hybrid(df, min_counting=1, min_service_rated=1):
  """Build the user x service matrix that the recommender functions consume.

  Parameters
  ----------
  df : pandas.DataFrame
      Long-format table with at least the columns 'user', 'service' and
      'count' (one row per user/service pair).
  min_counting : int, default 1
      A service is kept when at least one of its rows has
      ``count >= min_counting``.
  min_service_rated : int, default 1
      A user is kept when they have at least this many rows among the
      kept services.

  Returns
  -------
  pandas.DataFrame
      Rows indexed by 'user', columns by 'service', values taken from
      'count'; pairs that never occur are NaN.

  Raises
  ------
  ValueError
      Raised by ``DataFrame.pivot`` when the same (user, service) pair
      appears more than once.
  """
  # Select services by their *id*. The previous code compared 'service'
  # against the row labels of the filtered frame, so which services survived
  # depended on how many rows the frame had rather than on the data.
  popular_services = df.loc[df['count'] >= min_counting, 'service'].unique()
  df_counting_with_top_services = df[df['service'].isin(popular_services)]

  # Number of (kept) services each user has a row for.
  services_per_user = df_counting_with_top_services.groupby('user')['service'].count()
  top_rating_users = services_per_user[services_per_user >= min_service_rated].index
  df_final = df_counting_with_top_services[
      df_counting_with_top_services['user'].isin(top_rating_users)]

  return df_final.pivot(index='user', columns='service', values='count')


In [None]:
def similar_service(item, service_id=92, top_n=10):
  """Return the services whose usage correlates most with one service.

  Parameters
  ----------
  item : pandas.DataFrame
      User x service matrix (one column per service id).
  service_id : hashable, default 92
      Column label of the reference service. The default keeps the
      behaviour of the earlier hard-coded version.
  top_n : int, default 10
      Number of rows to return.

  Returns
  -------
  pandas.Series
      Correlation of every column with the reference column, sorted in
      descending order and truncated to ``top_n`` entries. The reference
      service itself is included.

  Raises
  ------
  KeyError
      If ``service_id`` is not a column of ``item``.
  """
  reference = item[service_id]
  return item.corrwith(reference).sort_values(ascending=False).head(top_n)
  

In [None]:
def recom_user(random_user, item, ratings=None, overlap_ratio=0.6,
               min_corr=0.3, min_score=3):
  """Recommend services to a user from the usage of similar users.

  Steps:
    1. Take the services the target user has a value for.
    2. Keep users who have a value for more than ``overlap_ratio`` of them.
    3. Correlate those users with the target over these services and keep
       the ones with correlation >= ``min_corr``.
    4. Score every service as mean(correlation * count) over the kept users
       and return those scoring above ``min_score``, best first.

  Services the target user already uses are not removed from the result.

  Parameters
  ----------
  random_user : hashable
      Index label of the target user in ``item``.
  item : pandas.DataFrame
      User x service matrix with NaN for unused services. The index is
      expected to be unique (as produced by ``DataFrame.pivot``).
  ratings : pandas.DataFrame, optional
      Long table with columns 'user', 'service', 'count' used for scoring.
      When omitted it is rebuilt from the non-NaN cells of ``item`` so the
      function does not rely on a notebook-level global.
  overlap_ratio : float, default 0.6
      Fraction of the target's services a neighbour must exceed.
  min_corr : float, default 0.3
      Minimum correlation with the target user.
  min_score : float, default 3
      Weighted score a service must exceed to be recommended.

  Returns
  -------
  list
      Service ids ordered by decreasing weighted score; empty when the user
      is unknown, has no services, or has no sufficiently similar users.
  """
  target_row = item[item.index == random_user]
  if target_row.empty:
    return []

  # Services the target user has a (non-NaN) value for.
  service_used = target_row.columns[target_row.notna().any()].tolist()
  if not service_used:
    return []

  # Restrict the matrix to those services and count, per user, how many of
  # them that user has a value for.
  service_used_df = item[service_used]
  shared_counts = service_used_df.notna().sum(axis=1)
  min_shared = len(service_used) * overlap_ratio

  # The target row is always kept (exactly once) so it is present in the
  # correlation matrix; appending it separately would duplicate its label.
  keep = (shared_counts > min_shared) | (service_used_df.index == random_user)
  neighbours = service_used_df[keep]

  # Read the target's column of the user-user correlation matrix directly.
  # De-duplicating the unstacked matrix by *value* would drop distinct users
  # that happen to share the same correlation with the target.
  user_corr = neighbours.T.corr()[random_user].drop(labels=[random_user])
  top_users = (
      user_corr[user_corr >= min_corr]  # NaN correlations fail the comparison
      .sort_values(ascending=False)
      .rename("corr")
      .rename_axis("user")
      .reset_index()
  )
  if top_users.empty:
    return []

  if ratings is None:
    # Long-format view of the matrix: one row per non-NaN (user, service).
    ratings = (
        item.rename_axis(index="user")
        .melt(var_name="service", value_name="count", ignore_index=False)
        .dropna(subset=["count"])
        .reset_index()
    )

  top_users_ratings = top_users.merge(
      ratings[["user", "service", "count"]], how="inner", on="user")

  # Single score per row: similarity to the target times usage count.
  top_users_ratings["weighted_rating"] = (
      top_users_ratings["corr"] * top_users_ratings["count"])

  recommendation_df = top_users_ratings.groupby(
      "service", as_index=False)["weighted_rating"].mean()
  service_to_be_recommend = recommendation_df[
      recommendation_df["weighted_rating"] > min_score
  ].sort_values("weighted_rating", ascending=False)
  return service_to_be_recommend["service"].to_list()



In [None]:
# Build the user x service matrix, then show the recommendations for user 3646.
item = Hybrid(df)
recom_user(3646, item)

[92, 337, 508, 393, 175, 171, 75, 52, 20, 17]

In [None]:
import csv

def generate_output1(max_users=10,
                     filename='/content/drive/MyDrive/Final_Recommender/Hybrid/user_Hybrid.csv'):
    """Append recommendations for the first users of ``df`` to a CSV file.

    Reads the notebook-level DataFrame ``df`` and uses ``Hybrid`` and
    ``recom_user`` defined earlier in the notebook.

    Parameters
    ----------
    max_users : int or None, default 10
        How many of the unique users (in order of first appearance in
        ``df['user']``) to process; ``None`` processes all of them.
    filename : str
        Destination CSV. It is opened in append mode; the header row
        ('User ID', 'Service') is written only when the file is empty.

    Each data row holds the user id and the recommendation list for that
    user (the list is written as a single cell). Progress is printed as the
    user id, the recommendations and a running counter.
    """
    # `df1` is not defined anywhere in this notebook; the users must come from
    # the same frame the matrix is built from.
    all_users = df['user'].unique()
    if max_users is not None:
        all_users = all_users[:max_users]

    # The matrix does not depend on the user, so build it once instead of
    # once per loop iteration.
    item = Hybrid(df)

    with open(filename, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)

        # In append mode the position starts at the end of the file, so 0
        # means the file is empty and still needs its header.
        if csvfile.tell() == 0:
            writer.writerow(['User ID', 'Service'])

        for count, user in enumerate(all_users, start=1):
            user_output = recom_user(user, item)
            print(user)
            print(user_output)

            writer.writerow([user, user_output])

            print(count)