<a href="https://colab.research.google.com/github/ChristeenaGiji/Machine-learning/blob/main/item_Recomendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Data processing
import pandas as pd
import numpy as np
import scipy.stats

# Visualization
import seaborn as sns

# Similarity
from sklearn.metrics.pairwise import cosine_similarity


data=pd.read_csv('/content/results.csv')
data.head()

# Use 'creatorId' as 'userId' and 'ItemName' as 'item'
data['item_count'] = 1  # Assuming each entry is one interaction

# Aggregate by user and item
user_item = data.groupby(['creatorId', 'itemName'], as_index=False).agg({'item_count': 'sum'})

# Number of users
print('The dataset has', user_item['creatorId'].nunique(), 'unique users')

# Number of items
print('The dataset has', user_item['itemName'].nunique(), 'unique items')

# Number of interactions
print('The dataset has', user_item['item_count'].sum(), 'total interactions')

# Create user-item matrix
matrix = user_item.pivot_table(index='itemName', columns='creatorId', values='item_count', fill_value=0)

# Normalize the user-item matrix
matrix_norm = matrix.subtract(matrix.mean(axis=1), axis=0)

# Item similarity matrix using cosine similarity
item_similarity_cosine = cosine_similarity(matrix_norm)
item_similarity_df_cosine = pd.DataFrame(item_similarity_cosine, index=matrix_norm.index, columns=matrix_norm.index)

def item_based_rec_cosine(picked_userid, number_of_similar_items=5, number_of_recommendations=3):
    # Items that the user has not interacted with (use original matrix before normalization)
    picked_userid_unwatched = pd.DataFrame(matrix[picked_userid] == 0).reset_index()
    picked_userid_unwatched = picked_userid_unwatched[picked_userid_unwatched[picked_userid]==True]['itemName'].values.tolist()

    # Items that the user has interacted with (use normalized matrix)
    picked_userid_watched = pd.DataFrame(matrix_norm[picked_userid].dropna(axis=0, how='all')\
                          .sort_values(ascending=False)).reset_index().rename(columns={picked_userid: 'rating'})

    # Dictionary to save unwatched items and predicted rating pair
    rating_prediction = {}

    # Loop through unwatched items
    for picked_item in picked_userid_unwatched:
        # Calculate the similarity score of the picked item with other items (using cosine similarity)
        picked_item_similarity_score = item_similarity_df_cosine[[picked_item]].reset_index().rename(columns={picked_item: 'similarity_score'})

        # Rank the similarities between the user's watched items and the picked unwatched item
        picked_userid_watched_similarity = pd.merge(left=picked_userid_watched, right=picked_item_similarity_score, on='itemName', how='inner')\
                                              .sort_values('similarity_score', ascending=False)[:number_of_similar_items]

        # Calculate the predicted rating
        if np.sum(picked_userid_watched_similarity['similarity_score']) == 0:
            predicted_rating = 0  # Default rating if no similarity
        else:
            predicted_rating = round(np.average(picked_userid_watched_similarity['rating'],
                                                weights=picked_userid_watched_similarity['similarity_score']), 6)

        # Save the predicted rating
        rating_prediction[picked_item] = predicted_rating

    # Return the top recommendations
    return sorted(rating_prediction.items(), key=lambda x: x[1], reverse=True)[:number_of_recommendations]


# Get recommendations for a specific user
recommended_items = item_based_rec_cosine(picked_userid='abc@xyz.com', number_of_similar_items=5, number_of_recommendations=3)
print(recommended_items)




The dataset has 22 unique users
The dataset has 98 unique items
The dataset has 110 total interactions
[('ES_All Night Dance - PW.mp3', 0.188609), ('ES_Pebble Beach - Peace Reels.mp3', 0.188609), ('ES_Purple Sunset - Heyson.mp3', 0.188609)]
