In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Loading dataset
# Reads 'sales.csv' (a path relative to the working directory) into the DataFrame `df`.
# NOTE(review): the preview output includes an 'Unnamed: 0' column, which suggests
# the CSV was written together with its index; passing index_col=0 would presumably
# avoid that extra column — confirm before changing.
df = pd.read_csv('sales.csv')

In [None]:
# Preview the first rows to check which columns were loaded
df.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [None]:
# Build one free-text field per row for the TF-IDF step: name, platform, genre and
# publisher joined by single spaces. A missing value in any of the four columns
# leaves the combined value missing as well.
df['text_data'] = df['Name'].str.cat(df[['Platform', 'Genre', 'Publisher']], sep=' ')

In [None]:
# Drop rows with missing values in the 'text_data' column, then renumber the index.
# The TF-IDF and similarity matrices built from this frame are addressed by row
# position; without the reset, the gaps left by the dropped rows would make index
# labels and row positions disagree, so positions returned by the recommender could
# point at the wrong rows when looked up by label.
df = df.dropna(subset=['text_data']).reset_index(drop=True)

In [None]:
# TF-IDF Vectorization
# Fit a TF-IDF vectorizer (configured with the built-in English stop-word list) on
# the combined text and encode df['text_data'] as tfidf_matrix in the same call.
tfidf_vectorizer = TfidfVectorizer(stop_words = 'english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['text_data'])

In [None]:
# Calculate cosine similarity between items based on TF-IDF vectors
# recommend_similar_items reads one row of this matrix per query item, so row i is
# expected to hold the similarity of item i to every item.
# NOTE(review): this is an all-pairs comparison, so memory presumably grows with the
# square of the number of rows — confirm it fits for the full dataset.
item_similarity_matrix = cosine_similarity(tfidf_matrix)

In [None]:
# Recommendation Model
def recommend_similar_items(item_index, top_n=5, similarity_matrix=None):
    """Return the row positions of the items most similar to ``item_index``.

    Parameters
    ----------
    item_index : int
        Row position of the query item in the similarity matrix. Negative
        values count from the end, as with ordinary indexing.
    top_n : int, default 5
        Maximum number of neighbours to return. Values <= 0 give an empty
        result; fewer than ``top_n`` positions are returned when there are
        not enough other items.
    similarity_matrix : array-like, optional
        Square matrix whose row ``i`` holds the similarity of item ``i`` to
        every item. Defaults to the notebook-level ``item_similarity_matrix``.

    Returns
    -------
    numpy.ndarray
        Row positions (suitable for ``DataFrame.iloc``) ordered from most to
        least similar. The query item itself is never included; items with
        equal scores are returned in ascending position order.

    Raises
    ------
    IndexError
        If ``item_index`` is outside the matrix.
    """
    if similarity_matrix is None:
        similarity_matrix = item_similarity_matrix

    # Get similarity scores for the given item
    similarity_scores = np.asarray(similarity_matrix[item_index], dtype=float).ravel()
    n_items = similarity_scores.shape[0]
    query_position = item_index + n_items if item_index < 0 else item_index

    # Rank every item from most to least similar. A stable sort on the negated
    # scores keeps ties in a deterministic (ascending position) order.
    ranked_positions = np.argsort(-similarity_scores, kind="stable")

    # Remove the query item explicitly rather than assuming it ranks first:
    # an exact duplicate ties with it at the top, and an all-zero row has no
    # self-similarity at all.
    ranked_positions = ranked_positions[ranked_positions != query_position]

    return ranked_positions[:max(top_n, 0)]

In [None]:
# Demo: ask for the default number of neighbours (5) of the dataset's first row
# and print their row positions.
item_index = 0
top_similar_indices = recommend_similar_items(item_index)
report_line = "Top 5 similar items for item {}: {}".format(item_index, top_similar_indices)
print(report_line)

Top 5 similar items for item 0: [   3   13 4842   78   14]
