**Load and Inspect Data**

In [None]:
import pandas as pd

# Load the dataset from the configured path so the location only has to be
# changed in one place.
file_path = '/content/7817_1.csv'
data = pd.read_csv(file_path)

# Display dataset information.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly instead of being wrapped in print() (which would emit a stray "None").
data.info()
print(data.head())

# Check for missing values
print(data.isnull().sum())

# Fill missing ratings with the median rating. Plain assignment is used rather
# than inplace=True so the transformation is explicit.
data = data.fillna({'reviews.rating': data['reviews.rating'].median()})

# Display the first few rows again, now that missing ratings are imputed
print(data.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1597 entries, 0 to 1596
Data columns (total 27 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   id                    1597 non-null   object 
 1   asins                 1597 non-null   object 
 2   brand                 1597 non-null   object 
 3   categories            1597 non-null   object 
 4   colors                774 non-null    object 
 5   dateAdded             1597 non-null   object 
 6   dateUpdated           1597 non-null   object 
 7   dimension             565 non-null    object 
 8   ean                   898 non-null    float64
 9   keys                  1597 non-null   object 
 10  manufacturer          965 non-null    object 
 11  manufacturerNumber    902 non-null    object 
 12  name                  1597 non-null   object 
 13  prices                1597 non-null   object 
 14  reviews.date          1217 non-null   object 
 15  reviews.doRecommend  

**Handle Duplicate Entries and Create User-Product Matrix**

In [None]:
import numpy as np

# Collapse repeated (user, product) reviews into a single row holding the
# mean rating for that pair.
data_aggregated = (
    data
    .groupby(['reviews.username', 'asins'])['reviews.rating']
    .mean()
    .reset_index()
)

# Reshape to a users x products table; pairs without a review become 0.
user_product_matrix = (
    data_aggregated
    .pivot(index='reviews.username', columns='asins', values='reviews.rating')
    .fillna(0)
)

# Centre every user's row on that user's mean rating (the mean is taken over
# the whole row, zero-filled cells included).
user_ratings_mean = user_product_matrix.values.mean(axis=1)
user_product_matrix_normalized = user_product_matrix.values - user_ratings_mean[:, np.newaxis]


**Apply SVD for Matrix Factorization**

In [None]:
from scipy.sparse.linalg import svds

# Perform Singular Value Decomposition (SVD).
# svds (default ARPACK solver) requires 1 <= k < min(n_users, n_products), so
# the requested 50 latent factors are clamped to the largest rank the matrix
# allows. When 50 is already valid the result is unchanged.
n_latent_factors = min(50, min(user_product_matrix_normalized.shape) - 1)
U, sigma, Vt = svds(user_product_matrix_normalized, k=n_latent_factors)

# svds returns the singular values as a 1-D array; turn them into a diagonal
# matrix so they can be used directly in the U @ sigma @ Vt reconstruction.
sigma = np.diag(sigma)


**Predict Ratings**

In [None]:
# Rebuild the (low-rank) rating matrix from its factors and add each user's
# mean rating back onto that user's row.
all_user_predicted_ratings = U.dot(sigma).dot(Vt) + user_ratings_mean[:, np.newaxis]

# Wrap the result in a DataFrame whose columns are the product ids; rows keep
# a positional index in the same order as user_product_matrix.
predicted_ratings = pd.DataFrame(
    data=all_user_predicted_ratings,
    columns=user_product_matrix.columns,
)


**Define Recommendation Function**

In [None]:
def recommend_products(user_id, num_recommendations=5):
    """Return the products with the highest predicted rating for a user.

    Reads the module-level ``data``, ``user_product_matrix`` and
    ``predicted_ratings`` objects.

    Parameters
    ----------
    user_id
        A label present in ``user_product_matrix.index`` (a
        ``reviews.username`` value).
    num_recommendations : int, default 5
        Maximum number of products to return.

    Returns
    -------
    pandas.DataFrame
        At most ``num_recommendations`` rows of ``data``, one per distinct
        ``asins`` value the user has not reviewed, ordered by predicted
        rating (highest first). The columns are those of ``data``; the
        predicted rating is used for ordering only and is not included.

    Raises
    ------
    KeyError
        If ``user_id`` is not in ``user_product_matrix.index``.
    """
    # predicted_ratings is positionally aligned with user_product_matrix, so
    # the user's label is translated to a row position first.
    user_row_number = user_product_matrix.index.get_loc(user_id)
    user_predictions = (predicted_ratings.iloc[user_row_number]
                        .rename('PredictedRating')
                        .reset_index())

    # Products the user has already reviewed are never recommended.
    rated_asins = data.loc[data['reviews.username'] == user_id, 'asins']

    # `data` holds one row per review, so a product appears once per review.
    # Keep a single row per product; otherwise the top-N would be filled with
    # repeated copies of the same product.
    candidates = (data[~data['asins'].isin(rated_asins)]
                  .drop_duplicates(subset='asins'))

    recommendations = (candidates
                       .merge(user_predictions, how='left', on='asins')
                       .sort_values('PredictedRating', ascending=False)
                       # ':-1' drops the trailing PredictedRating column.
                       .iloc[:num_recommendations, :-1])

    return recommendations

**Find a Valid User ID and Generate Recommendations**

In [None]:
# Find a valid user_id from the dataset.
# Missing usernames are skipped: a NaN username has no row in the
# user-product matrix, so it cannot be looked up by recommend_products.
valid_user_id = data['reviews.username'].dropna().unique()[0]  # First non-missing username as an example
print(f'Using valid user_id: {valid_user_id}')

# Example usage
print(f'Recommended products for user {valid_user_id}:')
print(recommend_products(user_id=valid_user_id))

# Display the basic information again to verify any changes made.
# DataFrame.info() prints its own report and returns None, so it is not
# wrapped in print().
data.info()
print(data.head())


Using valid user_id: Cristina M
Recommended products for user Cristina M:
                        id       asins   brand  \
1104  AVpfpK8KLJeJML43BCuD  B01BH83OOM  Amazon   
1055  AVpfpK8KLJeJML43BCuD  B01BH83OOM  Amazon   
1053  AVpfpK8KLJeJML43BCuD  B01BH83OOM  Amazon   
1052  AVpfpK8KLJeJML43BCuD  B01BH83OOM  Amazon   
1051  AVpfpK8KLJeJML43BCuD  B01BH83OOM  Amazon   

                                             categories colors  \
1104  Amazon Devices,Home,Smart Home & Connected Liv...  Black   
1055  Amazon Devices,Home,Smart Home & Connected Liv...  Black   
1053  Amazon Devices,Home,Smart Home & Connected Liv...  Black   
1052  Amazon Devices,Home,Smart Home & Connected Liv...  Black   
1051  Amazon Devices,Home,Smart Home & Connected Liv...  Black   

                 dateAdded           dateUpdated                 dimension  \
1104  2017-01-04T03:51:17Z  2017-08-13T08:31:07Z  4.8 in x 6.6 in x 3.2 in   
1055  2017-01-04T03:51:17Z  2017-08-13T08:31:07Z  4.8 in x 6.6 in x 3.2 