### Importing the libraries

In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix 
# csr_matrix: the Compressed Sparse Row format, which saves memory by storing only the non-zero elements.
from sklearn.decomposition import TruncatedSVD
# TruncatedSVD: a dimensionality-reduction technique used to reduce the number of features in the data.
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

### Loading the data

In [None]:
# Read the purchase records from the CSV file into a DataFrame.
# Later cells read its 'Customer_Name', 'Item_Name' and 'Quantity' columns.
data = pd.read_csv('Final_Data.csv')

In [None]:
# Preview the first rows of the loaded data as a quick sanity check.
data.head()

### Creating a pivot table to represent the user-item matrix

In [None]:
# Build the customer x item matrix: one row per Customer_Name, one column
# per Item_Name, with each cell taken from the Quantity column.
# pivot_table aggregates with the mean by default, so when a customer has
# several rows for the same item the cell holds the *average* Quantity of
# those rows, not their total (aggfunc='sum' would give totals).
# Customer/item pairs with no rows at all are filled with 0.
user_item_matrix = data.pivot_table(
    values='Quantity',
    index='Customer_Name',
    columns='Item_Name',
    aggfunc='mean',
    fill_value=0,
)

### Converting the user-item matrix to a sparse matrix

In [None]:
# Re-encode the dense customer x item array in SciPy's Compressed Sparse Row
# format. CSR keeps only the non-zero entries, which saves memory when most
# customers have bought only a few of the available items.
sparse_matrix = csr_matrix(user_item_matrix.to_numpy())

### Split the data into training and testing sets

In [None]:
# Hold out 20% of the rows of `data` as a test set; the fixed random_state
# makes the shuffle, and therefore the split, reproducible.
# The split is over rows, not customers, so the same customer can have rows
# in both train_data and test_data.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=10)

### Perform matrix factorization using SVD on the training data

In [None]:
# Fit the factorization on the training split only.
#
# Fitting on the matrix built from all of `data` would let the held-out test
# rows influence the model, so the test accuracy computed later would not
# measure generalization. Instead, a user-item matrix is built from
# train_data alone and then aligned to the index/columns of user_item_matrix:
# customers or items that appear only in the test rows become all-zero
# rows/columns. Keeping the same shape and ordering means row i of
# user_factors and column j of item_factors still correspond to
# user_item_matrix.index[i] and user_item_matrix.columns[j].
train_user_item_matrix = data.loc[train_data.index].pivot_table(
    index='Customer_Name', columns='Item_Name', values='Quantity', fill_value=0
).reindex(
    index=user_item_matrix.index,
    columns=user_item_matrix.columns,
    fill_value=0,
)
train_sparse_matrix = csr_matrix(train_user_item_matrix.values)

# Number of latent components: one less than the smaller dimension of the
# user-item matrix.
n_components = min(len(user_item_matrix.columns), len(user_item_matrix.index)) - 1
model = TruncatedSVD(n_components=n_components, random_state=42)

# user_factors holds one embedding row per customer; item_factors
# (model.components_) holds one embedding column per item.
user_factors = model.fit_transform(train_sparse_matrix)
item_factors = model.components_

### Function to recommend items based on the customer's purchase history

In [None]:
# Default number of recommendations; the evaluation cells reuse this value.
n = 5


def recommend_items(customer_name, n=n):
    """Return the names of the ``n`` highest-scoring items for a customer.

    The customer's row of ``user_factors`` (their embedding) is multiplied
    by ``item_factors`` to obtain one score per item, and the items are
    ranked by descending score. No item is filtered out, so items the
    customer has already bought can be returned.

    Args:
        customer_name: Label of the customer in ``user_item_matrix.index``.
        n: How many items to return. Defaults to the value the module-level
            ``n`` had when this function was defined.

    Returns:
        The ``user_item_matrix.columns`` labels of the top ``n`` items,
        best first.

    Raises:
        KeyError: If ``customer_name`` is not in ``user_item_matrix.index``.
    """
    row = user_item_matrix.index.get_loc(customer_name)
    scores = user_factors[row].dot(item_factors)
    # argsort is ascending, so reverse it to put the best-scoring item first.
    ranked_best_first = np.argsort(scores)[::-1]
    return user_item_matrix.columns[ranked_best_first[:n]]

### Example usage

In [None]:
# Demonstrate the recommender on one hand-picked customer.
customer_name = 'customer5'
recommended_items = recommend_items(customer_name)
print("Recommended items for", "{}:".format(customer_name), recommended_items)

### Calculating training accuracy

In [None]:
# Score the recommender against the training rows: for each customer in the
# training split, compare their top-n recommendations with the distinct
# items they have in train_data.
train_customer_names = train_data['Customer_Name'].unique()
train_accuracies = []

for customer_name in train_customer_names:
    is_this_customer = train_data['Customer_Name'] == customer_name
    purchased = set(train_data.loc[is_this_customer, 'Item_Name'].unique())
    suggested = set(recommend_items(customer_name, n))

    # Top-N accuracy: hits divided by the most hits that were achievable
    # (a customer with fewer than n items cannot reach n hits).
    hits = len(purchased.intersection(suggested))
    train_accuracies.append(hits / min(n, len(purchased)))

train_average_accuracy = np.mean(train_accuracies)
print("Training Average Top-{} Accuracy: {:.2f}%".format(n, train_average_accuracy * 100))

### Calculating testing accuracy

In [None]:
# Score the recommender against the held-out rows: for each customer in the
# test split, compare their top-n recommendations with the distinct items
# they have in test_data.
test_customer_names = test_data['Customer_Name'].unique()
test_accuracies = []

for customer_name in test_customer_names:
    is_this_customer = test_data['Customer_Name'] == customer_name
    purchased = set(test_data.loc[is_this_customer, 'Item_Name'].unique())
    suggested = set(recommend_items(customer_name, n))

    # Top-N accuracy: hits divided by the most hits that were achievable
    # (a customer with fewer than n items cannot reach n hits).
    hits = len(purchased.intersection(suggested))
    test_accuracies.append(hits / min(n, len(purchased)))

test_average_accuracy = np.mean(test_accuracies)
print("Testing Average Top-{} Accuracy: {:.2f}%".format(n, test_average_accuracy * 100))

### Taking the input of customer number from the user to make final recommendations

In [None]:
# Ask for a customer number and print that customer's recommended items.
raw_customer_num = input("Enter the customer number: ")
try:
    customer_num = int(raw_customer_num)
except ValueError:
    # Non-numeric input gets the same friendly message as an unknown
    # customer number instead of an uncaught traceback.
    customer_num = None

# Customers are looked up by the label "customer<number>".
customer_label = "customer" + str(customer_num)

# Check against the customers actually present in the matrix rather than a
# hard-coded count, so the lookup inside recommend_items cannot fail.
if customer_num is not None and customer_label in user_item_matrix.index:
    recommended_items = recommend_items(customer_label)
    print("\nRecommended items for " + customer_label + ":\n")
    for item_name in recommended_items.values:
        print(item_name)
else:
    # NOTE(review): the upper bound assumes labels run customer1..customerN
    # without gaps - confirm against the data.
    print('Customer number should be between 1 and {}'.format(len(user_item_matrix.index)))