In [5]:
import pandas as pd
from surprise import Dataset, Reader, KNNBasic, SVD, accuracy
from surprise.model_selection import cross_validate

# Load the data
df = pd.read_csv("Data_Customer_Portfolio.csv")

# Normalize the AmountPurchased column
df['AmountPurchased'] = (df['AmountPurchased'] - df['AmountPurchased'].min()) / (df['AmountPurchased'].max() - df['AmountPurchased'].min())

# Convert the DataFrame into the format required by surprise
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(df[['CustomerID', 'CompanyTicker', 'AmountPurchased']], reader)

In [6]:
knn_algo = KNNBasic(sim_options={'user_based': True})
cv_results_knn = cross_validate(knn_algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

# Print mean RMSE and MAE from cross-validation for KNN
print(f"Mean RMSE for KNN: {cv_results_knn['test_rmse'].mean()}")
print(f"Mean MAE for KNN: {cv_results_knn['test_mae'].mean()}")

# Experiment with SVD algorithm
svd_algo = SVD()
cv_results_svd = cross_validate(svd_algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

# Print mean RMSE and MAE from cross-validation for SVD
print(f"Mean RMSE for SVD: {cv_results_svd['test_rmse'].mean()}")
print(f"Mean MAE for SVD: {cv_results_svd['test_mae'].mean()}")





Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.3576  0.3569  0.3492  0.3584  0.3455  0.3535  0.0052  
MAE (testset)     0.2924  0.2950  0.2872  0.2963  0.2865  0.2915  0.0040  
Fit time          0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Test time         0.01    0.00    0.01    0.01    0.01    0.01    0.00    
Mean RMSE for KNN: 0.3535073105294049
Mean MAE for KNN: 0.29148436247220066
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std  

In [7]:
# Choose the best performing algorithm based on RMSE
best_algo = knn_algo if cv_results_knn['test_rmse'].mean() < cv_results_svd['test_rmse'].mean() else svd_algo
best_algo

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2261b859270>

In [8]:
def get_recommendations(customer_id, algorithm, dataframe, n=10):
    company_tickers = dataframe["CompanyTicker"].unique()
    identified_companies = dataframe[dataframe["CustomerID"] == customer_id]["CompanyTicker"].unique()
    companies_to_predict = [company for company in company_tickers if company not in identified_companies]
    predictions = [algorithm.predict(customer_id, company) for company in companies_to_predict]
    predictions.sort(key=lambda x: x.est, reverse=True)
    top_recs = [pred.iid for pred in predictions[:n]]
    return top_recs



In [9]:
# Example usage
customer_id = input("Enter customer id between 1-100")
recommendations = get_recommendations(customer_id, best_algo, df, n=5)
print(f"Top 5 company recommendations for customer {customer_id}: {recommendations}")

Top 5 company recommendations for customer 10: ['DUK', 'TGT', 'MMC', 'HD', 'CTAS']
