In [1]:
import os

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [2]:
# Location of the campaign-participation CSV. The default is the author's local
# copy; set the CAMPAIGN_DATA_PATH environment variable to run the notebook on
# another machine without editing this cell.
file_path = os.environ.get(
    "CAMPAIGN_DATA_PATH",
    r"C:\Users\tanap\OneDrive\เดสก์ท็อป\MADT3-NIDA\(2-2566) MADT 6004 Applied Data Analytics for Business\Model\Collaborative Filtering\credit_card_campaigns_data_v2.csv",
)
camp_df = pd.read_csv(file_path)
camp_df.head()

Unnamed: 0,cust_id,pro_restrnt01,pro_restrnt02,pro_restrnt03,pro_restrnt04,pro_restrnt05,pro_restrnt06,pro_restrnt07,pro_restrnt08,pro_restrnt09,...,pro_restrnt41,pro_restrnt42,pro_restrnt43,pro_restrnt44,pro_restrnt45,pro_restrnt46,pro_restrnt47,pro_restrnt48,pro_restrnt49,pro_restrnt50
0,66000001,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
1,66000002,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,66000003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
3,66000004,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,66000005,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# Customer x promotion matrix: one row per cust_id, every remaining column kept
# as a feature (the preview above suggests 0/1 participation flags - confirm).
interaction_matrix = camp_df.set_index('cust_id')
interaction_sparse_matrix = csr_matrix(interaction_matrix.to_numpy())

# Column means sorted high-to-low; used as the popularity fallback ranking.
promotion_popularity = interaction_matrix.mean().sort_values(ascending=False)

# Brute-force cosine-distance neighbour search over the customer rows.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine')
model_knn.fit(interaction_sparse_matrix)

In [4]:
def get_recommendations(cust_id, n_neighbors:int, n_recommendations:int):
    """Recommend promotions for one customer using neighbour-based collaborative filtering.

    Reads the notebook-level objects ``interaction_matrix``,
    ``interaction_sparse_matrix``, ``promotion_popularity`` and ``model_knn``.

    Strategy:
      * A customer with no recorded interactions gets the most popular promotions.
      * Otherwise the ``n_neighbors`` nearest customers are looked up and each
        promotion is scored by the mean of the neighbours' values; promotions
        the customer already has are scored 0.
      * If the neighbours carry no usable signal (none found, or all of them
        have the same interaction total) the popularity ranking is used instead.

    Parameters
    ----------
    cust_id
        Label of the customer in ``interaction_matrix.index``.
    n_neighbors : int
        Number of similar customers to average over.
    n_recommendations : int
        Maximum number of promotions to return.

    Returns
    -------
    (pandas.Series, int)
        The recommendations (index = promotion name, value = score) and the
        customer's positional index if they never interacted, else ``0``.
        NOTE: position 0 is therefore indistinguishable from "has interactions";
        callers that need an exact list should test the interaction row directly.

    Raises
    ------
    KeyError
        If ``cust_id`` is not present in ``interaction_matrix.index``.
    """
    customer_index = interaction_matrix.index.get_loc(cust_id)
    customer_interactions = interaction_matrix.iloc[customer_index]
    customer_index_never = 0

    if customer_interactions.sum() == 0:
        # Cold start: nothing to compare on, so fall back to overall popularity.
        recommendations = promotion_popularity.head(n_recommendations)
        customer_index_never = customer_index
    else:
        # Ask for one extra neighbour because the customer is normally returned
        # as their own nearest neighbour; never ask for more rows than exist.
        k = min(n_neighbors + 1, interaction_matrix.shape[0])
        _, indices = model_knn.kneighbors(interaction_sparse_matrix[customer_index], n_neighbors=k)

        similar_customers = interaction_matrix.index[indices.flatten()].tolist()
        if cust_id in similar_customers:
            similar_customers.remove(cust_id)
        # With tied distances the customer may be missing from their own
        # neighbour list; trim so exactly n_neighbors (the closest) are averaged.
        similar_customers = similar_customers[:n_neighbors]

        similar_customers_interactions = interaction_matrix.loc[similar_customers]
        neighbour_totals = similar_customers_interactions.sum(axis=1)

        # Heuristic kept from the original: identical interaction totals across
        # all neighbours are treated as "no signal". An empty neighbour set is
        # handled the same way instead of failing on .iloc[0].
        no_signal = similar_customers_interactions.empty or np.all(
            neighbour_totals == neighbour_totals.iloc[0]
        )

        if no_signal:
            recommendations = promotion_popularity.head(n_recommendations)
        else:
            recommendation_scores = similar_customers_interactions.mean(axis=0)
            # Push promotions the customer already took part in to the bottom.
            recommendation_scores[customer_interactions > 0] = 0
            recommendations = recommendation_scores.sort_values(ascending=False).head(n_recommendations)

    shortfall = n_recommendations - len(recommendations)
    if shortfall > 0:
        # Top up with popular promotions not already listed. pd.concat replaces
        # Series.append, which no longer exists in pandas >= 2.0.
        unused = promotion_popularity.loc[~promotion_popularity.index.isin(recommendations.index)]
        top_up = unused.head(shortfall)
        if not top_up.empty:
            recommendations = pd.concat([recommendations, top_up])

    return recommendations, customer_index_never

In [5]:
# Tunable settings for the batch run.
N_NEIGHBORS = 5
N_RECOMMENDATIONS = 1

# Positional indices of customers with no interactions at all. Computed from
# the matrix itself: the second return value of get_recommendations uses 0 as
# its "not applicable" marker, so a `> 0` test would silently miss position 0.
cust_never = np.flatnonzero((interaction_matrix.sum(axis=1) == 0).to_numpy()).tolist()

all_recommendations = {}      # cust_id -> Series of (promotion -> score)
recommendation_records = []   # long-form rows for the result table
for cust_id in interaction_matrix.index:
    recommendations, _never_position = get_recommendations(
        cust_id, n_neighbors=N_NEIGHBORS, n_recommendations=N_RECOMMENDATIONS
    )
    all_recommendations[cust_id] = recommendations
    recommendation_records.extend(
        (cust_id, promotion, score) for promotion, score in recommendations.items()
    )

# Build the long table directly instead of going through a wide
# customer x promotion frame and stack(), which depends on stack() dropping the
# NaN cells. Rows come out in customer order.
recommendations_df = pd.DataFrame(
    recommendation_records,
    columns=['Customer_ID', 'Promotion', 'Recommendation_Score'],
)

print(recommendations_df.shape)
recommendations_df.sort_values(by='Customer_ID', ascending=True).head()

(1000, 3)


Unnamed: 0,Customer_ID,Promotion,Recommendation_Score
0,66000001,pro_restrnt12,0.8
380,66000002,pro_restrnt35,0.4
434,66000003,pro_restrnt02,0.8
512,66000004,pro_restrnt21,0.4
1,66000005,pro_restrnt12,0.6


In [392]:
# file_path_collaborative_filtering_result = r"C:\Users\tanap\OneDrive\เดสก์ท็อป\MADT3-NIDA\(2-2566) MADT 6004 Applied Data Analytics for Business\Model\Collaborative Filtering\02_[model]_collaborative_filtering_result.csv"
# recommendations_df.to_csv(file_path_collaborative_filtering_result, index=False)