In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
import matplotlib.pyplot as plt

In [3]:
import seaborn as sns

In [4]:
# Load All_merge.csv (path is relative to the notebook's working directory)
# and display the (rows, columns) shape of the resulting frame.
df = pd.read_csv("All_merge.csv")
df.shape

(1858329, 14)

In [5]:
# Show the column labels of the loaded frame.
df.columns

Index(['date', 'product_code', 'market', 'customer_code', 'qty', 'fiscal_year',
       'gross_sales_amt', 'net_sales_amt', 'total_cogs_amt',
       'gross_margin_amt', 'Total_discs', 'customer', 'segment', 'product'],
      dtype='object')

In [6]:
# Store customer codes as strings: they are used as grouping keys and end up
# as the column labels of the product-customer matrix built further down.
df['customer_code'] = df['customer_code'].astype(str)

In [7]:
# Summarise column dtypes and memory use; customer_code is listed as object
# after the string cast.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1858329 entries, 0 to 1858328
Data columns (total 14 columns):
 #   Column            Dtype  
---  ------            -----  
 0   date              object 
 1   product_code      object 
 2   market            object 
 3   customer_code     object 
 4   qty               int64  
 5   fiscal_year       int64  
 6   gross_sales_amt   float64
 7   net_sales_amt     float64
 8   total_cogs_amt    float64
 9   gross_margin_amt  float64
 10  Total_discs       float64
 11  customer          object 
 12  segment           object 
 13  product           object 
dtypes: float64(5), int64(2), object(7)
memory usage: 198.5+ MB


# Recommendation System (next product to buy)

Collaborative Filtering – Based on purchase patterns of similar customers.

Association Rule Mining (Market Basket Analysis) – "People who bought X also bought Y".

Hybrid Approach – Combine both.

#### Collaborative Filtering (Next Product Prediction)
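We use item-based collaborative filtering: each product is represented by the vector of quantities that each customer bought, and scikit-learn's `NearestNeighbors` (cosine metric) finds the products whose purchase vectors are most similar.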

In [11]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors

# === 1. Keep only fiscal years 2019-2021 (both ends inclusive) ===
df_filtered = df.loc[(df['fiscal_year'] >= 2019) & (df['fiscal_year'] <= 2021)]

# === 2. Customer x product table of total purchased quantity ===
# Rows are customer codes, columns are product codes; a customer/product pair
# with no purchases in the window is filled with 0.
cust_prod_matrix = (
    df_filtered
    .groupby(['customer_code', 'product_code'])['qty']
    .sum()
    .unstack(level='product_code', fill_value=0)
)

# === 3. Flip the table so each product is one row ===
# A product is then described by its vector of per-customer quantities.
product_matrix = cust_prod_matrix.transpose()

# === 4. Lookup table: product code -> product name ===
# If one code appears with several names, the last pair encountered wins.
product_mapping = dict(
    df_filtered[['product_code', 'product']]
    .drop_duplicates()
    .itertuples(index=False, name=None)
)

# === 5. Brute-force cosine nearest-neighbour model over the product rows ===
model_knn = NearestNeighbors(metric='cosine', algorithm='brute').fit(product_matrix)

# === 6. Function to recommend similar products ===
def recommend_products(product_code, n=5):
    """
    Recommend up to n products whose customer purchase vectors are closest
    (by cosine similarity) to the one of ``product_code``.

    Parameters
    ----------
    product_code : hashable
        A label from ``product_matrix.index`` identifying the query product.
    n : int, default 5
        Maximum number of recommendations. Fewer are returned when the fitted
        matrix holds fewer than ``n`` other products.

    Returns
    -------
    list of tuple
        ``(product_code, product_name, similarity_score)`` ordered from most
        to least similar. The score is ``1 - cosine distance`` rounded to
        three decimals, as a plain Python float. The name falls back to
        ``"Unknown Product"`` when the code is missing from
        ``product_mapping``. An empty list is returned when ``product_code``
        is not in ``product_matrix``.
    """
    if product_code not in product_matrix.index:
        return []

    # The query product is itself one of the fitted rows, so request one extra
    # neighbour to make up for it, but never more rows than the model holds
    # (kneighbors raises if n_neighbors exceeds the number of fitted samples).
    k = min(n + 1, len(product_matrix))

    # Query with a one-row DataFrame so the column labels match the ones the
    # model was fitted with (a bare list/array triggers a feature-name warning).
    distances, indices = model_knn.kneighbors(
        product_matrix.loc[[product_code]],
        n_neighbors=k,
    )

    recommended = []
    for distance, position in zip(distances[0], indices[0]):
        code = product_matrix.index[position]
        # Drop the query product by label, not by rank: another product with a
        # parallel purchase vector also sits at distance 0 and may be listed
        # before it, so "skip the first hit" can discard the wrong row.
        if code == product_code:
            continue
        name = product_mapping.get(code, "Unknown Product")
        similarity = round(float(1 - distance), 3)  # cosine similarity
        recommended.append((code, name, similarity))

    # If the query product was not among the k hits (distance ties), one
    # surplus neighbour is left over; trim to the requested size.
    return recommended[:n]

In [12]:
# Browse the rows of the "Notebook" segment within the filtered fiscal years.
df_filtered.loc[df_filtered['segment'].eq("Notebook")]

Unnamed: 0,date,product_code,market,customer_code,qty,fiscal_year,gross_sales_amt,net_sales_amt,total_cogs_amt,gross_margin_amt,Total_discs,customer,segment,product
63314,2018-09-01,A4918110101,Australia,70008169,2,2019,708.8300,460.77917,219.757230,241.02194,248.050839,AltiQ Exclusive,Notebook,AQ BZ 101
63315,2018-09-01,A4918110101,Australia,90008165,2,2019,708.8300,354.07626,216.001280,138.07498,354.753735,Forward Stores,Notebook,AQ BZ 101
63316,2018-09-01,A4918110101,Australia,90008166,1,2019,354.4150,182.02798,108.176285,73.85170,172.387016,Sound,Notebook,AQ BZ 101
63317,2018-09-01,A4918110101,Australia,90008167,2,2019,708.8300,418.61005,218.272870,200.33717,290.219950,Electricalsocity,Notebook,AQ BZ 101
63318,2018-09-01,A4918110101,Australia,70008170,2,2019,708.8300,404.72058,217.783970,186.93661,304.109410,Atliq e Store,Notebook,AQ BZ 101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209391,2021-08-01,A4721110705,USA,90022079,12,2021,4528.8830,2235.88100,1428.317000,807.56415,2293.001650,walmart,Notebook,AQ GEN Z
1209392,2021-08-01,A4721110705,USA,90022080,13,2021,4906.2896,2227.36470,1541.069600,686.29530,2678.924940,Staples,Notebook,AQ GEN Z
1209393,2021-08-01,A4721110705,USA,90022074,23,2021,8680.3580,4253.33350,2736.573700,1516.75950,4427.025500,Flipkart,Notebook,AQ GEN Z
1209394,2021-08-01,A4721110705,USA,90022082,9,2021,3396.6620,1781.39990,1074.602300,706.79760,1615.262170,Amazon,Notebook,AQ GEN Z


In [13]:
# Each printed tuple is (product_code, product_name, similarity_score).
print(recommend_products(product_code='A4918110101', n=5))

[('A5119110302', 'AQ BZ Gen Y', 0.996), ('A4319110305', 'AQ Velocity', 0.996), ('A4918110102', 'AQ BZ 101', 0.995), ('A4218110202', 'AQ Digit', 0.995), ('A0519150206', 'AQ Mforce Gen Y', 0.995)]
