<a href="https://colab.research.google.com/github/NishmithaJR/Data-Analytics-Project/blob/main/Customer_Segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import datetime as dt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Load the raw invoice-level transactions.
# NOTE(review): the path points inside /content/drive, so this presumably
# needs Google Drive to be mounted in the Colab session first — confirm.
DATA_PATH = "/content/drive/MyDrive/Data Analytics/Customer segmentation.csv"
df = pd.read_csv(DATA_PATH)
df

Unnamed: 0,Customer_id,Invoice_no,Invoice_date,Quantity,Unit_price
0,101,INV001,01-06-2024,2,50
1,101,INV002,10-06-2024,1,30
2,101,INV003,20-06-2024,3,20
3,102,INV004,15-05-2024,1,100
4,102,INV005,05-06-2024,2,60
5,103,INV006,15-06-2024,1,25
6,103,INV007,18-06-2024,2,30
7,104,INV008,22-04-2024,5,10
8,104,INV009,20-05-2024,3,15
9,104,INV010,10-06-2024,4,12


**RFM Analysis in Python**

In [None]:
# Line-level revenue: units bought multiplied by the price per unit
df['TotalAmount'] = df['Quantity'].mul(df['Unit_price'])

In [None]:
# Parse the invoice dates, which are written day-month-year (e.g. 01-06-2024),
# into pandas datetimes so they can be compared and subtracted.
df['Invoice_date'] = pd.to_datetime(
    arg=df['Invoice_date'],
    format="%d-%m-%Y",
)

In [None]:
# Reference ("snapshot") date used to measure recency: one day after the most
# recent invoice in the data, so every invoice date lies strictly before it.
# `dt` (the stdlib datetime module) is imported in the notebook's import cell.
snapshot_date = df['Invoice_date'].max() + dt.timedelta(days=1)

In [None]:
# Build one RFM row per customer:
#   Recency   - whole days between the snapshot date and the customer's latest invoice
#   Frequency - number of distinct invoice numbers
#   Monetary  - sum of TotalAmount over all of the customer's invoice lines
rfm = (
    df.groupby('Customer_id')
    .agg(
        Recency=('Invoice_date', lambda dates: (snapshot_date - dates.max()).days),
        Frequency=('Invoice_no', 'nunique'),
        Monetary=('TotalAmount', 'sum'),
    )
    .reset_index()
    .rename(columns={'Customer_id': 'CustomerID'})
)
rfm.head()

Unnamed: 0,CustomerID,Recency,Frequency,Monetary
0,101,7,3,190
1,102,22,2,220
2,103,9,2,85
3,104,17,3,143
4,105,2,2,550


In [None]:
# Quintile scores for each metric; 5 is always the best score and 1 the lowest.
# A low Recency (a recent purchase) is good, so its labels run from 5 down to 1.
rfm['R'] = pd.qcut(rfm['Recency'], q=5, labels=list(range(5, 0, -1)))
# Frequency is ranked first (ties broken by row order) before being binned.
rfm['F'] = pd.qcut(rfm['Frequency'].rank(method='first'), q=5, labels=list(range(1, 6)))
# NOTE(review): R and M are binned on the raw values; heavily tied values may
# make qcut's bin edges non-unique — confirm whether they need ranking like F.
rfm['M'] = pd.qcut(rfm['Monetary'], q=5, labels=list(range(1, 6)))

In [None]:
# Concatenate the three scores, in R-F-M order, into one string code such as "454"
rfm['RFM_Score'] = rfm['R'].astype(str).str.cat(
    [rfm['F'].astype(str), rfm['M'].astype(str)]
)
rfm.head()

Unnamed: 0,CustomerID,Recency,Frequency,Monetary,R,F,M,RFM_Score
0,101,7,3,190,4,5,4,454
1,102,22,2,220,1,3,5,135
2,103,9,2,85,4,4,1,441
3,104,17,3,143,2,5,2,252
4,105,2,2,550,5,4,5,545


In [None]:
# Map a customer's R, F and M scores to a named segment
def rfm_segment(row):
    """Return the segment label for one row of RFM scores.

    Parameters
    ----------
    row : mapping-like
        Must provide the keys 'R', 'F' and 'M'; each value is converted
        with ``int()`` before comparison.

    Returns
    -------
    str
        The label of the first matching rule, checked in this order:
        'Best Customers', 'Loyal Customers', 'Recent Customers',
        'Frequent Buyers', 'Big Spenders', 'At Risk', otherwise 'Others'.
    """
    recency, frequency, monetary = (int(row[key]) for key in ('R', 'F', 'M'))

    # Customers with a high recency score are split by how often/much they buy.
    if recency >= 4:
        if frequency >= 4 and monetary >= 4:
            return 'Best Customers'
        if frequency >= 3:
            return 'Loyal Customers'
        return 'Recent Customers'

    # From here on the recency score is 3 or lower.
    if frequency >= 4:
        return 'Frequent Buyers'
    if monetary >= 4:
        return 'Big Spenders'
    if recency <= 2 and frequency <= 2:
        return 'At Risk'
    return 'Others'

# Label every customer by running rfm_segment once per row
rfm['Segment'] = rfm.apply(func=rfm_segment, axis='columns')
rfm.head()

Unnamed: 0,CustomerID,Recency,Frequency,Monetary,R,F,M,RFM_Score,Segment
0,101,7,3,190,4,5,4,454,Best Customers
1,102,22,2,220,1,3,5,135,Big Spenders
2,103,9,2,85,4,4,1,441,Loyal Customers
3,104,17,3,143,2,5,2,252,Frequent Buyers
4,105,2,2,550,5,4,5,545,Best Customers


**Export RFM Data from Colab**

In [None]:
# Write the segmented RFM table to a CSV file, leaving out the index column
EXPORT_NAME = "rfm_segmented_customers.csv"
rfm.to_csv(EXPORT_NAME, index=False)

# Hand the exported file to the browser via Colab's download helper
from google.colab import files
files.download(EXPORT_NAME)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>