In [None]:
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
# Load the raw transactions from the Excel workbook.
df = pd.read_excel("Online Retail.xlsx")

# Keep a CSV copy of the data. The export must go to its own file: writing
# CSV text to the .xlsx path would overwrite the workbook and make the
# read_excel call above fail the next time the notebook is run.
df.to_csv("Online Retail.csv", index=False)

# Quick look at the first rows to confirm the load worked.
print(df.head())

In [None]:

# The analysis below groups by CustomerID, so transactions without one
# are discarded here.
required_columns = ['CustomerID']
df = df.dropna(subset=required_columns)

# Remaining missing-value count for every column.
missing_per_column = df.isna().sum()
print(missing_per_column)

In [None]:
# Make sure invoice timestamps are real datetimes before doing date arithmetic.
invoice_dates = pd.to_datetime(df['InvoiceDate'])
df['InvoiceDate'] = invoice_dates

# Reference date for the analysis: one day after the most recent invoice.
one_day = pd.Timedelta(days=1)
snapshot_date = df['InvoiceDate'].max() + one_day



In [None]:
# The monetary aggregate needs a per-line revenue column. No earlier cell
# creates 'TotalPrice', so derive it here when it is absent; otherwise the
# aggregation below fails with a KeyError on a fresh kernel.
# NOTE(review): assumes the data has 'Quantity' and 'UnitPrice' columns —
# confirm against the workbook.
if 'TotalPrice' in df.columns:
    transactions = df
else:
    transactions = df.assign(TotalPrice=df['Quantity'] * df['UnitPrice'])

# One row per customer:
#   Recency   - whole days between the customer's last invoice and snapshot_date
#   Frequency - number of distinct invoices
#   Monetary  - sum of TotalPrice over all of the customer's rows
# Named aggregation produces the final column names directly, so no
# in-place rename is needed afterwards.
rfm = (
    transactions
    .groupby('CustomerID')
    .agg(
        Recency=('InvoiceDate', lambda dates: (snapshot_date - dates.max()).days),
        Frequency=('InvoiceNo', 'nunique'),
        Monetary=('TotalPrice', 'sum'),
    )
    .reset_index()
)

In [None]:
# Score each metric from 1 to 5 using quintiles (equal-sized groups of
# customers). Equal-width bins (pd.cut with bins=5) split the *value range*
# into fifths, so a few extreme customers push almost everyone else into
# the lowest bin; quantile bins keep the five scores evenly populated.
#
# rank(method='first') gives every customer a distinct rank, so qcut can
# always find five distinct bin edges even when many customers share the
# same value (ties are broken by row order).
score_labels = [1, 2, 3, 4, 5]

recency_rank = rfm['Recency'].rank(method='first')
frequency_rank = rfm['Frequency'].rank(method='first')
monetary_rank = rfm['Monetary'].rank(method='first')

# Recency is reversed: a smaller recency gets the higher score.
rfm['R_Score'] = pd.qcut(recency_rank, q=5, labels=score_labels[::-1])
rfm['F_Score'] = pd.qcut(frequency_rank, q=5, labels=score_labels)
rfm['M_Score'] = pd.qcut(monetary_rank, q=5, labels=score_labels)

# Combine RFM score: sum of the three 1-5 scores, so the total is in 3..15.
rfm['RFM_Score'] = rfm[['R_Score', 'F_Score', 'M_Score']].astype(int).sum(axis=1)

In [None]:
def segment_me(score):
    """Return the customer segment label for a combined RFM score.

    The score is compared against the segment floors from highest to
    lowest and the first floor it reaches decides the label:

    * 12 and above -> 'Champions'
    * 9 and above  -> 'Loyal Customers'
    * 6 and above  -> 'Potential Loyalist'
    * anything else -> 'At Risk / Lost'
    """
    tiers = (
        (12, 'Champions'),
        (9, 'Loyal Customers'),
        (6, 'Potential Loyalist'),
    )
    for floor, label in tiers:
        if score >= floor:
            return label
    return 'At Risk / Lost'

# Label every customer with the segment that matches their combined score.
rfm['Segment'] = rfm['RFM_Score'].map(segment_me)

In [None]:
# Bar chart of how many customers fall into each segment,
# with the most populated segment first.
segments_by_size = rfm['Segment'].value_counts().index

fig, ax = plt.subplots(figsize=(8,5))
sns.countplot(data=rfm, x='Segment', order=segments_by_size, ax=ax)
ax.set_title("Customer Segments Distribution")
ax.set_ylabel("Number of Customers")
# Tilt the segment names so longer labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=30)
plt.show()

# Heatmap of the average raw Recency / Frequency / Monetary values per segment
# (these are the underlying metrics, not the 1-5 scores).
metric_columns = ['Recency','Frequency','Monetary']
rfm_mean = rfm.groupby('Segment')[metric_columns].mean()

fig, ax = plt.subplots(figsize=(8,5))
sns.heatmap(rfm_mean, annot=True, fmt=".1f", cmap="Blues", ax=ax)
ax.set_title("Average RFM Values by Segment")
plt.show()

# Final customer-level table with metrics, scores and segment.
print(rfm)