# In [None]:
import pandas as pd
from datetime import datetime as dt,timedelta
import plotly.express as px
import plotly.graph_objects as go
import plotly.colors

# Load the raw transaction data from the hard-coded CSV path.
df = pd.read_csv("/content/drive/MyDrive/Data/online_retail.csv")

df.head()  # Preview the first few rows (displayed when run as a notebook cell)

# Rows without a CustomerID cannot be attributed to a customer, so drop them.
df.dropna(subset=['CustomerID'], inplace=True)

df["InvoiceDate"] = pd.to_datetime(df["InvoiceDate"])
df["Total_Amount"] = df['Quantity'] * df['UnitPrice']

df.head()

# Recency is measured against the day after the newest invoice in the data,
# not against today's date: the original author notes the data set is many
# years old, so "today" would inflate every customer's recency by that age.
reference_date = df['InvoiceDate'].max() + timedelta(days=1)

reference_date

# One row per customer:
#   InvoiceDate  -> days between reference_date and the customer's last invoice
#   InvoiceNo    -> number of non-null InvoiceNo rows for the customer
#   Total_Amount -> sum of Quantity * UnitPrice
# NOTE(review): "count" tallies rows, not distinct invoices. If the CSV holds
# one row per invoice line, "nunique" would be the number of purchases --
# confirm which definition of Frequency is intended before changing it.
RFM = df.groupby("CustomerID").agg({
    "InvoiceDate": lambda x: (reference_date - x.max()).days,
    "InvoiceNo": "count",
    "Total_Amount": "sum",
})

RFM.rename(
    columns={'InvoiceDate': "Recency", "InvoiceNo": "Frequency", "Total_Amount": "Value"},
    inplace=True,
)
RFM.head()

# Quartile boundaries of each RFM column; indexed by 0.25 / 0.5 / 0.75.
quantiles = RFM.quantile(q=[0.25, 0.5, 0.75])

def RScore(x,p,d):
    """Return the quartile score (1-4) of value ``x`` for metric ``p``.

    ``d[p]`` must map the keys 0.25, 0.50 and 0.75 to that metric's quartile
    boundaries. For ``p == 'Recency'`` the scale is reversed (values at or
    below the first quartile score 4, values above the third score 1); for
    any other metric the lowest quartile scores 1 and the highest scores 4.
    """
    boundaries = d[p]
    ranks = (4, 3, 2, 1) if p == 'Recency' else (1, 2, 3, 4)

    # Walk the quartile boundaries from lowest to highest and return the rank
    # of the first one that x does not exceed.
    for quantile, rank in zip((0.25, 0.50, 0.75), ranks):
        if x <= boundaries[quantile]:
            return rank

    # x is above the third quartile (or is not comparable, e.g. NaN).
    return ranks[-1]


# Turn each raw metric into a 1-4 quartile score using RScore and the
# precomputed quartile table; the score column is named by the metric's initial.
for score_column, metric in (('R', 'Recency'), ('F', 'Frequency'), ('M', 'Value')):
    RFM[score_column] = RFM[metric].apply(RScore, args=(metric, quantiles,))

RFM.head()

RFM['R'].dtype

# RFM_Segment is the three scores written side by side as text (e.g. "413");
# RFM_Score is their arithmetic sum.
score_text = RFM[['R', 'F', 'M']].astype(str)
RFM["RFM_Segment"] = score_text['R'] + score_text['F'] + score_text['M']
RFM["RFM_Score"] = RFM[['R', 'F', 'M']].sum(axis=1)
RFM.head()

Segment_Label = ["Low-Value", "Mid-Value", "High-Value"]
RFM.head()

def assign_segment(score):
    """Map a combined RFM score to a coarse value tier.

    Scores below 5 are 'Low-Value', scores from 5 up to (but not including) 9
    are 'Mid-Value', and everything else is 'High-Value'.
    """
    tier_limits = ((5, 'Low-Value'), (9, 'Mid-Value'))
    for upper_limit, tier in tier_limits:
        if score < upper_limit:
            return tier
    return 'High-Value'

# Attach the Low/Mid/High value tier to every customer.
RFM['RFM_Segment_Label'] = RFM['RFM_Score'].apply(assign_segment)

RFM.head()

# Customers per value tier, as a two-column frame sorted by tier name.
segment_counts = (
    RFM['RFM_Segment_Label']
    .value_counts()
    .rename_axis('RFM_Segment')
    .reset_index(name='Count')
    .sort_values('RFM_Segment')
)

# Bar chart: one bar per value tier, coloured by tier.
fig = px.bar(
    segment_counts,
    x='RFM_Segment',
    y='Count',
    color='RFM_Segment',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    labels={'RFM_Segment': 'RFM Segment', 'Count': 'Number of Customer'},
    title='Customer Distribution by RFM Segment',
)
fig.show()

# Assign a named customer segment from the combined RFM score.
# Customers whose score matches no rule keep the empty-string placeholder.
RFM['RFM_Customer_Segments'] = ''
rfm_total = RFM['RFM_Score']

RFM.loc[rfm_total >= 9, 'RFM_Customer_Segments'] = "VIP/Loyal"

# Remaining bands are half-open: lower <= score < upper.
for lower, upper, segment_name in (
    (6, 9, 'Potential Loyal'),
    (5, 6, 'At Risk Customers'),
    (4, 5, "Can't Lose"),
    (3, 4, "Lost"),
):
    in_band = (rfm_total >= lower) & (rfm_total < upper)
    RFM.loc[in_band, 'RFM_Customer_Segments'] = segment_name

# Customers per named segment, ordered alphabetically by segment name.
segment_counts = RFM['RFM_Customer_Segments'].value_counts().sort_index()

# Customers per (value tier, named segment) pair, largest groups first.
segment_product_counts = (
    RFM.groupby(['RFM_Segment_Label', 'RFM_Customer_Segments'])
    .size()
    .reset_index(name='Count')
    .sort_values('Count', ascending=False)
)

# Treemap: value tier on the outer level, named segment nested inside it.
fig_treemap_segment_product = px.treemap(
    segment_product_counts,
    path=["RFM_Segment_Label", "RFM_Customer_Segments"],
    values='Count',
    color='RFM_Segment_Label',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title='RFM Customer Segments by Value',
)

fig_treemap_segment_product.show()

# Restrict the analysis to customers in the 'VIP/Loyal' segment.
vip_segment = RFM[RFM["RFM_Customer_Segments"] == 'VIP/Loyal']

# Box plots of the raw Recency / Frequency / Value columns for that segment.
# The figure must be shown here: `fig` is rebound to a different chart later,
# so without an explicit show() this plot is never rendered by a script run.
fig = go.Figure()
fig.add_trace(go.Box(y=vip_segment['Recency'], name='Recency'))
fig.add_trace(go.Box(y=vip_segment['Frequency'], name="Frequency"))
fig.add_trace(go.Box(y=vip_segment['Value'], name="Value"))
fig.update_layout(title="Distribution of RFM Values within VIP/Loyal Segment")
fig.show()

# Pairwise correlation of the R, F and M scores inside the segment.
correlation_matrix = vip_segment[['R', 'F', 'M']].corr()

fig_heatmap = go.Figure(data=go.Heatmap(
    z=correlation_matrix.values,
    x=correlation_matrix.columns,
    y=correlation_matrix.columns,
    colorscale='Rdbu',
    colorbar=dict(title='Correlation')))

# Title names the segment that was actually filtered above.
fig_heatmap.update_layout(title="Correlation Matrix of RFM Values within VIP/Loyal Segment")

fig_heatmap.show()

# Bar chart comparing the number of customers in each named segment
# (segment_counts is indexed by segment name).
pastel_colors = plotly.colors.qualitative.Pastel
vip_color = 'rgb(158,202,225)'  # Dedicated colour for the 'VIP/Loyal' segment

# One colour per bar: the VIP/Loyal bar gets vip_color, every other bar cycles
# through the pastel palette. The comparison must use the label that is
# actually stored in RFM_Customer_Segments ('VIP/Loyal'); comparing against
# 'Champions' never matches, so the highlight would never be applied.
bar_colors = [
    vip_color if segment == 'VIP/Loyal' else pastel_colors[i % len(pastel_colors)]
    for i, segment in enumerate(segment_counts.index)
]

fig = go.Figure(data=[go.Bar(
    x=segment_counts.index,
    y=segment_counts.values,
    marker=dict(color=bar_colors),
)])

fig.update_traces(
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.6
)

fig.update_layout(
    title='Comparison of RFM Segments',
    xaxis_title='RFM Segments',
    yaxis_title="Number of Customers",
    showlegend=False
)

fig.show()


# Mean R, F and M score per named customer segment.
segment_scores = RFM.groupby('RFM_Customer_Segments')[['R', 'F', 'M']].mean().reset_index()
fig = go.Figure()

# One bar series per score: (score column, legend name, bar colour).
score_series = (
    ('R', 'Recency Score', 'rgb(158,202,225)'),
    ('F', 'Frequency Score', 'rgb(94,158,217)'),
    ('M', 'Monetary Score', 'rgb(32,102,148)'),
)
for score_column, legend_name, bar_color in score_series:
    fig.add_trace(go.Bar(
        x=segment_scores['RFM_Customer_Segments'],
        y=segment_scores[score_column],
        name=legend_name,
        marker_color=bar_color,
    ))

fig.update_layout(
    title='Comparison of RFM Segments based on Recency, Frequency, and Monetary Scores',
    xaxis_title='RFM Segments',
    yaxis_title='Scores',
    barmode='group',  # Place the three score bars side by side per segment
    template='plotly_white'  # Plain white background
)

fig.show()



