In [81]:
from ipywidgets import interact, widgets
from IPython.display import display, clear_output, Image
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from datetime import datetime, date
plt.style.use('ggplot')


import warnings
warnings.filterwarnings('ignore')



trans = pd.read_csv(r"D:\Sem 3\PRJ-1\Transaction cleaned.csv")
cust = pd.read_csv(r"D:\Sem 3\PRJ-1\CustomerDemographic_Cleaned.csv")
merged_trans_cust = pd.merge(trans, cust, left_on='customer_id', right_on='customer_id', how='inner')
merged_trans_cust['transaction_date']= pd.to_datetime(merged_trans_cust['transaction_date'])

# 1. RFM Analysis
max_trans_date = max(merged_trans_cust['transaction_date']).date()
comparison_date = datetime.strptime(str(max_trans_date), "%Y-%m-%d")
rfm_table = merged_trans_cust.groupby(['customer_id']).agg({'transaction_date': lambda date : (comparison_date - date.max()).days,
                                                            'product_id' : lambda prod_id : len(prod_id), 
                                                            'Profit' : lambda p : sum(p)})

rfm_table.rename(columns={'transaction_date' : 'recency', 
                        'product_id' : 'frequency',
                        'Profit' : 'monetary'} , inplace=True)
rfm_table['r_quartile'] = pd.qcut(rfm_table['recency'], 4, ['4','3','2','1'])
rfm_table['f_quartile'] = pd.qcut(rfm_table['frequency'], 4, ['1','2','3','4'])
rfm_table['m_quartile'] = pd.qcut(rfm_table['monetary'], 4, ['1','2','3','4'])
rfm_table

#Calculation of RFM Score
rfm_table['rfm_score'] = 100*rfm_table['r_quartile'].astype(int)+10*rfm_table['f_quartile'].astype(int)+rfm_table['m_quartile'].astype(int)
rfm_table['customer_title'] = pd.qcut(rfm_table['rfm_score'], 4, ['Bronze','Silver','Gold','Platinum'])

## Merging both RFM Table with Transaction and Customer Tables
cust_trans_rfm = pd.merge(merged_trans_cust, rfm_table, left_on='customer_id', right_on='customer_id', how='inner')

### Defining an Age Group Feature
cust_trans_rfm['Age_Group'] = cust_trans_rfm['Age'].apply(lambda x : (math.floor(x/10)+1)*10)

#### Developing a Customer tag based on RFM Score Achieved
def cust_score_title_lkup(cols):
    
    rfm_score = cols[0]
    
    if rfm_score >= 444:
        return 'Platinum Customer'
    elif rfm_score >=433 and rfm_score < 444:
        return 'Very Loyal Customer'
    elif rfm_score >=421 and rfm_score < 433:
        return 'Becoming Loyal Customer'
    elif rfm_score >=344 and rfm_score < 421:
        return 'Recent Customer'
    elif rfm_score >=323 and rfm_score < 344:
        return 'Potential Customer'
    elif rfm_score >=311 and rfm_score < 323:
        return 'Late Bloomer'
    elif rfm_score >=224 and rfm_score < 311:
        return 'Loosing Customer'
    elif rfm_score >=212 and rfm_score < 224:
        return 'High Risk Customer'
    elif rfm_score >=124 and rfm_score < 212:
        return 'Almost Lost Customer'
    elif rfm_score >=112 and rfm_score < 124:
        return 'Evasive Customer'
    else :
        return 'Lost Customer'

cust_trans_rfm['detail_cust_title']=cust_trans_rfm[['rfm_score']].apply(cust_score_title_lkup, axis=1)

def get_rank(cols):
    
    title = cols[0]
    
    if title=='Platinum Customer':
        return 1
    elif title=='Very Loyal Customer':
        return 2
    elif title == 'Becoming Loyal Customer':
        return 3
    elif title == 'Recent Customer':
        return 4
    elif title=='Potential Customer':
        return 5
    elif title == 'Late Bloomer':
        return 6
    elif title == 'Loosing Customer':
        return 7
    elif title=='High Risk Customer':
        return 8
    elif title == 'Almost Lost Customer':
        return 9
    elif title == 'Evasive Customer':
        return 10
    else :
        return 11

cust_trans_rfm['rank']=cust_trans_rfm[['detail_cust_title']].apply(get_rank, axis=1)

# 3. Data Analysis and Exploration
### 3.1. Age Distributions of New vs. Old Customers


# Loading the New Customers Dataset
new_cust = pd.read_csv(r"D:\Sem 3\PRJ-1\NewCustomerList_Cleaned.csv")


### 3.2. Bike related purchases over last 3 years by gender
cust_bike_purchase_by_gender = cust_trans_rfm.groupby('gender').agg({'past_3_years_bike_related_purchases' : sum}
                                                                 ).reset_index()

total_records = cust_trans_rfm['past_3_years_bike_related_purchases'].sum()

cust_bike_purchase_by_gender['Percent_of_total'] = (cust_bike_purchase_by_gender['past_3_years_bike_related_purchases']
                                                        /total_records)*100

### 3.4. Wealth Segmentation by Age Group
#### New Customers

wealth_age_seg_new = new_cust.groupby(['wealth_segment', 'Age Group']).size().reset_index()

wealth_age_seg_new.rename(columns={0:'Number of Customers'}, inplace=True)



#### Old Customers


wealth_age_seg_old = cust_trans_rfm.groupby(['wealth_segment', 'Age_Group']).size().reset_index()
wealth_age_seg_old.rename(columns={0:'Number of Customers'}, inplace=True)

### 3.5. Car owner across each State

cust_addr_info = pd.read_csv(r"D:\Sem 3\PRJ-1\CustomerAddress_Cleaned.csv")

cust_trans_addr = pd.merge(cust_trans_rfm , cust_addr_info, left_on = 'customer_id' , 
                           right_on = 'customer_id', how='inner')

state_car_owners = cust_trans_addr[['state' , 'owns_car' , 'customer_id']].drop_duplicates().groupby(['state', 'owns_car']).size().reset_index()

state_car_owners.rename(columns={0:'Number of Customers'}, inplace=True)



# 4. RFM Analysis Scatter Plots

# 5. Customer Segment Distribution

cust_per_title = cust_trans_rfm[['detail_cust_title', 'customer_id','rank']].drop_duplicates().groupby(
    ['detail_cust_title','rank']).size().reset_index().sort_values('rank')

cust_per_title.rename(columns={0:'Number of Customers'}, inplace=True)

<div style="text-align:center; color:black">
    <h1>RFM Analysis Results</h1>
</div>

<div style="text-align:center;">
    <img src="https://miro.medium.com/v2/resize:fit:1316/0*0SOKL3yyBJJEaiF2" alt="Sprocket Central" style="width:50%; display:block; margin:auto;">
</div>

In [80]:
out = widgets.Output()

def generate_plot(figure_description):
    plt.figure(figsize=(10, 8))
    figure_number = next(key for key, value in figure_descriptions.items() if value == figure_description)
    
    if figure_number == 1:
        plt.figure(figsize=(10, 8))
        sns.distplot(new_cust['Age Group'], kde=False, bins=15)
        plt.xlabel('Age Group')
        plt.ylabel('Number of Customers')
        plt.title('Age Distribution among New Customers')
        return plt.gcf()
    
    elif figure_number == 2:
        plt.figure(figsize=(10,8))
        sns.distplot(cust_trans_rfm['Age_Group'], kde=False, bins=15)
        plt.xlabel('Age Group')
        plt.ylabel('Number of Customers')
        plt.title('Age Distribution among Old Customers')
        return plt.gcf()
    
    elif figure_number == 3:
        plt.figure(figsize=(8,5))
        sns.barplot(x='gender', y='Percent_of_total', data=cust_bike_purchase_by_gender)
        plt.xlabel('Gender')
        plt.ylabel('Percent of Total Purchases')
        plt.title('Bike related purchases over last 3 years by gender')
        return plt.gcf()
    
    elif figure_number == 4:
        plt.figure(figsize=(15,8))
        sns.countplot(x='job_industry_category', data=new_cust[~(new_cust['job_industry_category']=='Missing')])
        plt.xlabel('Job Industry')
        plt.ylabel('Number of Customers')
        plt.title('Job Industry Customer Distribution among New Customers')
        return plt.gcf()
    
    elif figure_number == 5:
        plt.figure(figsize=(15,8))
        sns.countplot(x='job_industry_category', data=cust_trans_rfm[~(cust_trans_rfm['job_industry_category']=='Missing')])
        plt.xlabel('Job Industry')
        plt.ylabel('Number of Customers')
        plt.title('Job Industry Customer Distribution among Old Customers')
        return plt.gcf()
    
    elif figure_number == 6:
        plt.figure(figsize=(15,8))
        sns.barplot(x='Age Group', y='Number of Customers', hue='wealth_segment', data=wealth_age_seg_new)
        plt.xlabel('Age Group')
        plt.ylabel('Number of Customers')
        plt.title('Wealth Segmentation by Age Group of New Customers')
        return plt.gcf()
    
    elif figure_number == 7:
        plt.figure(figsize=(15,8))
        sns.barplot(x='Age_Group', y='Number of Customers', hue='wealth_segment', data=wealth_age_seg_old)
        plt.xlabel('Age Group')
        plt.ylabel('Number of Customers')
        plt.title('Wealth Segmentation by Age Group in Old Customers')
        return plt.gcf()
    
    elif figure_number == 8:
        plt.figure(figsize=(8,7))
        sns.barplot(x='state', y='Number of Customers', hue='owns_car', data=state_car_owners)
        plt.xlabel('States')
        plt.ylabel('Number of Customers')
        plt.title('Number of Customers who own a car')
        return plt.gcf()
    
    elif figure_number == 9:
        plt.figure(figsize=(8,7))
        cust_trans_rfm.plot.scatter(x='recency', y='monetary')
        plt.xlabel('Recency')
        plt.ylabel('Monetary ($)')
        plt.title('Recency vs Monetary')
        return plt.gcf()
    
    elif figure_number == 10:
        plt.figure(figsize=(8,7))
        cust_trans_rfm.plot.scatter(x='frequency', y='monetary')
        plt.xlabel('Frequency')
        plt.ylabel('Monetary ($)')
        plt.title('Frequency vs Monetary')
        return plt.gcf()
    
    elif figure_number == 11:
        plt.figure(figsize=(15,8))
        sns.barplot(y='detail_cust_title', x='Number of Customers', data=cust_per_title)
        plt.xlabel('Number of Customers')
        plt.ylabel('Customer Segment')
        plt.title('Number of Customers by Customer Segment')
        return plt.gcf()

# Map figure descriptions to their corresponding figure numbers
figure_descriptions = {
    1: 'Gender Distribution in Bike Purchases',
    2: 'Percent of Total Purchases by Gender',
    3: 'Percent of Total Purchases by Age Group',
    4: 'Job Industry Customer Distribution among New Customers',
    5: 'Job Industry Customer Distribution among Old Customers',
    6: 'Wealth Segmentation by Age Group of New Customers',
    7: 'Wealth Segmentation by Age Group in Old Customers',
    8: 'Number of Customers who own a car',
    9: 'Recency vs Monetary',
    10: 'Frequency vs Monetary',
    11: 'Number of Customers by Customer Segment',
}

# Display a dropdown menu with figure options
dropdown = widgets.Dropdown(options=figure_descriptions.values(), value=list(figure_descriptions.values())[0], description='Figure:')

# Create a function to handle dropdown changes
def on_dropdown_change(change):
    with out:
        clear_output(wait=True)  # Clear the output area
        figure_description = change.new
        fig = generate_plot(figure_description)
        plt.show(fig)  # Display the figure using plt.show()


# Connect the dropdown menu to the callback function
dropdown.observe(on_dropdown_change, names='value')

# Display the dropdown menu and the output area
display(dropdown, out)


Dropdown(description='Figure:', options=('Gender Distribution in Bike Purchases', 'Percent of Total Purchases …

Output()