In [9]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [10]:
# Load the 2019-2022 transactions dataset (path is relative to the notebook's working directory).
df = pd.read_csv('./csv/dataset_2019_2022.csv')

# Display options for the tables rendered below: cap rows/columns at 50 each.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', None)
# None is the documented "do not truncate cell contents" value for max_colwidth;
# 0 only behaved that way by accident.
pd.set_option('display.max_colwidth', None)

# Show the available columns as the cell output.
df.columns

Index(['customer_id', 'product_id', 'basket_id', 'loyalty', 'household_type',
       'age_band', 'department', 'brand', 'commodity', 'store', 'price',
       'transaction_date'],
      dtype='object')

In [11]:
# Parse the day/month/year strings into datetimes, then derive calendar parts
# as separate columns for the yearly/monthly groupings used further down.
df['transaction_date'] = pd.to_datetime(df['transaction_date'], format='%d/%m/%Y')
transaction_dt = df['transaction_date'].dt
df['year'], df['month'], df['day'] = transaction_dt.year, transaction_dt.month, transaction_dt.day
df.head()

Unnamed: 0,customer_id,product_id,basket_id,loyalty,household_type,age_band,department,brand,commodity,store,price,transaction_date,year,month,day
0,15803,1131974,57266,Loyalist,1 adult with kids,19-24,Grocery,private,Baked bread/buns/rolls,374,0.99,2020-10-05,2020,10,5
1,15803,1051516,57266,Loyalist,1 adult with kids,19-24,Produce,national,Vegetables - all others,374,0.7,2020-10-24,2020,10,24
2,15803,967254,57266,Loyalist,1 adult with kids,19-24,Pharmaceutical,national,Cold and flu,374,1.68,2020-10-18,2020,10,18
3,15803,1134222,57266,Loyalist,1 adult with kids,19-24,Grocery,private,Paper housewares,374,2.59,2020-10-23,2020,10,23
4,15803,1003421,57266,Loyalist,1 adult with kids,19-24,Grocery,national,Soup,374,0.6,2020-10-27,2020,10,27


In [12]:
# Number of transaction rows per (year, loyalty), pivoted so that each
# loyalty type becomes its own column and `year` is a regular column.
loyalties = (
    df.groupby(['year', 'loyalty'])
    .size()
    .unstack(level='loyalty')
    .reset_index()
)
loyalties

loyalty,year,First Time Buyer,Loyalist,Promiscuous
0,2019,408,8354,13891
1,2020,429,9150,13496
2,2021,241,10463,12401
3,2022,56,3706,5155


In [66]:
#TODO create a graph that showcases the total revenue of each loyalty type per year
#* Preferably a line chart with 3 different lines (1 per loyalty type)
#* x => year, y => total_revenue

# Sum of `price` per (year, loyalty).
# Rows are ordered by loyalty (descending, which fixes the legend order) and then by
# year: px.line joins the points of each trace in row order, so sorting on loyalty
# alone would leave the year order inside each line up to the sort algorithm.
revenue_per_loyalites = (
    df.groupby(['year', 'loyalty'])
    .agg(total_revenue=('price', 'sum'))
    .sort_values(['loyalty', 'year'], ascending=[False, True])
    .reset_index()
)

# NOTE(review): this is a one-element list wrapping the array of unique loyalty values;
# left unchanged in case other cells index into it -- confirm whether a flat list was intended.
loyalty_types = [revenue_per_loyalites.loyalty.unique()]

# Per-loyalty slices of the yearly revenue table.
ftb_revenue = revenue_per_loyalites[revenue_per_loyalites.loyalty == 'First Time Buyer']
loyalist_revenue = revenue_per_loyalites[revenue_per_loyalites.loyalty == 'Loyalist']
prmscs_revenue = revenue_per_loyalites[revenue_per_loyalites.loyalty == 'Promiscuous']

fig = px.line(
    revenue_per_loyalites,
    x='year',
    y='total_revenue',
    color='loyalty',
    text='total_revenue',
    markers=True,
    width=950,
    title='Total Revenue',
    labels={'year': 'Year', 'total_revenue': 'Total revenue', 'loyalty': 'Loyalty'},
)

# One tick per calendar year, so the axis never shows fractional years.
first_year = int(revenue_per_loyalites['year'].min())
last_year = int(revenue_per_loyalites['year'].max())
fig.update_layout(xaxis={'tickvals': list(range(first_year, last_year + 1))})

# Print the point labels with two decimals instead of the raw float sums.
fig.update_traces(textposition="top right", texttemplate='%{y:,.2f}')
fig.show()


In [43]:
#TODO create a chart/table that showcases the top-selling commodities for each loyalty type
#* purpose: figure out which commodities can be used to convert non-loyal customers into loyal customers

# How many commodities to keep per loyalty type.
# NOTE(review): the original note asked for the top 5, but the code keeps 10 -- confirm which is wanted.
TOP_N_COMMODITIES = 10

# `total_bought` is the summed `price` per (loyalty, commodity), i.e. revenue rather than a unit count.
revenue_per_commodities = df.groupby(['loyalty', 'commodity']).agg(total_bought=('price', 'sum')).reset_index()

# Order by loyalty, then by revenue (highest first), and keep the first
# TOP_N_COMMODITIES rows of every loyalty group. Original row labels are preserved.
top_commodities_per_loyalty = (
    revenue_per_commodities
    .sort_values(['loyalty', 'total_bought'], ascending=[True, False])
    .groupby('loyalty', sort=False)
    .head(TOP_N_COMMODITIES)
)

# Per-loyalty slices consumed by the bar charts below.
top_commodities_in_ftb = top_commodities_per_loyalty[top_commodities_per_loyalty.loyalty == 'First Time Buyer']
top_commodities_in_prom = top_commodities_per_loyalty[top_commodities_per_loyalty.loyalty == 'Promiscuous']
top_commodities_in_loy = top_commodities_per_loyalty[top_commodities_per_loyalty.loyalty == 'Loyalist']

Unnamed: 0,loyalty,commodity,total_bought
15,First Time Buyer,Beef,222.31
78,First Time Buyer,Frozen meat,97.96
108,First Time Buyer,Lunch meat,93.96
151,First Time Buyer,Seafood-frozen,86.98
37,First Time Buyer,Chicken,79.81
36,First Time Buyer,Cheese,71.43
135,First Time Buyer,Pork,71.01
162,First Time Buyer,Soft drinks,70.84
48,First Time Buyer,Deli meats,69.3
147,First Time Buyer,Salad,66.77


In [62]:
#* First Time Buyer

# Top commodities for first-time buyers. `total_bought` holds summed prices,
# so the y-axis is labelled as revenue instead of showing the raw column name.
fig_ftb = px.bar(
    top_commodities_in_ftb,
    x='commodity',
    y='total_bought',
    width=750,
    title='First Time Buyers',
    labels={'commodity': 'Commodity', 'total_bought': 'Total revenue'},
)
fig_ftb.show()

In [63]:
#* Promiscuous

# Top commodities for promiscuous customers. `total_bought` holds summed prices,
# so the y-axis is labelled as revenue instead of showing the raw column name.
fig_prom = px.bar(
    top_commodities_in_prom,
    x='commodity',
    y='total_bought',
    width=750,
    title='Promiscuous',
    labels={'commodity': 'Commodity', 'total_bought': 'Total revenue'},
)
fig_prom.show()

In [64]:
#* Loyalist

# Top commodities for loyalists. `total_bought` holds summed prices,
# so the y-axis is labelled as revenue instead of showing the raw column name.
fig_loy = px.bar(
    top_commodities_in_loy,
    x='commodity',
    y='total_bought',
    width=750,
    title='Loyalist',
    labels={'commodity': 'Commodity', 'total_bought': 'Total revenue'},
)
fig_loy.show()

In [108]:
#TODO create a chart that showcases the total number of customers and the share of each loyalty type
#* purpose is to know the current customers

# Distinct customer ids per loyalty type (one row per loyalty).
total_customers = df.groupby(['loyalty']).agg(total_customer=('customer_id', 'nunique')).reset_index()

# Overall headcount is taken from the distinct ids directly: summing the per-loyalty
# counts would count a customer twice if they ever appear under two loyalty types.
total_count_customers = df['customer_id'].nunique()

# Donut chart: slices are the per-loyalty counts, the overall total is printed in the hole.
fig_customers = go.Figure(
    data=[
        go.Pie(
            labels=total_customers['loyalty'],
            values=total_customers['total_customer'],
            hole=.4,
            title=str(total_count_customers),
        )
    ]
)
fig_customers.update_layout(title_text='Total Customers', font_size=12, width=500)
fig_customers.show()


In [84]:
# Distinct customer ids per loyalty type, followed by the sum of those per-type counts.
total_customers = (
    df.groupby(['loyalty'])
    .agg(total_customer=('customer_id', 'nunique'))
    .reset_index()
)
total_stuff = total_customers.total_customer.sum()
total_stuff

np.int64(3268)

In [16]:
#TODO create a chart showing when customers buy the company's products



In [53]:
# All transaction rows whose derived `year` column equals 2022.
total_num_2022 = df.loc[df['year'].eq(2022)]
total_num_2022

Unnamed: 0,customer_id,product_id,basket_id,loyalty,household_type,age_band,department,brand,commodity,store,price,transaction_date,year,month,day
28467,22001,866573,75400,Promiscuous,1 adult with kids,19-24,Grocery,private,Frozen breakfast foods,374,2.00,2022-05-24,2022,5,24
28468,22001,1108094,75400,Promiscuous,1 adult with kids,19-24,Grocery,national,Beers/ales,374,11.99,2022-05-16,2022,5,16
28469,22001,850652,75400,Promiscuous,1 adult with kids,19-24,Grocery,private,Dry beans/vegetables/potato/rice,374,1.33,2022-05-05,2022,5,5
28470,22001,1046756,75400,Promiscuous,1 adult with kids,19-24,Produce,national,Mushrooms,374,2.29,2022-05-11,2022,5,11
28471,22001,9527187,75400,Promiscuous,1 adult with kids,19-24,Meat,national,Frozen meat,374,7.98,2022-05-27,2022,5,27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68553,28995,1068715,65660,Promiscuous,Single male,19-24,Produce,national,Vegetables - all others,374,1.89,2022-01-24,2022,1,24
68554,28995,933835,65660,Promiscuous,Single male,19-24,Deli,national,Deli meats,374,2.30,2022-01-17,2022,1,17
68555,28995,1068715,65660,Promiscuous,Single male,19-24,Produce,national,Vegetables - all others,374,1.89,2022-01-22,2022,1,22
68556,28995,831181,65660,Promiscuous,Single male,19-24,Seafood,private,Seafood-fresh,374,2.99,2022-01-21,2022,1,21
