In [74]:
#Library for edit dataset
import pandas as pd
import numpy as np
import datetime as dp

#Library for visualization
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import folium 
from folium.plugins import StripePattern
import branca.colormap
from collections import defaultdict
from folium.plugins import HeatMap

#Calculate distance on latitude and longitude
from math import radians, cos, sin, asin, sqrt

#Library to find correlation in categorical data
from pandas import factorize

# Modeling
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import warnings
from sklearn.utils import resample
warnings.filterwarnings('ignore')


# **Analysis Segementation**

In this part we want explain our customers segementation, after we apply best method for segementation with Kmeans with 6 customers segementation based on Sillhoutte Score.

#### **Set Our Dataset, Preprocess, Scaled, Modeling Kmeans**

In [75]:
# Set Our DataSet

# Change the product name to english
products = pd.read_csv("olist_products_dataset.csv")
prod_cat_name_tr = pd.read_csv("product_category_name_translation.csv")
products = products.merge(prod_cat_name_tr, on='product_category_name', how='left')
products.drop ('product_category_name', axis=1, inplace=True)


#load all the tables
o_items = pd.read_csv("olist_order_items_dataset.csv")
orders = pd.read_csv("olist_orders_dataset.csv")
o_reviews = pd.read_csv("olist_order_reviews_dataset.csv")
geoloc = pd.read_csv("olist_geolocation_dataset.csv")
sellers = pd.read_csv("olist_sellers_dataset.csv")
o_payments = pd.read_csv("olist_order_payments_dataset.csv")
customers = pd.read_csv("olist_customers_dataset.csv")

# Join the needed table
df = orders.merge(o_items, on='order_id', how='left')
df = df.merge(o_payments, on='order_id', how='outer', validate='m:m')
df = df.merge(products, on='product_id', how='outer')
df = df.merge(customers, on='customer_id', how='outer')
df = df.merge(sellers, on='seller_id', how='outer')

In [76]:
# remove columns we don't need
df = df.drop(axis=1,
columns=['product_name_lenght',
'product_description_lenght',
'product_photos_qty',
'product_weight_g',
'product_length_cm',
'product_height_cm',
'product_width_cm',
'order_delivered_carrier_date',
'order_delivered_customer_date',
'order_estimated_delivery_date'
,'shipping_limit_date','order_approved_at'])

In [77]:
# Change to Datetime
df['order_purchase_date']=pd.to_datetime(df['order_purchase_timestamp'])

df['order_purchase_date'] = pd.to_datetime(df['order_purchase_timestamp'])
df['order_purchase_date'] = df['order_purchase_date'].dt.strftime('%Y-%m-%d-%H')
df['order_purchase_date'] = pd.to_datetime(df['order_purchase_date'])

In [78]:
df['year'] = df['order_purchase_date'].dt.year
df['month'] = df['order_purchase_date'].dt.strftime('%b')
df['day'] = df['order_purchase_date'].dt.day

In [79]:
# Preprocessing

df = df[(df['payment_type'] != 'not_defined') | (df['seller_city'] != '04482255') | (df['seller_city'] != 'vendas@creditparts.com.br') |
        (df['seller_city'] != 'rio de janeiro / rio de janeiro') | (df['seller_city'] != 'sao paulo / sao paulo') | (df['seller_city'] != 'rio de janeiro \\rio de janeiro') 
        | (df['seller_city'] != 'ribeirao preto / sao paulo' ) | (df['seller_city'] != 'sp') | (df['seller_city'] != 'carapicuiba / sao paulo') | (df['seller_city'] != 'mogi das cruzes / sp')
        | (df['seller_city'] != 'sp / sp') | (df['seller_city'] != 'auriflama/sp') | (df['seller_city'] != 'pinhais/pr') | (df['seller_city'] != 'cariacica / es') | (df['seller_city'] != 'jacarei / sao paulo')
        | (df['seller_city'] != 'sao sebastiao da grama/sp') | (df['seller_city'] != 'maua/sao paulo') | (df['seller_city'] != ' ') | (df['seller_city'] != 'lages - sc') 
        | (df['year'] != 2016) | (df['order_purchase_timestamp'] != '2018-09-03 09:06:57')]

In [80]:
df.dropna(axis=0,inplace=True,subset=['product_id', 'seller_id', 'payment_sequential','payment_type', 'payment_installments', 'payment_value'])

In [81]:
df['product_category_name_english'].fillna('Missing', inplace = True)

In [82]:
# Set RFM Analysis Data set
recency = df[['customer_unique_id', 'order_purchase_date']].copy()
recency = recency.groupby('customer_unique_id')['order_purchase_date'].max().reset_index()
recency.columns = ['customer_unique_id', 'last_purchase_timestamp']
# Recency, Frequency, Monetary
recency['purchase_int'] = (recency['last_purchase_timestamp'].max() - recency['last_purchase_timestamp']).dt.days
recency.drop(columns='last_purchase_timestamp', inplace=True)
frequency =pd.DataFrame(df.groupby('customer_unique_id')['order_id'].count().reset_index())
monetary =pd.DataFrame(df.groupby('customer_unique_id')['payment_value'].sum().reset_index())
# Merge
overall = recency.merge(frequency, on='customer_unique_id')
overall = overall.merge(monetary, on='customer_unique_id')

# Rename columns for better intepretation
overall.rename(columns={ 'purchase_int':'recency', 'num_transaction':'frequency','payment_value':'monetary','order_id':'frequency'}, inplace=True)

overall



Unnamed: 0,customer_unique_id,recency,frequency,monetary
0,0000366f3b9a7992bf8c76cfdf3221e2,115,1,141.90
1,0000b849f77a49e4a4ce2b2a4ca5be3f,118,1,27.19
2,0000f46a3911fa3c0805444483337064,541,1,86.22
3,0000f6ccb0745a6a4b88665a16c9f078,325,1,43.62
4,0004aac84e0df4da2b147fca70cf8255,292,1,196.89
...,...,...,...,...
95414,fffcf5a5ff07b0908bd4e2dbc735a684,451,2,4134.84
95415,fffea47cd6d3cc0a88bd621562a9d061,266,1,84.58
95416,ffff371b4d645b6ecea244b27531430a,572,1,112.46
95417,ffff5962728ec6157033ef9805bacc48,123,1,133.69


In [83]:
scaled_features = overall[['customer_unique_id','recency','frequency','monetary']].copy()

col_names = ['monetary', 'recency','frequency']
features = scaled_features[col_names]
scaler = StandardScaler().fit(features.values)
features = scaler.transform(features.values)

scaled_features[col_names] = features
scaled_features

Unnamed: 0,customer_unique_id,recency,frequency,monetary
0,0000366f3b9a7992bf8c76cfdf3221e2,-0.833414,-0.282446,-0.110526
1,0000b849f77a49e4a4ce2b2a4ca5be3f,-0.813826,-0.282446,-0.289269
2,0000f46a3911fa3c0805444483337064,1.948149,-0.282446,-0.197287
3,0000f6ccb0745a6a4b88665a16c9f078,0.537779,-0.282446,-0.263667
4,0004aac84e0df4da2b147fca70cf8255,0.322306,-0.282446,-0.024840
...,...,...,...,...
95414,fffcf5a5ff07b0908bd4e2dbc735a684,1.360495,0.932534,6.111335
95415,fffea47cd6d3cc0a88bd621562a9d061,0.152539,-0.282446,-0.199843
95416,ffff371b4d645b6ecea244b27531430a,2.150564,-0.282446,-0.156400
95417,ffff5962728ec6157033ef9805bacc48,-0.781178,-0.282446,-0.123319


In [84]:
# Segmentation Based on RFM with KMeans
kmeans = KMeans(n_clusters=6,random_state= 0)
kmeans.fit(scaled_features[['monetary', 'recency','frequency']])

KMeans(n_clusters=6, random_state=0)

In [85]:
scaled_features['k_means_clust'] = kmeans.labels_
scaled_features


Unnamed: 0,customer_unique_id,recency,frequency,monetary,k_means_clust
0,0000366f3b9a7992bf8c76cfdf3221e2,-0.833414,-0.282446,-0.110526,1
1,0000b849f77a49e4a4ce2b2a4ca5be3f,-0.813826,-0.282446,-0.289269,1
2,0000f46a3911fa3c0805444483337064,1.948149,-0.282446,-0.197287,0
3,0000f6ccb0745a6a4b88665a16c9f078,0.537779,-0.282446,-0.263667,0
4,0004aac84e0df4da2b147fca70cf8255,0.322306,-0.282446,-0.024840,0
...,...,...,...,...,...
95414,fffcf5a5ff07b0908bd4e2dbc735a684,1.360495,0.932534,6.111335,4
95415,fffea47cd6d3cc0a88bd621562a9d061,0.152539,-0.282446,-0.199843,1
95416,ffff371b4d645b6ecea244b27531430a,2.150564,-0.282446,-0.156400,0
95417,ffff5962728ec6157033ef9805bacc48,-0.781178,-0.282446,-0.123319,1


In [86]:
scaled_features['k_means_clust'] = kmeans.labels_
scaled_features


Unnamed: 0,customer_unique_id,recency,frequency,monetary,k_means_clust
0,0000366f3b9a7992bf8c76cfdf3221e2,-0.833414,-0.282446,-0.110526,1
1,0000b849f77a49e4a4ce2b2a4ca5be3f,-0.813826,-0.282446,-0.289269,1
2,0000f46a3911fa3c0805444483337064,1.948149,-0.282446,-0.197287,0
3,0000f6ccb0745a6a4b88665a16c9f078,0.537779,-0.282446,-0.263667,0
4,0004aac84e0df4da2b147fca70cf8255,0.322306,-0.282446,-0.024840,0
...,...,...,...,...,...
95414,fffcf5a5ff07b0908bd4e2dbc735a684,1.360495,0.932534,6.111335,4
95415,fffea47cd6d3cc0a88bd621562a9d061,0.152539,-0.282446,-0.199843,1
95416,ffff371b4d645b6ecea244b27531430a,2.150564,-0.282446,-0.156400,0
95417,ffff5962728ec6157033ef9805bacc48,-0.781178,-0.282446,-0.123319,1


In [87]:
scaled_features['k_means_clust'].value_counts()

1    47860
0    35761
3    10821
4      950
2       26
5        1
Name: k_means_clust, dtype: int64

In [88]:
#assign customer segmentation based on overall score
ksegment_lst = scaled_features['k_means_clust']
ksegment_cluster = []

for i in ksegment_lst:
    if i == 1:
        cluster = 'passive'
        ksegment_cluster.append(cluster)
    elif i == 0:
        cluster = 'regular'
        ksegment_cluster.append(cluster)
    elif i == 3:
        cluster = 'occation'
        ksegment_cluster.append(cluster)
    elif i == 4:
        cluster = 'valuable'
        ksegment_cluster.append(cluster)
    elif i == 2:
        cluster = 'loyal'
        ksegment_cluster.append(cluster)
    else:
        cluster = 'best'
        ksegment_cluster.append(cluster)
        

In [89]:
scaled_features['k_means_segment'] = ksegment_cluster

## **Analysis Customers Based on Segementation**

In [90]:
# Merging Kmeans Segement for location & RFM before scaled

segment = scaled_features[['customer_unique_id','k_means_segment']].merge(df[['customer_unique_id','payment_type','customer_city','customer_state','year','month','product_category_name_english']], how='outer', on='customer_unique_id')
segment = segment.merge(overall, how='outer', on='customer_unique_id')
segment = segment.drop_duplicates(subset=['customer_unique_id'])
segment.head(5)


Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary
0,0000366f3b9a7992bf8c76cfdf3221e2,passive,credit_card,cajamar,SP,2018,May,bed_bath_table,115,1,141.9
1,0000b849f77a49e4a4ce2b2a4ca5be3f,passive,credit_card,osasco,SP,2018,May,health_beauty,118,1,27.19
2,0000f46a3911fa3c0805444483337064,regular,credit_card,sao jose,SC,2017,Mar,stationery,541,1,86.22
3,0000f6ccb0745a6a4b88665a16c9f078,regular,credit_card,belem,PA,2017,Oct,telephony,325,1,43.62
4,0004aac84e0df4da2b147fca70cf8255,regular,credit_card,sorocaba,SP,2017,Nov,telephony,292,1,196.89


In [91]:
segment.shape

(95419, 11)

### **Passive Customers**

In [92]:
# Numerikal Features Analysis

passive=segment[segment['k_means_segment'] == 'passive']
passive.describe()

Unnamed: 0,year,recency,frequency,monetary
count,47860.0,47860.0,47860.0,47860.0
mean,2017.933326,135.400293,1.0,148.429274
std,0.249459,74.605649,0.0,181.563719
min,2017.0,4.0,1.0,10.89
25%,2018.0,70.0,1.0,58.62
50%,2018.0,137.0,1.0,100.34
75%,2018.0,198.0,1.0,167.86
max,2018.0,269.0,1.0,2512.53


**Passive Characteristic Based On Numerikal Features**

1.  Average Recency: 135 Days
    , Passive customers last purchase average was 135 days
2.  Average Frequency: 1
    , Passive customers only buy item once in our e-commerce
3.  Average Monetary: 148
    , Passive customers average spending in our e-commerce was R$148
4.  Most Transaction Passive customers was in 2018

In [94]:
# having the least spend on passive customers segment
passive[passive['monetary'] == passive['monetary'].describe()[3]]   

Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary
82509,b33336f46234b24a613ad9064d13106d,passive,credit_card,sao paulo,SP,2018,Jun,auto,73,1,10.89


**The least passive customers spend are R$ 10.89**
 
    - buy in the last 73 days
    - transaction using credit card
    - In City Sao Paulo
    - Buy Auto

In [95]:
# having the highest spend on passive customers segment
passive[passive['monetary'] == passive['monetary'].describe()[7]]   

Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary
1281,02ccceb879088a0253e1e46e0200bdb8,passive,debit_card,birigui,SP,2018,Aug,housewares,25,1,2512.53


**The highest passive customers spend are R$ 2512.53**

    - buy in the last 25 days
    - transaction using debit card
    - in City Birigui
    - Buy Housewares

In [96]:
ordered_months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# sorting data accoring to ordered_months
passive['months']=passive['month'].apply(lambda x:ordered_months.index(x))
passive = passive.sort_values('months')

In [97]:
exclude_2017 = passive[passive['year'] != 2017]
agg= exclude_2017.groupby(['month','months','year'])['monetary'].sum().reset_index().sort_values(by='months')
agg

Unnamed: 0,month,months,year,monetary
3,Jan,0,2018,837334.87
2,Feb,1,2018,750909.82
6,Mar,2,2018,897547.07
0,Apr,3,2018,898722.03
7,May,4,2018,859637.26
5,Jun,5,2018,796150.3
4,Jul,6,2018,816752.4
1,Aug,7,2018,804050.47


**Exclude 2017 Year because most passive customers transaction in 2018**

In [98]:
px.line(agg, x="month" , y="monetary", color = 'year', line_group = 'year', title = 'Total Monetary by Passive Customers')

**Most Passive customers spend money in 2018 February and March ,There a decline gross sales in January and Mei we should investigate more**

In [99]:
# 10 State most of passive customers
top_10_customer_state = passive['customer_state'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_customer_state = passive[passive['customer_state'].isin(top_10_customer_state)]

In [100]:
plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_customer_state['customer_state'].value_counts()/len(passive['customer_city'])*100
pal = sns.color_palette('rocket_r', len(df_price_cat))
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.update_xaxes(title_text='State')
fig.update_yaxes(title_text='Percent Customers')
fig.show()

<Figure size 576x432 with 0 Axes>

**As we know in Brazil Sao Paulo are the most densed State, it is normal 43% of our Passive customers are from Sao Paolo,its very densed in Sao Paulo**

In [101]:
# 10 City most of passive customers
top_10_customer_city = passive['customer_city'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_customer_city = passive[passive['customer_city'].isin(top_10_customer_city)]

In [102]:
plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_customer_city['customer_city'].value_counts()/len(passive['customer_city'])*100
pal = sns.color_palette('rocket_r', len(df_price_cat))
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.update_xaxes(title_text='City')
fig.update_yaxes(title_text='Percent Customers')
fig.show()

<Figure size 576x432 with 0 Axes>

**Since Most our Customers from Sao Paulo State, ofcourse the capital city of this state will having the highest transaction**

In [103]:
df_price_cat = passive['payment_type'].value_counts()/len(passive['payment_type'])*100
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.show()

**Our most used payment method in passive Customers are Credit Card used 77%  and Boleto 19.5% for trasaction**

In [104]:
top_10_product = passive['product_category_name_english'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_product = passive[passive['product_category_name_english'].isin(top_10_product)]

plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_product['product_category_name_english'].value_counts()/len(passive['customer_city'])*100
pal = sns.color_palette('rocket_r', len(df_price_cat))
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.update_xaxes(title_text='Category')
fig.update_yaxes(title_text='Percent Customers')
fig.show()

<Figure size 576x432 with 0 Axes>

**Most Product Passive Customers Buy are Bed Bath Table (8.2%), Health Beauty (10.17%), Sport Leisure(7.4%),etc**

#### **Conclusion Behavior Passive Customers**

    - Most of them in State: Sao Paolo, City: Sao Paolo
    - Most of them using Credit Cards(77%), a part of them using Boleto(19%)
    - Favorite Category: Bed Bath Table, Health Beauty, Sport Leisure, Watches Gift & Computers accesories
    - Most transaction in 2018
    - Their numbers decline

    Passive Characteristic Based On Numerikal Features
        1.  Average Recency: 135 Days
            Passive customers last purchase average was 135 days
        2.  Average Frequency: 1
            Passive customers only buy item once in our e-commerce
        3.  Average Monetary: 148
            Passive customers average spending in our e-commerce was R$148
        4.  Most Transaction Passive customers was in 2018
    - The least spend are R$ 10.89
    - The highest spend are R$ 2512.53
    

### **Regular Customers**


In [105]:
# Numerikal Features Analysis

regular=segment[segment['k_means_segment'] == 'regular']
regular.describe()

Unnamed: 0,year,recency,frequency,monetary
count,35761.0,35761.0,35761.0,35761.0
mean,2016.991863,401.560806,1.073068,160.862412
std,0.089841,95.049626,0.263031,205.239006
min,2016.0,266.0,1.0,10.07
25%,2017.0,318.0,1.0,61.05
50%,2017.0,391.0,1.0,103.32
75%,2017.0,473.0,1.0,178.27
max,2017.0,728.0,3.0,4764.34


**Regular Characteristic Based On Numerikal Features**

        1.  Average Recency: 405 Days
            Regular customers last purchase average was 135 days
        2.  Average Frequency: 1.13
            Regular customers only buy item once in our e-commerce
        3.  Average Monetary: 168
            Regular customers average spending in our e-commerce was R$168.8
        4.  Most Transaction regular customers was in 2017

In [106]:
# having the least spend on regular customers segment
regular[regular['monetary'] == regular['monetary'].describe()[3]]   

Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary
86877,bd06ce0e06ad77a7f681f1a4960a3cc6,regular,credit_card,sao paulo,SP,2017,Sep,stationery,354,1,10.07


**The least regular customers spend are R$ 10.07**
 
    - buy in the last 354 days
    - transaction using credit card
    - In City Sao Paulo
    - Buy Stationery

In [107]:
# having the highest spend on regular customers segment
regular[regular['monetary'] == regular['monetary'].describe()[7]]   

Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary
109821,eebb5dda148d3893cdaf5b5ca3040ccb,regular,credit_card,maua,SP,2017,Apr,small_appliances,502,1,4764.34


**The least regular customers spend are R$ 4764.34**
 
    - buy in the last 503 days
    - transaction using credit card
    - In City Maua
    - Buy Small Appliances

In [108]:
ordered_months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# sorting data accoring to ordered_months
regular['months']=regular['month'].apply(lambda x:ordered_months.index(x))
regular = regular.sort_values('months')

In [109]:
exclude_2016 = regular[regular['year'] != 2016]
agg= exclude_2016.groupby(['month','months','year'])['monetary'].sum().reset_index().sort_values(by='months')
agg

Unnamed: 0,month,months,year,monetary
4,Jan,0,2017,133910.66
3,Feb,1,2017,286334.52
7,Mar,2,2017,423894.61
0,Apr,3,2017,400866.07
8,May,4,2017,555273.5
6,Jun,5,2017,485040.08
5,Jul,6,2017,561143.53
1,Aug,7,2017,597225.74
11,Sep,8,2017,559373.73
10,Oct,9,2017,578230.2


**Reason Exclude 2016 the company data are so few**

In [110]:
px.line(agg, x="month" , y="monetary", color = 'year', line_group = 'year', title = 'Total Monetary by Regular Customers')

**The most highest spend in Regular Customers are in Nov, in our analysis Novermber was Black Friday time, after Black Friday over people din't so much spend money in December**

In [111]:
# 10 State most of passive customers
top_10_customer_state = regular['customer_state'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_customer_state = regular[regular['customer_state'].isin(top_10_customer_state)]

In [112]:
plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_customer_state['customer_state'].value_counts()/len(regular['customer_city'])*100
pal = sns.color_palette('rocket_r', len(df_price_cat))
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index,title='Regular Customers State')
fig.update_xaxes(title_text='State')
fig.update_yaxes(title_text='Percent Customers')

fig.show()

<Figure size 576x432 with 0 Axes>

**As we know in Brazil Sao Paulo are the most densed State, it is normal 54% of our Regular customers are from Sao Paolo,its very densed in Sao Paulo**

In [113]:
# 10 City most of passive customers
top_10_customer_city = regular['customer_city'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_customer_city = regular[regular['customer_city'].isin(top_10_customer_city)]

In [114]:
plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_customer_city['customer_city'].value_counts()/len(regular['customer_city'])*100
pal = sns.color_palette('rocket_r', len(df_price_cat))
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index,title='Regular Customers City')
fig.update_xaxes(title_text='City')
fig.update_yaxes(title_text='Percent Customers')
fig.show()

<Figure size 576x432 with 0 Axes>

**Our Regular Customers As we can see not heavly dense in one city but scatter in many city of Sao Paulo State, but the 2nd Biggest transaction in Rio de Janeiro which din't belong in Sao Paulo State**

In [115]:
df_price_cat = regular['payment_type'].value_counts()/len(regular['payment_type'])*100
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.show()

**Credit Card and Boleto still are the most popular payment method**

In [116]:
top_10_product = regular['product_category_name_english'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_product = regular[regular['product_category_name_english'].isin(top_10_product)]

plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_product['product_category_name_english'].value_counts()/len(regular['product_category_name_english'])*100
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.update_xaxes(title_text='Category')
fig.update_yaxes(title_text='Percent Customers')
fig.show()

<Figure size 576x432 with 0 Axes>

**Favorite buy category for regular customers are bed bath table(9.6%), sport leisure(8.1%), health beauty(7.7%), furniture decor(6.9%), computers accesories(5.8%), etc**

#### **Conclusion Behavior Regular Customers**

    - Most of them in State: Sao Paolo, Customers city are scatter around in Sao Paolo State but 2nd biggest are in Rio De janeiro that not in Sao Paolo State
    - Most of them using Credit Cards(75%), a part of them using Boleto(21%)
    - Favorite Category: Bed Bath Table, Health Beauty, Sport Leisure, Furniture Decor & Computers accesories
    - Most transaction in 2017
    - Their treds numbers decline

    Regular Characteristic Based On Numerikal Features

        1.  Average Recency: 405 Days
            Regular customers last purchase average was 135 days
        2.  Average Frequency: 1.13
            Regular customers only buy item once in our e-commerce
        3.  Average Monetary: 168
            Regular customers average spending in our e-commerce was R$168.8
        4.  Most Transaction regular customers was in 2017
    - The least spend are R$ 10.07
    - The highest spend are R$ 4764.34
    

### **Occation Customers**

In [117]:
# Numerikal Features Analysis

occation=segment[segment['k_means_segment'] == 'occation']
occation.describe()

Unnamed: 0,year,recency,frequency,monetary
count,10821.0,10821.0,10821.0,10821.0
mean,2017.650494,191.126144,2.326957,428.466403
std,0.477611,114.44716,0.637349,441.822902
min,2016.0,0.0,1.0,9.59
25%,2017.0,102.0,2.0,164.2
50%,2018.0,185.0,2.0,297.7
75%,2018.0,277.0,2.0,520.43
max,2018.0,696.0,5.0,4681.78


**Occation Characteristic Based On Numerikal Features**

        1.  Average Recency: 198 Days
            Occation customers last purchase average was 198 days
        2.  Average Frequency: 2.5
            In average Occation customers buy item between 2 - 3 transaction in our e-commerce
        3.  Average Monetary: 445
            Occation customers average spending in our e-commerce was R$445.3
        4.  Most Transaction occation customers was in 2017 - 2018

In [118]:
# having the least spend onoccation customers segment
occation[occation['monetary'] == occation['monetary'].describe()[3]]   

Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary
22743,317cfc692e3f86c45c95697c61c853a6,occation,credit_card,paulinia,SP,2018,Aug,health_beauty,8,2,9.59


**The least occcation customers spend are R$ 9.59**
 
    - buy in the last 8 days
    - transaction using credit card & voucher
    - In City Paulinia
    - Buy Health Beauty
    - 2 transaction

In [144]:
# having the highest spend onoccation customers segment
occation[occation['monetary'] == occation['monetary'].describe()[7]]   

Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary,months
33399,48e1ac109decbb87765a3eade6854098,occation,credit_card,joao pessoa,PB,2018,Jun,computers,72,1,4681.78,5


**The highest occation customers spend are R$ 4681.78**
 
    - buy in the last 72 days
    - transaction using credit card 
    - In City joao pessoa	
    - Buy Computers
    - 1 transaction

In [120]:
ordered_months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# sorting data accoring to ordered_months
occation['months']=occation['month'].apply(lambda x:ordered_months.index(x))
occation = occation.sort_values('months')

In [121]:
exclude_2016 = occation[occation['year'] != 2016]
agg= exclude_2016.groupby(['month','months','year'])['monetary'].sum().reset_index().sort_values(by='months')
agg

Unnamed: 0,month,months,year,monetary
7,Jan,0,2017,15732.94
8,Jan,0,2018,410012.86
5,Feb,1,2017,24509.1
6,Feb,1,2018,349504.94
14,Mar,2,2018,384544.49
13,Mar,2,2017,41987.46
0,Apr,3,2017,34772.57
1,Apr,3,2018,390586.13
16,May,4,2018,407810.18
15,May,4,2017,78660.19


In [122]:
px.line(agg, x="month" , y="monetary", color = 'year', line_group = 'year', title = 'Total Monetary by Regular Customers')

**The most highest spend in Occation Customers are in Nov, in our analysis Novermber was Black Friday time, after Black Friday over people din't so much spend money in December, Unlike Regular customer there no transaction in 2018, Occation customers still doing transaction recently**

In [123]:
# 10 State most of occation customers
top_10_customer_state = occation['customer_state'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_customer_state = occation[occation['customer_state'].isin(top_10_customer_state)]

In [124]:
plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_customer_state['customer_state'].value_counts()/len(occation['customer_city'])*100
pal = sns.color_palette('rocket_r', len(df_price_cat))
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index,title='Regular Customers State')
fig.update_xaxes(title_text='State')
fig.update_yaxes(title_text='Percent Customers')

fig.show()

<Figure size 576x432 with 0 Axes>

**43% Occation customers is in Sao Paulo followed by Rio Janeiro (13.9%), Minas Gerais (11.2%), etc, as we can see most of our customers always reside in heavly dense population state**

In [125]:
# 10 City most of occationcustomers
top_10_customer_city = occation['customer_city'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_customer_city = occation[occation['customer_city'].isin(top_10_customer_city)]

In [126]:
plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_customer_city['customer_city'].value_counts()/len(occation['customer_city'])*100
pal = sns.color_palette('rocket_r', len(df_price_cat))
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index,title='Regular Customers City')
fig.update_xaxes(title_text='City')
fig.update_yaxes(title_text='Percent Customers')
fig.show()

<Figure size 576x432 with 0 Axes>

**Most of occation customers are in capital city of the state**

In [127]:
df_price_cat = occation['payment_type'].value_counts()/len(occation['payment_type'])*100
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.show()

**Credit card and Boleto still famous payment method, but in occation customers voucher is often being used too**

In [128]:
top_10_product = occation['product_category_name_english'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_product = occation[occation['product_category_name_english'].isin(top_10_product)]

plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_product['product_category_name_english'].value_counts()/len(occation['product_category_name_english'])*100
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.update_xaxes(title_text='Category')
fig.update_yaxes(title_text='Percent Customers')
fig.show()

<Figure size 576x432 with 0 Axes>

**Favorite buy category for occation customers are bed bath table(13.7%), furniture decor(10.6%), sports leisure(7.6%), etc**

#### **Conclusion Behavior Occation Customers**

    - Most of them in State: Sao Paolo, Customers city are scatter around capital city of the state
    - Most of them using Credit Cards(70%), a part of them using Boleto(16.5%), voucher is often used too (12%).
    - Favorite Category: Bed Bath Table, Furniture Decor, Sport Leisure, Computers accesories & Health Beauty
    - Most transaction in 2017 - 2018
    - Their treds numbers decline

    Occation Characteristic Based On Numerikal Features

        1.  Average Recency: 198 Days
            Occation customers last purchase average was 198 days
        2.  Average Frequency: 2.5
            In average Occation customers buy item between 2 - 3 transaction in our e-commerce
        3.  Average Monetary: 445
            Occation customers average spending in our e-commerce was R$445.3
        4.  Most Transaction occation customers was in 2017 - 2018
    - The least spend are R$ 9.59
    - The highest spend are R$ 520
    

### **Valuable Customers**

In [129]:
# Numerikal Features Analysis

valuable=segment[segment['k_means_segment'] == 'valuable']
valuable.describe()

Unnamed: 0,year,recency,frequency,monetary
count,950.0,950.0,950.0,950.0
mean,2017.506316,249.767368,6.129474,2296.198305
std,0.512707,159.135287,3.231998,2127.095975
min,2016.0,5.0,1.0,19.0
25%,2017.0,118.0,5.0,760.4775
50%,2018.0,223.0,6.0,1737.54
75%,2018.0,361.0,6.0,3087.74
max,2018.0,698.0,38.0,12490.88


**Valuable Characteristic Based On Numerikal Features**

        1.  Average Recency: 249 Days
            valuable customers last purchase average was 249 days
        2.  Average Frequency: 7.8
            In average valuable customers buy item between 7 - 8 transaction in our e-commerce
        3.  Average Monetary: 2091
            valuable customers average spending in our e-commerce was R$2091
        4.  Most Transaction valuable customers was in 2017 - 2018

In [160]:
# having the least spend valuable customers segment
valuable[valuable['monetary'] ==valuable['monetary'].describe()[3]]   

Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary,months
20071,2bca5fe01d46ab5cc2bc5aaade88d850,valuable,voucher,curitiba,PR,2017,Jul,computers_accessories,400,6,19.0,6


**The least valuable customers spend are R$ 19.0**
 
    - buy in the last 400 days
    - transaction using mostly voucher, 1 creditcard
    - In City Curitiba
    - Buy Computers accessories
    - 6 transaction

In [142]:
# having the highest spend valuable customers segment
valuable[valuable['monetary'] == valuable['monetary'].describe()[7]]   

Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary
87585,be825ddd3b40db3f91bf05b4e9435d56,valuable,credit_card,salvador,BA,2018,Jun,computers,83,4,12490.88


**The highest valuable customers spend are R$ 12490.88**
 
    - buy in the last 83 days
    - transaction using credit card 
    - In City Salvador
    - Buy Computers
    - 4 transaction

In [146]:
ordered_months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# sorting data accoring to ordered_months
valuable['months']=valuable['month'].apply(lambda x:ordered_months.index(x))
valuable = valuable.sort_values('months')

In [147]:
exclude_2016 = valuable[valuable['year'] != 2016]
agg= exclude_2016.groupby(['month','months','year'])['monetary'].sum().reset_index().sort_values(by='months')
agg

Unnamed: 0,month,months,year,monetary
7,Jan,0,2017,23799.3
8,Jan,0,2018,130750.97
5,Feb,1,2017,33129.15
6,Feb,1,2018,151699.32
14,Mar,2,2018,171090.12
13,Mar,2,2017,57499.72
0,Apr,3,2017,54014.86
1,Apr,3,2018,148805.86
16,May,4,2018,215062.68
15,May,4,2017,92812.79


In [148]:
px.line(agg, x="month" , y="monetary", color = 'year', line_group = 'year', title = 'Total Monetary by Valuable Customers')

**The most highest spend Valuable Customers are in May, Unlike Regular customer there no transaction in 2018, valuable customers still doing transaction recently**

In [149]:
# 10 State most of valuablen customers
top_10_customer_state = valuable['customer_state'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_customer_state = valuable[valuable['customer_state'].isin(top_10_customer_state)]

In [152]:
plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_customer_state['customer_state'].value_counts()/len(valuable['customer_city'])*100
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index,title='Regular Customers State')
fig.update_xaxes(title_text='State')
fig.update_yaxes(title_text='Percent Customers')

fig.show()

<Figure size 576x432 with 0 Axes>

In [153]:
# 10 City most ofvaluable customers
top_10_customer_city = valuable['customer_city'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_customer_city = valuable[valuable['customer_city'].isin(top_10_customer_city)]

In [154]:
plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_customer_city['customer_city'].value_counts()/len(valuable['customer_city'])*100
pal = sns.color_palette('rocket_r', len(df_price_cat))
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index,title='Regular Customers City')
fig.update_xaxes(title_text='City')
fig.update_yaxes(title_text='Percent Customers')
fig.show()

<Figure size 576x432 with 0 Axes>

**Sao Paulo (17.1%), Rio De janeiro (8.1%), it seems our valuable customers most likely scatter in around state of Sao Paulo not in concetrated area like only capital city  Sao Paulo**

In [155]:
df_price_cat = valuable['payment_type'].value_counts()/len(valuable['payment_type'])*100
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.show()

In [156]:
top_10_product = valuable['product_category_name_english'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_product = valuable[valuable['product_category_name_english'].isin(top_10_product)]

plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_product['product_category_name_english'].value_counts()/len(valuable['product_category_name_english'])*100
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.update_xaxes(title_text='Category')
fig.update_yaxes(title_text='Percent Customers')
fig.show()

<Figure size 576x432 with 0 Axes>

#### **Conclusion Behavior Valuable Customers**

    - Most of them in State: Sao Paolo, Customers city are scatter around capital city of the state
    - Most of them using Credit Cards(58%), a part of them using Boleto(22.7%), voucher is often used too (17%).
    - Favorite Category: Furniture decor, bed bath table, computers accesories, housewares, health beauty, etc
    - Most transaction in 2017 - 2018
    - Their treds numbers increase

    Valuable Characteristic Based On Numerikal Features

        1.  Average Recency: 249 Days
            valuable customers last purchase average was 249 days
        2.  Average Frequency: 7.8
            In average valuable customers buy item between 7 - 8 transaction in our e-commerce
        3.  Average Monetary: 2091
            valuable customers average spending in our e-commerce was R$2091
        4.  Most Transaction valuable customers was in 2017 - 2018
    - The least spend are R$ 19.00
    - The highest spend are R$ 12490.88
    

### **Loyal Customers**

In [158]:
# Numerikal Features Analysis

loyal=segment[segment['k_means_segment'] == 'loyal']
loyal.describe()

Unnamed: 0,year,recency,frequency,monetary
count,26.0,26.0,26.0,26.0
mean,2017.538462,232.692308,13.692308,20175.883077
std,0.508391,143.870433,13.469281,10242.811889
min,2017.0,22.0,4.0,556.13
25%,2017.0,132.25,8.25,14247.46
50%,2018.0,234.5,10.0,17728.94
75%,2018.0,325.25,14.75,24375.5675
max,2018.0,580.0,75.0,45256.0


**Loyal Characteristic Based On Numerikal Features**

        1.  Average Recency: 232 Days
            valuable customers last purchase average was 232 days
        2.  Average Frequency: 13.7
            In average valuable customers buy item between 13 - 14 transaction in our e-commerce
        3.  Average Monetary: 20175.8
            valuable customers average spending in our e-commerce was R$ 20175.8
        4.  Most Transaction valuable customers was in 2017 - 2018

In [162]:
# having the least spend onoccation customers segment
loyal[loyal['monetary'] == loyal['monetary'].describe()[3]]   

Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary
70997,9a736b248f67d166d2fbb006bcb877c3,loyal,voucher,sao paulo,SP,2017,Jul,furniture_decor,390,75,556.13


**The least loyal customers spend are R$ 556.13**
 
    - buy in the last 390 days
    - transaction using voucher
    - In Sao paulo	
    - Buy Furniture decor
    - 75 transaction

In [164]:
# having the highest spend onoccation customers segment
loyal[loyal['monetary'] ==loyal['monetary'].describe()[7]]   

Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary
48336,698e1cf81d01a3d389d96145f7fa6df8,loyal,credit_card,goiania,GO,2017,Aug,auto,376,20,45256.0


**The highest loyal customers spend are R$ 45256.0**
 
    - buy in the last 376 days
    - transaction using credit card 
    - In Goiania	
    - Buy Auto
    - 20 transaction

In [166]:
ordered_months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# sorting data accoring to ordered_months
loyal['months']=loyal['month'].apply(lambda x:ordered_months.index(x))
loyal = loyal.sort_values('months')

In [180]:
exclude_2016 = loyal[loyal['year'] != 2018]
agg= exclude_2016.groupby(['month','months','year'])['monetary'].sum().reset_index().sort_values(by='months')
agg

Unnamed: 0,month,months,year,monetary
3,Jan,0,2017,11745.0
0,Apr,3,2017,12834.5
4,Jul,6,2017,556.13
1,Aug,7,2017,45256.0
7,Sep,8,2017,17671.0
6,Oct,9,2017,41013.6
5,Nov,10,2017,86546.1
2,Dec,11,2017,14196.28


In [182]:
px.line(agg, x="month" , y="monetary", color = 'year', line_group = 'year', title = 'Total Monetary by Loyal Customers 2017')

In [183]:
exclude_2016 = loyal[loyal['year'] != 2017]
agg= exclude_2016.groupby(['month','months','year'])['monetary'].sum().reset_index().sort_values(by='months')
agg

Unnamed: 0,month,months,year,monetary
3,Jan,0,2018,30086.88
2,Feb,1,2018,69099.89
6,Mar,2,2018,26108.98
0,Apr,3,2018,45149.64
7,May,4,2018,19174.38
5,Jun,5,2018,19457.04
4,Jul,6,2018,46169.28
1,Aug,7,2018,39508.26


In [184]:
px.line(agg, x="month" , y="monetary", color = 'year', line_group = 'year', title = 'Total Monetary by Loyal Customers 2018')

In [185]:
# 10 State most of loyal customers
top_10_customer_state = loyal['customer_state'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_customer_state = loyal[loyal['customer_state'].isin(top_10_customer_state)]

In [192]:
plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_customer_state['customer_state'].value_counts()/len(loyal['customer_city'])*100
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index,title='Regular Customers State')
fig.update_xaxes(title_text='State')
fig.update_yaxes(title_text='Percent Customers')

fig.show()

<Figure size 576x432 with 0 Axes>

In [193]:
# 10 City most of loyal customers
top_10_customer_city = loyal['customer_city'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_customer_city = loyal[loyal['customer_city'].isin(top_10_customer_city)]

In [194]:
plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_customer_city['customer_city'].value_counts()/len(loyal['customer_city'])*100
pal = sns.color_palette('rocket_r', len(df_price_cat))
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index,title='Regular Customers City')
fig.update_xaxes(title_text='City')
fig.update_yaxes(title_text='Percent Customers')
fig.show()

<Figure size 576x432 with 0 Axes>

In [195]:
df_price_cat = loyal['payment_type'].value_counts()/len(loyal['payment_type'])*100
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.show()

In [196]:
top_10_product = loyal['product_category_name_english'].value_counts().sort_values(ascending=False).head(10).index
df_top_10_product = loyal[loyal['product_category_name_english'].isin(top_10_product)]

plt.figure(figsize=(8, 6))
df_price_cat = df_top_10_product['product_category_name_english'].value_counts()/len(loyal['product_category_name_english'])*100
fig = px.bar(df_price_cat, x=df_price_cat.index, y= df_price_cat.values,color=df_price_cat.index)
fig.update_xaxes(title_text='Category')
fig.update_yaxes(title_text='Percent Customers')
fig.show()

<Figure size 576x432 with 0 Axes>

#### **Conclusion Behavior Loyal Customers**

    - Most of them in State: Sao Paolo, Customers city are scatter around  city 
    - Most of them using Credit Cards(46%), a half of them using Boleto(50%), a part using voucher is often used too (3.8%).
    - Favorite Category:  computers accesories, furniture decor, office furniture, signaling & security
    - Most transaction in 2017 - 2018
    - Their treds numbers volatile

    Loyal Characteristic Based On Numerikal Features

        1.  Average Recency: 232 Days
            valuable customers last purchase average was 232 days
        2.  Average Frequency: 13.7
            In average valuable customers buy item between 13 - 14 transaction in our e-commerce
        3.  Average Monetary: 20175.8
            valuable customers average spending in our e-commerce was R$ 20175.8
        4.  Most Transaction valuable customers was in 2017 - 2018
    - The least spend are R$ 556.13
    - The highest spend are R$ 45256.0
    

## **The only one Best Customers**

In [197]:
segment[segment['k_means_segment'] == 'best']

Unnamed: 0,customer_unique_id,k_means_segment,payment_type,customer_city,customer_state,year,month,product_category_name_english,recency,frequency,monetary
4674,0a0a92112bd4c708ca5fde585afaa872,best,credit_card,rio de janeiro,RJ,2017,Sep,fixed_telephony,338,8,109312.64


#### **The customer segment that contributes the most to monetary value**

In [204]:
segment.groupby('k_means_segment')['monetary'].sum().sort_values(ascending=False)

k_means_segment
passive     7103825.05
regular     5752600.72
occation    4636434.95
valuable    2181388.39
loyal        524572.96
best         109312.64
Name: monetary, dtype: float64

**As we can see passive customers even the average monetary is low they the most contribute to our monetary value, as we can do we must concetrate to get our passive & regular Customers to spend in our e-commerce**

#### **The Customers segment that recently using our e-commerce for transaction**

In [209]:
segment.groupby('k_means_segment')['recency'].mean().sort_values()

k_means_segment
passive     135.400293
occation    191.126144
loyal       232.692308
valuable    249.767368
best        338.000000
regular     401.560806
Name: recency, dtype: float64

**Passive customers are the most recent using our e-commerce followed by occation, so if we want more transaction it better to advertise our e-commerce to them using our service & get their feedback**

#### **The Customers segment that having most contribute to our transaction**

In [210]:
segment.groupby('k_means_segment')['frequency'].sum().sort_values(ascending=False)

k_means_segment
passive     47860
regular     38374
occation    25180
valuable     5823
loyal         356
best            8
Name: frequency, dtype: int64

**Our transaction are mostly come from 3 segment passive,regular,occation such that we need focus on marketing to these segment**

## **Conclusion Base on Customers segment behavior**

Because the naming of the customer segment is done before determining how it behaves, there is a slight error in its meaning, however

Passive customers are the most customers, contribute the most to monetary value, using our e-commerce recently. The thing that really needs to be considered in these passive customers is they never make a second transaction, we need to take feedback from them so we know what needs to be improved from our e-commerce.

Our customers mainly come from 3 segments, namely passive, regular & occation, if we want to increase transactions in our e-commerce, we need to do marketing to them, how to make them continue to use our e-commerce.

Valuable, loyal & Best customers are important to maintain but we must focus more on more users in our e-commerce.