In [2]:
import pandas as pd
import os
import plotly.graph_objects as go

In [5]:
# Raw transactions; the cells below read the Dystrybutor, Towar, Data and
# Quantity columns from this frame.
DATA_PATH = "./dystrybucja_warzyw.csv"
df = pd.read_csv(DATA_PATH)
# mitosheet.sheet(df, analysis_to_replay="id-eheqhoslfn")

In [6]:
# Quantity over time for one distributor/product pair, with a dashed green
# vertical line on every date that has a positive quantity.
dystrybutor = 'Bio Ananasy i Papryki Magazyn'
towar = 'Jabłka'

is_selected = (df['Dystrybutor'] == dystrybutor) & (df['Towar'] == towar)
k = df.loc[is_selected]
min_date_plot = k['Data'].min()
max_date_plot = k['Data'].max()

fig = go.Figure()
fig.add_trace(go.Scatter(x=k['Data'], y=k['Quantity'], name="Quantity"))

order_dates = k.loc[k['Quantity'] > 0, 'Data'].unique()
for order_date in order_dates:
    fig.add_vline(x=order_date, line_width=1.5, line_dash="dash", line_color="green", name="Order")

# Range-selector buttons: "all" first, then fixed look-back windows.
range_buttons = [dict(step="all")] + [
    dict(count=count, label=label, step=step, stepmode="backward")
    for count, label, step in (
        (1, "1m", "month"),
        (3, "3m", "month"),
        (6, "6m", "month"),
        (1, "1y", "year"),
    )
]

fig.update_layout(
    title_text=f"{dystrybutor} - {towar}",
    title_x=0.5,
    title_font_color="blue",
    title_font_size=20,
    legend_title="",
    xaxis=dict(
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(visible=True),
        type="date",
        range=[min_date_plot, max_date_plot],
    ),
)
fig.update_xaxes(rangeslider_thickness=0.2)
fig.update_layout(autosize=False, width=1500, height=400, showlegend=True)
fig.show()

In [7]:
import lifetimes
from lifetimes import BetaGeoFitter
from lifetimes import GammaGammaFitter, ParetoNBDFitter, ModifiedBetaGeoFitter

In [8]:
def clv_week(df, start_date, clv_horizons = [3], level=['Dystrybutor']):
    """Build weekly snapshots of purchase-probability and CLV metrics.

    For every Monday (``freq="W-MON"``) between ``start_date`` and the latest
    date in ``df``, the transactions dated on or before that Monday are
    summarised with ``lifetimes.utils.summary_data_from_transaction_data``
    ('Quantity' is passed as the monetary-value column). A ``BetaGeoFitter``
    and a ``GammaGammaFitter`` are then fitted to the customers whose
    ``monetary_value`` is positive, and their predictions are stored per
    customer for that snapshot date.

    Parameters
    ----------
    df : pandas.DataFrame
        Transactions with 'Dystrybutor', 'Data' and 'Quantity' columns, plus
        'Towar' when ``level`` contains 'Towar'. The frame is not modified;
        all work happens on a copy.
    start_date : str or datetime-like
        Lower bound of the weekly snapshot dates.
    clv_horizons : iterable of int
        Horizons in months. Each horizon ``h`` adds an ``Exp_Orders_<h>M``
        column (expected purchases over ``h * 30`` days) and a ``CLV_<h>M``
        column (``customer_lifetime_value`` with ``time=h``, no discounting).
    level : list of str
        When it contains 'Towar' a "customer" is a (Dystrybutor, Towar) pair,
        otherwise a customer is a Dystrybutor.

    Returns
    -------
    pandas.DataFrame
        One row per customer per snapshot date. Columns are
        'Dystrybutor', ['Towar'], 'Data', 'p_alive', 'monetary_value' and the
        per-horizon metric columns. An empty frame with those columns is
        returned when no snapshot could be computed (for example when
        ``start_date`` lies after the last transaction).
    """
    by_product = 'Towar' in level
    horizons = list(clv_horizons)
    # Column names produced per horizon, de-duplicated but order-preserving.
    metric_cols = list(dict.fromkeys(
        name
        for horizon in horizons
        for name in ('Exp_Orders_' + str(horizon) + 'M', 'CLV_' + str(horizon) + 'M')
    ))

    # Private copy: parsing dates and adding the key must not leak to the caller.
    transactions = df.copy()
    transactions['Data'] = pd.to_datetime(transactions['Data'])
    max_date = transactions['Data'].max()

    # The customer key does not depend on the snapshot date, so build it once.
    if by_product:
        transactions['Dystrybutor_Towar'] = transactions['Dystrybutor'] + '$' + transactions['Towar']
    else:
        transactions['Dystrybutor_Towar'] = transactions['Dystrybutor']

    snapshots = []
    if not pd.isna(max_date):
        for date in pd.date_range(start = start_date, end = max_date, freq="W-MON"):
            history = transactions.loc[transactions['Data'] <= date]

            # NOTE(review): no observation_period_end is passed, so lifetimes
            # falls back to its default for the end of the observation window
            # rather than using `date` -- confirm this is intended.
            clv = lifetimes.utils.summary_data_from_transaction_data(history, 'Dystrybutor_Towar', 'Data', 'Quantity')
            # .copy() so the column assignments below act on an independent frame.
            clv = clv.loc[clv['monetary_value'] > 0].copy()
            if clv.empty:
                # Nothing to fit for this week; the models cannot be estimated.
                continue

            bg_fitter = BetaGeoFitter(penalizer_coef=0.01)
            bg_fitter.fit(clv['frequency'], clv['recency'], clv['T'])

            ggf = GammaGammaFitter(penalizer_coef=0.01)
            ggf.fit(clv["frequency"], clv["monetary_value"])

            clv['Data'] = date
            clv['p_alive'] = bg_fitter.conditional_probability_alive(clv["frequency"], clv["recency"], clv["T"]).round(3)

            for clv_horizon in horizons:
                clv['Exp_Orders_'+str(clv_horizon)+'M'] = bg_fitter.conditional_expected_number_of_purchases_up_to_time(
                    clv_horizon*30, clv['frequency'].values, clv['recency'], clv['T']).round(3)
                clv['CLV_'+str(clv_horizon)+'M'] = ggf.customer_lifetime_value(
                    bg_fitter, clv["frequency"], clv["recency"], clv["T"], clv["monetary_value"],
                    time = clv_horizon, freq = 'D', discount_rate=0)

            snapshots.append(clv.drop(columns = ['frequency', 'recency', 'T'], errors='ignore'))

    if not snapshots:
        # Keep the output schema stable even when there is nothing to report.
        if by_product:
            empty_cols = ['Dystrybutor', 'Towar', 'Data', 'p_alive', 'monetary_value'] + metric_cols
        else:
            empty_cols = ['Dystrybutor', 'monetary_value', 'Data', 'p_alive'] + metric_cols
        return pd.DataFrame(columns=empty_cols)

    # Single concat instead of growing the frame inside the loop.
    clv_all = pd.concat(snapshots, axis=0).reset_index()

    if by_product:
        # Split the composite key once; n=1 keeps any later '$' inside Towar.
        key_parts = clv_all['Dystrybutor_Towar'].str.split('$', n=1)
        clv_all['Dystrybutor'] = key_parts.str[0]
        clv_all['Towar'] = key_parts.str[1]
        clv_all = clv_all.drop(columns=['Dystrybutor_Towar'])
        clv_all = clv_all[['Dystrybutor', 'Towar', 'Data', 'p_alive', 'monetary_value'] + clv_all.filter(regex='Exp|CLV|Sales').columns.tolist()]
    else:
        clv_all = clv_all.rename(columns = {'Dystrybutor_Towar':'Dystrybutor'})

    return clv_all

In [10]:
# Weekly snapshots per (Dystrybutor, Towar) pair with a single 3-month horizon.
clv_warzywa = clv_week(
    df=df,
    start_date='2022-01-15',
    clv_horizons=[3],
    level=['Dystrybutor', 'Towar'],
)

In [12]:
# Display the snapshots ordered by distributor, product and snapshot date.
clv_warzywa.sort_values(by=['Dystrybutor', 'Towar', 'Data'])

Unnamed: 0,Dystrybutor,Towar,Data,p_alive,monetary_value,Exp_Orders_3M,CLV_3M
32646,Bio Ananasy i Brokuły Dystrybutor,Pomidory malinowe,2022-09-26,0.912,73.900000,1.868,156.712878
34082,Bio Ananasy i Brokuły Dystrybutor,Pomidory malinowe,2022-10-03,0.898,73.900000,1.740,145.910578
35524,Bio Ananasy i Brokuły Dystrybutor,Pomidory malinowe,2022-10-10,0.879,73.900000,1.609,135.016845
36983,Bio Ananasy i Brokuły Dystrybutor,Pomidory malinowe,2022-10-17,0.867,73.900000,1.508,126.497649
38463,Bio Ananasy i Brokuły Dystrybutor,Pomidory malinowe,2022-10-24,0.850,73.900000,1.405,117.787674
...,...,...,...,...,...,...,...
142071,Świeże Wiśnie i Warzywa Hurtownia,Ziemniaki,2023-11-27,0.994,60.607895,4.907,298.467170
144150,Świeże Wiśnie i Warzywa Hurtownia,Ziemniaki,2023-12-04,0.991,60.607895,4.843,294.546224
146232,Świeże Wiśnie i Warzywa Hurtownia,Ziemniaki,2023-12-11,0.987,60.607895,4.771,290.185735
148318,Świeże Wiśnie i Warzywa Hurtownia,Ziemniaki,2023-12-18,0.980,60.607895,4.692,285.385912


In [25]:
# CLV_3M over time for one distributor/product pair, with a dashed green
# vertical line on every date that has a positive quantity.
dystrybutor = 'Naturalne Ananasy i Brokuły Dystrybutor'
towar = 'Jabłka'

is_selected = (df['Dystrybutor'] == dystrybutor) & (df['Towar'] == towar)
k = df.loc[is_selected]
min_date_plot = k['Data'].min()
max_date_plot = k['Data'].max()

is_selected_clv = (clv_warzywa['Dystrybutor'] == dystrybutor) & (clv_warzywa['Towar'] == towar)
k_clv = clv_warzywa.loc[is_selected_clv]

fig = go.Figure()
# Optional traces, currently switched off:
# fig.add_trace(go.Scatter(x = k['Data'], y = k['Quantity'], name="Quantity"))
# fig.add_trace(go.Scatter(x = k_clv['Data'], y = k_clv['p_alive'], line = dict(shape = 'linear', color = 'red', dash = 'solid'),  name="p_alive"))
clv_line = dict(shape='linear', color='darkblue', dash='solid')
fig.add_trace(go.Scatter(x=k_clv['Data'], y=k_clv['CLV_3M'], line=clv_line, name="CLV_3M"))

order_dates = k.loc[k['Quantity'] > 0, 'Data'].unique()
for order_date in order_dates:
    fig.add_vline(x=order_date, line_width=1.5, line_dash="dash", line_color="green", name="Order")

# Range-selector buttons: "all" first, then fixed look-back windows.
range_buttons = [dict(step="all")] + [
    dict(count=count, label=label, step=step, stepmode="backward")
    for count, label, step in (
        (1, "1m", "month"),
        (3, "3m", "month"),
        (6, "6m", "month"),
        (1, "1y", "year"),
    )
]

fig.update_layout(
    title_text=f"{dystrybutor} - {towar}",
    title_x=0.5,
    title_font_color="blue",
    title_font_size=20,
    legend_title="",
    xaxis=dict(
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(visible=True),
        type="date",
        range=[min_date_plot, max_date_plot],
    ),
)
fig.update_xaxes(rangeslider_thickness=0.2)
fig.update_layout(autosize=False, width=1500, height=400, showlegend=True)
fig.show()

In [24]:
# Display every snapshot row belonging to the selected distributor.
clv_warzywa.loc[clv_warzywa["Dystrybutor"] == "Naturalne Ananasy i Brokuły Dystrybutor"]

Unnamed: 0,Dystrybutor,Towar,Data,p_alive,monetary_value,Exp_Orders_3M,CLV_3M
181,Naturalne Ananasy i Brokuły Dystrybutor,Jabłka,2022-01-31,0.560,74.700000,7.811,678.041338
406,Naturalne Ananasy i Brokuły Dystrybutor,Jabłka,2022-02-07,0.820,99.600000,11.266,1172.061438
748,Naturalne Ananasy i Brokuły Dystrybutor,Jabłka,2022-02-14,0.885,96.042857,16.137,1578.448362
1157,Naturalne Ananasy i Brokuły Dystrybutor,Jabłka,2022-02-21,0.422,96.042857,5.394,527.657662
1597,Naturalne Ananasy i Brokuły Dystrybutor,Jabłka,2022-02-28,0.117,96.042857,1.199,117.291611
...,...,...,...,...,...,...,...
140841,Naturalne Ananasy i Brokuły Dystrybutor,Jabłka,2023-11-27,0.000,96.042857,0.000,0.000011
142919,Naturalne Ananasy i Brokuły Dystrybutor,Jabłka,2023-12-04,0.000,96.042857,0.000,0.000010
144998,Naturalne Ananasy i Brokuły Dystrybutor,Jabłka,2023-12-11,0.000,96.042857,0.000,0.000009
147082,Naturalne Ananasy i Brokuły Dystrybutor,Jabłka,2023-12-18,0.000,96.042857,0.000,0.000008
