# Objectives

Plot user journeys as line plots to get an idea of how many customers followed a given journey.

- recalculate the rfm scores and loyalty scores
- add the ltv parameter from the lifetimes library
- plot the number of transactions on the x-axis
- plot the loyalty score (or an equivalent parameter) on the y-axis

## Data requirement for running the notebook

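This notebook primarily caters to Shopify clients, so `customer_df` is expected to carry additional Shopify-specific fields (for example `orders_count`, `accepts_marketing` and `verified_email`, which the heuristic filter below relies on).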

1. transactions_df :
2. customers_df :
3. merchandise_df :

**Notes** 
- recency is calculated as per last order date of the customer and not the present date

**Todo** 
- frequency change calc
- filtering of order count just for plotting
- nth purchase - x axis / loyalty level - y axis

(--------)

- bar chart
- grouping clustering 
- markers 
    - products on the customer journey
    - symptom vs trigger
- smoothing the curve of the customer journey

**Add todo**
- Measuring the intervals between purchases. 


--------------------------

- grouping in line plot (general and based on loyalty tiers)

In [1]:
#data processing and math
import pandas as pd
import numpy as np

#visualization
import seaborn as sns
import plotly.offline as py
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio

from datetime import datetime,timedelta

#lifetime library

#pandas setting to show up to 50 columns when a DataFrame is displayed (the row display limit is not changed here)
pd.set_option("display.max_columns", 50)

In [2]:
#set path accordingly if running in your own local system and make sure you have the required data from gcp
#path ends with '/', so the sub-paths appended below must not start with one
path = '/home/td/Desktop/cerebra_clients_data/data_ridgewallet/'

#transactions data: two exports that are stacked into one frame
combined_transformed_transaction_lines_df = pd.read_parquet(path+'combined_data_points/ridgewallet_parquet_files_combined_transformed_transaction_lines_part-00000-f186deaa-9bcb-4e68-ae38-810a9582b681-c000.snappy.parquet')
combined_transformed_transaction_lines_df_2 = pd.read_parquet(path+'combined_data_points/latest_combined_transaction_lines_20210301_20210708.parquet')
dfs = [combined_transformed_transaction_lines_df, combined_transformed_transaction_lines_df_2]
#ignore_index gives every stacked row its own label; without it each export keeps its own row labels,
#so the same label can appear more than once in the combined index
combined_transactions = pd.concat(dfs, ignore_index=True)

#customer data to get customer's affinity to marketing and their order count
customer_df = pd.read_json(path+'customers/ridgewallet_raw_data_customers_customers.json')

#merchandise data for product information
merchandise_df = pd.read_parquet(path+'combined_data_points/ridgewallet_parquet_files_combined_merchandise_part-00000-c2c7c29c-d500-45b8-b045-08a71464a5da-c000.snappy.parquet')

FileNotFoundError: [Errno 2] No such file or directory: '/home/td/Desktop/cerebra_clients_data/data_ridgewallet/combined_data_points/ridgewallet_parquet_files_combined_transformed_transaction_lines_part-00000-f186deaa-9bcb-4e68-ae38-810a9582b681-c000.snappy.parquet'

In [None]:
#cast customer_id to str first so the .str accessor below works whatever dtype the files delivered;
#missing ids turn into the literal strings 'nan' / 'None' and are removed by the filter
combined_transactions['customer_id'] = combined_transactions['customer_id'].astype(str)

#dropping customer_ids that contain '#', 'None' or 'nan' in a single pass
invalid_customer_id = combined_transactions['customer_id'].str.contains("#|None|nan", regex=True)
#.copy() so later column assignments act on an independent frame rather than on a slice
combined_transactions = combined_transactions[~invalid_customer_id].copy()

#actual metric file has these checks
#combined_transactions = combined_transactions[combined_transactions.customer_id != 'None']
#combined_transactions = combined_transactions[combined_transactions.customer_id != 'nan']

#dropping unnecessary columns which arent being used (currently disabled)
#customer_df = customer_df.drop(['email','created_at','updated_at', 'state', 'last_order_id', 'note', 'multipass_identifier',
#                              'tax_exempt','tags','last_order_name','currency','addresses','accepts_marketing_updated_at',
#                             'marketing_opt_in_level','tax_exemptions','admin_graphql_api_id','default_address'], axis=1)

#expose the customers' "id" column as customer_id so it can serve as the merge key later
customer_df = customer_df.rename(columns={"id":"customer_id"})

In [None]:
#preview the cleaned transactions frame
combined_transactions

In [None]:
#reference point for recency: the moment this cell is executed
present_date = datetime.now()

#recency = whole days between each line's order_date and present_date; monetary_value = the line's revenue
#NOTE(review): the notes at the top of the notebook say recency is measured from the customer's last
#order date, while this uses the current date - confirm which one is intended
days_since_order = (present_date - combined_transactions["order_date"]).dt.days
rfm = combined_transactions.assign(
    recency=days_since_order,
    monetary_value=combined_transactions['revenue'],
)

#frequency is added in a following cell; earlier experiments (transform('count'), groupby(...).size(),
#value_counts() followed by a merge) were abandoned

## optimize the groupby 

- https://stackoverflow.com/questions/51975512/faster-alternative-to-perform-pandas-groupby-operation

In [None]:
#frequency = position of the line's order_date among the customer's distinct order dates
#(1 = earliest date, 2 = next one, ...). A dense rank is vectorised (no per-group Python lambda) and is
#independent of the row order, whereas factorize numbered the dates in order of appearance.
#Rows without an order_date get 0, which is what factorize()[0] + 1 produced for them.
rfm['frequency'] = (
    combined_transactions.groupby("customer_id")["order_date"]
    .rank(method="dense")
    .fillna(0)
    .astype(int)
)

In [None]:
#write the intermediate rfm frame to a CSV-formatted file in the working directory
#NOTE(review): the file name has no .csv extension, unlike the other exports in this notebook - confirm this is intended
rfm.to_csv("Intermediate_rfm_df")

In [None]:
#spot check: all rfm rows of one customer (customer_id is a string at this point)
rfm.loc[rfm["customer_id"]=="5276505230"]

In [None]:
#quantile scores: recency in up to 5 bins, monetary value in up to 3 bins.
#labels=False returns the 0-based bin number, so the scores stay valid when duplicates='drop' merges
#bins with identical edges; explicit label lists raise a ValueError as soon as one bin is dropped.
r_score = pd.qcut(rfm['recency'], q=5, labels=False, duplicates='drop') + 1
m_score = pd.qcut(rfm['monetary_value'], q=3, labels=False, duplicates='drop') + 1

#'R_rev' because higher number of days reflects lower recency score
rfm_with_labels = rfm.assign(R_rev = r_score.values, M = m_score.values)

#making sure the score columns are numeric; applied per column (the previous axis=1 ran once per row)
cols = ['R_rev','frequency','M']
rfm_with_labels[cols] = rfm_with_labels[cols].apply(pd.to_numeric, errors="raise")

#adjusting the R score so that the most recent orders get the highest value
rfm_with_labels['R'] = 6 - rfm_with_labels['R_rev']

#loyalty score = R + frequency + M
rfm_with_labels['loyalty_score'] = rfm_with_labels[['R', 'frequency', 'M']].sum(axis=1)

#cleaning up the final dataframe: fresh row numbering, helper columns removed
rfm_with_score = rfm_with_labels.reset_index()
customer_loyalty_df = rfm_with_score.drop(['index','R_rev'], axis = 1)

In [None]:
#setting up custom heuristics
#both merge keys must be strings, so each frame's own customer_id is cast to str
customer_loyalty_df['customer_id'] = customer_loyalty_df.customer_id.astype(str)
#fix: this line used to copy customer_loyalty_df's ids into customer_df (aligned on the row index),
#which overwrote the real customer ids and attached customer attributes to the wrong customers
customer_df['customer_id'] = customer_df.customer_id.astype(str)

#input for custom heuristic
#when customer_df and customer_loyalty_df are merged, all customer_ids (current customers) are expected to be present in the customer_df
custom_heuristic_input_df = pd.merge(customer_loyalty_df, customer_df, on='customer_id', how='left')

#the three heuristic rules: more than one order, accepts marketing, verified email
is_repeat_buyer = custom_heuristic_input_df['orders_count'] > 1
accepts_marketing = custom_heuristic_input_df['accepts_marketing'] == True
has_verified_email = custom_heuristic_input_df['verified_email'] == True
custom_heuristic_output_df = custom_heuristic_input_df.loc[is_repeat_buyer & accepts_marketing & has_verified_email]

#output
custom_heuristic_output_df = custom_heuristic_output_df.reset_index()
final_output_df = custom_heuristic_output_df.drop(['index'],axis=1)

In [None]:
#save the scored frame as it is before the heuristic filter is applied
customer_loyalty_df.to_csv("customer_loyalty_before_heuristic_filtering.csv")

In [None]:
#save the rows that passed the heuristic filter
final_output_df.to_csv("final_output_df.csv")

In [None]:
#product_id is cast to str on both sides so the merge keys compare as text
merchandise_df['product_id'] = merchandise_df['product_id'].astype(str)
final_output_df['product_id'] = final_output_df['product_id'].astype(str)

#left-join the merchandise attributes, keep one row per (order, product, customer, date)
#and renumber the rows
product_loyalty_df = (
    pd.merge(final_output_df, merchandise_df, on='product_id', how='left')
    .assign(customer_id=lambda frame: frame['customer_id'].astype(str))
    .drop_duplicates(subset=['order_id','product_id','customer_id','order_date'])
    .reset_index()
    .drop(columns=['index'])
)

In [None]:
#preview the merged product/loyalty frame
product_loyalty_df

In [None]:
#spot check: rows of one customer (customer_id is a string in this frame)
product_loyalty_df.loc[product_loyalty_df['customer_id']=='580937580618']

In [None]:
#load the results for future plots

In [None]:
#loyalty score over time, one line per product category.
#px.line connects points in row order, so the rows are sorted by order_date first;
#unsorted rows make each line jump back and forth along the x axis
fig = px.line(
    product_loyalty_df.sort_values(by='order_date'),
    x='order_date',
    y='loyalty_score',
    color='category_name',
)
fig.show()

In [None]:
#loyalty score over time for the first 500 rows, one line per customer.
#the sample is sorted by order_date because px.line connects points in row order
fig = px.line(
    product_loyalty_df[:500].sort_values(by='order_date'),
    x='order_date',
    y='loyalty_score',
    color='customer_id',
)
fig.show()

# Plot

1. ticks to indicate product markers - https://plotly.com/python/tick-formatting/
2. marker symbols to indicate products - https://plotly.com/python/marker-style/#custom-marker-symbols

In [None]:
# load the previously saved results file - product_loyalty_df
product_loyalty_df = pd.read_csv('product_loyalty_df.csv')

#keep only rows whose product and category names do not contain '#', 'None' or 'nan';
#rows where the name is missing do not satisfy "== False" and are dropped as well
for name_col in ('product_name', 'category_name'):
    for marker in ("#", "None", "nan"):
        keep = product_loyalty_df[name_col].str.contains(marker) == False
        product_loyalty_df = product_loyalty_df[keep]

#keep rows with a frequency above 3 and renumber the rows
product_loyalty_df = product_loyalty_df[product_loyalty_df.frequency > 3].reset_index(drop=True)

In [None]:
#spot check: rows of one customer (the id is compared as an integer in this cell)
product_loyalty_df.loc[product_loyalty_df['customer_id']==580937580618]

### Additional 

In [None]:
#scaling using qcut: loyalty_score into up to 4 levels (LS), frequency into up to 10 levels (Freq).
#labels=False returns the 0-based bin number, so the levels are always 1..k for the k bins that remain
#after duplicates='drop'. The previous explicit labels had 9 entries for q=10 and raised a ValueError
#unless exactly one bin happened to be dropped.
freq_range = pd.qcut(product_loyalty_df['frequency'], q=10, labels=False, duplicates='drop') + 1
loyalty_score = pd.qcut(product_loyalty_df['loyalty_score'], q=4, labels=False, duplicates='drop') + 1
product_loyalty_df = product_loyalty_df.assign(LS = loyalty_score.values, Freq = freq_range.values)

In [None]:
#donut chart: number of rows per numeric loyalty level (LS)
ls_counts = product_loyalty_df["LS"].value_counts()
loyalty_levels = ls_counts.keys().tolist()
no_of_customers = ls_counts.values.tolist()

trace = go.Pie(
    labels=loyalty_levels,
    values=no_of_customers,
    marker=dict(line=dict(color="white", width=1.3)),
    hoverinfo="value+text",
    hole=.5,
)
layout = go.Layout(dict(
    title="What does your customerbase look like on the basis of loyalty ?",
    plot_bgcolor="rgb(243,243,243)",
    paper_bgcolor="rgb(243,243,243)",
))
data = [trace]
fig = go.Figure(data=data, layout=layout)
#texttemplate = "%{l}: %{v:$,s}", textposition = "inside"
py.iplot(fig)

In [None]:
# Data wrangling for plotting bar chart: translate the numeric level (LS) into a tier name
level_map = dict(zip(range(1, 5), ['Low', 'Medium', 'High', 'Supreme']))
product_loyalty_df['loyalty_labels'] = product_loyalty_df['LS'].map(level_map)

In [None]:
#donut chart: number of rows per named loyalty tier
label_counts = product_loyalty_df["loyalty_labels"].value_counts()
loyalty_levels = label_counts.keys().tolist()
no_of_customers = label_counts.values.tolist()

trace = go.Pie(
    labels=loyalty_levels,
    values=no_of_customers,
    marker=dict(line=dict(color="white", width=1.3)),
    hoverinfo="value+text",
    hole=.5,
)
layout = go.Layout(dict(
    title="What does your customerbase look like on the basis of loyalty ?",
    plot_bgcolor="rgb(243,243,243)",
    paper_bgcolor="rgb(243,243,243)",
))
data = [trace]
fig = go.Figure(data=data, layout=layout)
#texttemplate = "%{l}: %{v:$,s}", textposition = "inside"
py.iplot(fig)

In [None]:
#bar chart of the loyalty distribution (default colours, default background)
colors = ['red','blue','green','gold']

#style dictionaries (one per colour) taken from the loyalty label of the rows labelled 0..3;
#they are built here but the bar trace below does not use them (marker_color is commented out)
opts = []
for idx, color in enumerate(colors):
    opt = dict(
        target = product_loyalty_df['loyalty_labels'][[idx]].unique(),
        value = dict(marker = dict(color = color)),
    )
    opts.append(opt)

loyalty_bars = go.Bar(
    x = loyalty_levels,
    y = no_of_customers,
    width = [0.5] * 4,
    #marker_color = opts,
    hovertemplate = "Total number of customers: %{y}<br>Loyalty level: %{x}<extra></extra>",
)

barchart_loyalty_distribution = go.Figure()
barchart_loyalty_distribution.add_trace(loyalty_bars)

barchart_loyalty_distribution.update_layout(
    title = dict(text = "Distribution of customerbase on the basis of loyalty ?"),
    xaxis = dict(title = "Loyalty levels", linecolor = "#909497"),
    yaxis = dict(title = "No of Customers", linecolor = "#909497"),
)

barchart_loyalty_distribution.show()

In [None]:
#bar chart of the loyalty distribution with fixed bar colours on a light grey background
loyalty_bars = go.Bar(
    x = loyalty_levels,
    y = no_of_customers,
    width = [0.5] * 4,
    marker_color = ['blue','red','green','gold'],
)

barchart_loyalty_distribution = go.Figure()
barchart_loyalty_distribution.add_trace(loyalty_bars)

background = "rgb(243,243,243)"
axis_line = "#909497"
barchart_loyalty_distribution.update_layout(
    plot_bgcolor = background,
    paper_bgcolor = background,
    title = dict(text = "Distribution of customerbase on the basis of loyalty ?"),
    xaxis = dict(title = "Loyalty levels", linecolor = axis_line),
    yaxis = dict(title = "No of Customers", linecolor = axis_line),
)

#for a horizontal chart, orientation = "h" would flip the x and y-axis
barchart_loyalty_distribution.show()

In [None]:
#order the frame chronologically, then draw the first 100 rows as one line per customer
product_loyalty_df = product_loyalty_df.sort_values("order_date")
fig = px.line(
    data_frame=product_loyalty_df.head(100),
    x='order_date',
    y='loyalty_score',
    color='customer_id',
)
fig.show()

In [None]:
#order the frame chronologically, then draw every row as one line per customer
product_loyalty_df = product_loyalty_df.sort_values("order_date")
fig = px.line(
    data_frame=product_loyalty_df,
    x='order_date',
    y='loyalty_score',
    color='customer_id',
)
fig.show()

In [None]:
#loyalty level (LS) against raw frequency, one line per customer
fig = px.line(
    data_frame=product_loyalty_df,
    x='frequency',
    y='LS',
    color='customer_id',
)
fig.show()

In [None]:
#loyalty score against the binned frequency (Freq), one line per customer
fig = px.line(
    data_frame=product_loyalty_df,
    x='Freq',
    y='loyalty_score',
    color='customer_id',
)
fig.show()

In [None]:
#loyalty score against raw frequency, one line per customer (markers not enabled)
fig = px.line(
    data_frame=product_loyalty_df,
    x='frequency',
    y='loyalty_score',
    color='customer_id',
)
fig.show()

In [None]:
#same chart as the previous cell: loyalty score against raw frequency per customer
fig = px.line(
    data_frame=product_loyalty_df,
    x='frequency',
    y='loyalty_score',
    color='customer_id',
)
fig.show()

## Filters

1. Using a Savitzky–Golay filter - https://en.wikipedia.org/wiki/Savitzky%E2%80%93Golay_filter
2. Cookbook example - https://scipy.github.io/old-wiki/pages/Cookbook/SavitzkyGolay

In [None]:
from scipy.signal import savgol_filter

#unsmoothed baseline: loyalty score against raw frequency, one line per customer
fig = px.line(
    data_frame=product_loyalty_df,
    x='frequency',
    y='loyalty_score',
    color='customer_id',
)
fig.show()

In [None]:
import scipy
from scipy import signal

#Savitzky-Golay smoothing (window length 51, polynomial order 3) of both columns, in current row order
#NOTE(review): the filter runs over the rows of all customers together - confirm that is intended
smoothed_frequency = signal.savgol_filter(product_loyalty_df['frequency'], 51, 3)
smoothed_loyalty = signal.savgol_filter(product_loyalty_df['loyalty_score'], 51, 3)

fig = go.Figure()
fig.add_trace(go.Scatter(x=smoothed_frequency, y=smoothed_loyalty))
fig.update_layout(
    xaxis = dict(title = "Frequency", linecolor = "#909497"),
    yaxis = dict(title = "Loyalty score", linecolor = "#909497"),
)

fig.show()

In [None]:
import scipy
from scipy import signal

#both columns smoothed with a Savitzky-Golay filter (window length 51, polynomial order 3)
smoothed_frequency = signal.savgol_filter(product_loyalty_df['frequency'], 51, 3)
smoothed_loyalty = signal.savgol_filter(product_loyalty_df['loyalty_score'], 51, 3)

fig = go.Figure()
fig.add_trace(go.Scatter(x=smoothed_frequency, y=smoothed_loyalty))
fig.show()

In [None]:
import scipy
from scipy import signal

#only the frequency column is smoothed (window length 51, polynomial order 3); loyalty score stays raw
smoothed_frequency = signal.savgol_filter(product_loyalty_df['frequency'], 51, 3)

fig = go.Figure()
fig.add_trace(go.Scatter(x=smoothed_frequency, y=product_loyalty_df['loyalty_score']))
fig.show()

In [None]:
import scipy
from scipy import signal

#only the loyalty score is smoothed (window length 51, polynomial order 3); frequency stays raw
smoothed_loyalty = signal.savgol_filter(product_loyalty_df['loyalty_score'], 51, 3)

fig = go.Figure()
fig.add_trace(go.Scatter(x=product_loyalty_df['frequency'], y=smoothed_loyalty))
fig.show()

In [None]:
#journey of a single customer: loyalty score over time, with the purchased products marked on the line
product_loyalty_df = product_loyalty_df.sort_values(by="order_date")
test = product_loyalty_df.loc[product_loyalty_df['customer_id']==580937580618]

sample_1 = test[['customer_id','order_date','loyalty_score','product_name']].copy()

fig = go.Figure(go.Scatter(
    x = test['order_date'],
    y = test['loyalty_score'],
    mode = 'lines+markers',
    name = '580937580618',
))

#fix: the product markers used product_name as their y value. The y axis takes its type from the first
#trace (numeric loyalty scores), so name strings cannot be positioned on it. The markers now sit on the
#journey line and show the product name on hover.
fig.add_trace(go.Scatter(
    x = sample_1['order_date'],
    y = sample_1['loyalty_score'],
    text = sample_1['product_name'],
    hoverinfo = 'text',
    mode = 'markers',
    marker = dict(symbol='circle-dot', size = 12, color='black'),
    name = 'Product names',
))

#fig.update_layout(title='Customer Journeys')
fig.show()

In [None]:
#spot check: rows of the customer plotted above
product_loyalty_df.loc[product_loyalty_df['customer_id']==580937580618]

In [None]:
#one customer's purchases over time, coloured by loyalty tier
#NOTE(review): plotly documents transforms as deprecated from plotly v5 - confirm the installed version still renders them
colors = ['red', 'blue', 'green', 'gold']

#rows of the customer shown in this chart
customer_rows = product_loyalty_df[product_loyalty_df['customer_id'] == 580937580618]

#fix: style targets must be values of the column used in the groupby transform (loyalty_labels).
#They used to be the product names of the rows labelled 0..3, which never match a loyalty tier.
opts = [
    dict(target = group, value = dict(marker = dict(color = color)))
    for group, color in zip(customer_rows['loyalty_labels'].dropna().unique(), colors)
]

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = product_loyalty_df['order_date'],
  y = product_loyalty_df['loyalty_score'],
  text = product_loyalty_df['product_name'],
  hoverinfo = 'text',
  opacity = 1.0,
  #fix: size 10 combined with sizemode='area' and sizeref=200000 shrinks the markers to nothing;
  #a plain size is interpreted in pixels
  marker = dict(size = 10),
  transforms = [
      dict(
        type = 'filter',
        target = product_loyalty_df['customer_id'],
        #the filter attribute is called 'operation'; 'orientation' is not part of the filter transform
        operation = '=',
        value = 580937580618
      ),
      dict(
        type = 'groupby',
        groups = product_loyalty_df['loyalty_labels'],
        styles = opts
      )
  ]
)]

layout = dict(
    yaxis = dict(
        #type = 'log'
    )
)

fig_dict = dict(data=data, layout=layout)
pio.show(fig_dict, validate=False)

In [None]:
#one customer's purchases over time, coloured by product category
#NOTE(review): plotly documents transforms as deprecated from plotly v5 - confirm the installed version still renders them
colors = ['red', 'blue', 'green', 'gold']

#rows of the customer shown in this chart
customer_rows = product_loyalty_df[product_loyalty_df['customer_id'] == 580937580618]

#fix: style targets must be values of the column used in the groupby transform (category_name).
#They used to be the product names of the rows labelled 0..3, which never match a category.
#Only the first len(colors) categories get an explicit colour.
opts = [
    dict(target = group, value = dict(marker = dict(color = color)))
    for group, color in zip(customer_rows['category_name'].dropna().unique(), colors)
]

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = product_loyalty_df['order_date'],
  y = product_loyalty_df['loyalty_score'],
  text = product_loyalty_df['product_name'],
  hoverinfo = 'text',
  opacity = 1.0,
  #fix: size 10 combined with sizemode='area' and sizeref=200000 shrinks the markers to nothing;
  #a plain size is interpreted in pixels
  marker = dict(size = 10),
  transforms = [
      dict(
        type = 'filter',
        target = product_loyalty_df['customer_id'],
        #the filter attribute is called 'operation'; 'orientation' is not part of the filter transform
        operation = '=',
        value = 580937580618
      ),
      dict(
        type = 'groupby',
        groups = product_loyalty_df['category_name'],
        styles = opts
      )
  ]
)]

layout = dict(
    yaxis = dict(
        #type = 'log'
    )
)

fig_dict = dict(data=data, layout=layout)
pio.show(fig_dict, validate=False)

In [None]:
#one customer's purchases over time, coloured by product
#NOTE(review): plotly documents transforms as deprecated from plotly v5 - confirm the installed version still renders them
colors = ['red', 'blue', 'green', 'gold']

#rows of the customer shown in this chart
customer_rows = product_loyalty_df[product_loyalty_df['customer_id'] == 580937580618]

#fix: style targets are taken from the products this customer actually bought. They used to be the
#product names of the rows labelled 0..3 of the whole frame, which may repeat or belong to other customers.
#Only the first len(colors) products get an explicit colour.
opts = [
    dict(target = group, value = dict(marker = dict(color = color)))
    for group, color in zip(customer_rows['product_name'].dropna().unique(), colors)
]

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = product_loyalty_df['order_date'],
  y = product_loyalty_df['loyalty_score'],
  text = product_loyalty_df['product_name'],
  hoverinfo = 'text',
  opacity = 1.0,
  #fix: size 10 combined with sizemode='area' and sizeref=200000 shrinks the markers to nothing;
  #a plain size is interpreted in pixels
  marker = dict(size = 10),
  transforms = [
      dict(
        type = 'filter',
        target = product_loyalty_df['customer_id'],
        #the filter attribute is called 'operation'; 'orientation' is not part of the filter transform
        operation = '=',
        value = 580937580618
      ),
      dict(
        type = 'groupby',
        groups = product_loyalty_df['product_name'],
        styles = opts
      )
  ]
)]

layout = dict(
    yaxis = dict(
        #type = 'log'
    )
)

fig_dict = dict(data=data, layout=layout)
pio.show(fig_dict, validate=False)

In [None]:
#a second customer's purchases over time, coloured by loyalty tier
#NOTE(review): plotly documents transforms as deprecated from plotly v5 - confirm the installed version still renders them
colors = ['red', 'blue', 'green', 'gold']

#rows of the customer shown in this chart
customer_rows = product_loyalty_df[product_loyalty_df['customer_id'] == 424578318350]

#fix: style targets must be values of the column used in the groupby transform (loyalty_labels).
#They used to be the product names of the rows labelled 0..3, which never match a loyalty tier.
opts = [
    dict(target = group, value = dict(marker = dict(color = color)))
    for group, color in zip(customer_rows['loyalty_labels'].dropna().unique(), colors)
]

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = product_loyalty_df['order_date'],
  y = product_loyalty_df['loyalty_score'],
  text = product_loyalty_df['product_name'],
  hoverinfo = 'text',
  opacity = 1.0,
  #fix: size 10 combined with sizemode='area' and sizeref=200000 shrinks the markers to nothing;
  #a plain size is interpreted in pixels
  marker = dict(size = 10),
  transforms = [
      dict(
        type = 'filter',
        target = product_loyalty_df['customer_id'],
        #the filter attribute is called 'operation'; 'orientation' is not part of the filter transform
        operation = '=',
        value = 424578318350
      ),
      dict(
        type = 'groupby',
        groups = product_loyalty_df['loyalty_labels'],
        styles = opts
      )
  ]
)]

layout = dict(
    yaxis = dict(
        #type = 'log'
    )
)

fig_dict = dict(data=data, layout=layout)
pio.show(fig_dict, validate=False)

In [None]:
#loyalty triggers of one customer: one marker per product (average frequency / average loyalty score),
#grouped by loyalty tier, marker area proportional to the number of line items
#NOTE(review): plotly documents transforms as deprecated from plotly v5 - confirm the installed version still renders them

#rows of the customer shown in this chart
customer_rows = product_loyalty_df[product_loyalty_df['customer_id'] == 580937580618]

#colour styles keyed on values of the groupby column; `colors` is the list defined in an earlier cell
opts = [
    dict(target = group, value = dict(marker = dict(color = color)))
    for group, color in zip(customer_rows['loyalty_labels'].dropna().unique(), colors)
]

#largest number of line items a single product has for this customer; scales the marker area
largest_group = max(1, customer_rows['product_name'].value_counts().max())

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = product_loyalty_df['frequency'],
  y = product_loyalty_df['loyalty_score'],
  text = product_loyalty_df['product_name'],
  hoverinfo = 'text',
  opacity = 0.8,
  marker = dict(
      #fix: marker.size needs one value per plotted row. The previous value_counts() list had one
      #entry per distinct product, so sizes landed on unrelated points. Each row now weighs 1 and
      #the aggregate transform below sums the weights into a purchase count.
      size = [1] * len(product_loyalty_df),
      sizemode = 'area',
      #bubble scaling: the largest marker is about 40 px wide
      sizeref = 2.0 * float(largest_group) / (40.0 ** 2),
      sizemin = 4,
  ),
  transforms = [
      dict(
        type = 'filter',
        target = product_loyalty_df['customer_id'],
        #the filter attribute is called 'operation'; 'orientation' is not part of the filter transform
        operation = '=',
        value = 580937580618
      ),
      dict(
        type = 'groupby',
        groups = product_loyalty_df['loyalty_labels'],
        styles = opts
      ),
      dict(
        type = 'aggregate',
        groups = product_loyalty_df['product_name'],
        aggregations = [
            dict(target = 'x', func = 'avg'),
            dict(target = 'y', func = 'avg'),
            dict(target = 'marker.size', func = 'sum')
        ]
      )]
)]

layout = dict(
    title = '<b>Ridgewallet loyalty triggers</b><br> For customer id 580937580618',
    yaxis = dict(
        type = 'log'
    )
)

fig_dict = dict(data=data, layout=layout)

pio.show(fig_dict, validate=False)

In [None]:
#loyalty triggers of one customer: one marker per product (average order date / average loyalty score),
#marker area proportional to the number of line items
#NOTE(review): plotly documents transforms as deprecated from plotly v5 - confirm the installed version still renders them

#rows of the customer shown in this chart
customer_rows = product_loyalty_df[product_loyalty_df['customer_id'] == 580937580618]

#colour styles keyed on values of the groupby column; `colors` is the list defined in an earlier cell
opts = [
    dict(target = group, value = dict(marker = dict(color = color)))
    for group, color in zip(customer_rows['product_name'].dropna().unique(), colors)
]

#largest number of line items a single product has for this customer; scales the marker area
largest_group = max(1, customer_rows['product_name'].value_counts().max())

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = product_loyalty_df['order_date'],
  y = product_loyalty_df['loyalty_score'],
  text = product_loyalty_df['product_name'],
  hoverinfo = 'text',
  opacity = 0.8,
  marker = dict(
      #fix: marker.size needs one value per plotted row. The previous value_counts() list had one
      #entry per distinct product, so sizes landed on unrelated points. Each row now weighs 1 and
      #the aggregate transform below sums the weights into a purchase count.
      size = [1] * len(product_loyalty_df),
      sizemode = 'area',
      #bubble scaling: the largest marker is about 40 px wide
      sizeref = 2.0 * float(largest_group) / (40.0 ** 2),
      sizemin = 4,
  ),
  transforms = [
      dict(
        type = 'filter',
        target = product_loyalty_df['customer_id'],
        #the filter attribute is called 'operation'; 'orientation' is not part of the filter transform
        operation = '=',
        value = 580937580618
      ),
      dict(
        type = 'groupby',
        groups = product_loyalty_df['product_name'],
        styles = opts
      ),
      dict(
        type = 'aggregate',
        groups = product_loyalty_df['product_name'],
        aggregations = [
            dict(target = 'x', func = 'avg'),
            dict(target = 'y', func = 'avg'),
            dict(target = 'marker.size', func = 'sum')
        ]
      )]
)]

layout = dict(
    title = '<b>Ridgewallet loyalty triggers</b><br> For customer id 580937580618',
    yaxis = dict(
        type = 'log'
    )
)

fig_dict = dict(data=data, layout=layout)

pio.show(fig_dict, validate=False)

In [None]:
#loyalty triggers of one customer: one marker per product (average frequency / average loyalty score),
#marker area proportional to the number of line items
#NOTE(review): plotly documents transforms as deprecated from plotly v5 - confirm the installed version still renders them

#rows of the customer shown in this chart
customer_rows = product_loyalty_df[product_loyalty_df['customer_id'] == 580937580618]

#colour styles keyed on values of the groupby column; `colors` is the list defined in an earlier cell
opts = [
    dict(target = group, value = dict(marker = dict(color = color)))
    for group, color in zip(customer_rows['product_name'].dropna().unique(), colors)
]

#largest number of line items a single product has for this customer; scales the marker area
largest_group = max(1, customer_rows['product_name'].value_counts().max())

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = product_loyalty_df['frequency'],
  y = product_loyalty_df['loyalty_score'],
  text = product_loyalty_df['product_name'],
  hoverinfo = 'text',
  opacity = 0.8,
  marker = dict(
      #fix: marker.size needs one value per plotted row. The previous value_counts() list had one
      #entry per distinct product, so sizes landed on unrelated points. Each row now weighs 1 and
      #the aggregate transform below sums the weights into a purchase count.
      size = [1] * len(product_loyalty_df),
      sizemode = 'area',
      #bubble scaling: the largest marker is about 40 px wide
      sizeref = 2.0 * float(largest_group) / (40.0 ** 2),
      sizemin = 4,
  ),
  transforms = [
      dict(
        type = 'filter',
        target = product_loyalty_df['customer_id'],
        #the filter attribute is called 'operation'; 'orientation' is not part of the filter transform
        operation = '=',
        value = 580937580618
      ),
      dict(
        type = 'groupby',
        groups = product_loyalty_df['product_name'],
        styles = opts
      ),
      dict(
        type = 'aggregate',
        groups = product_loyalty_df['product_name'],
        aggregations = [
            dict(target = 'x', func = 'avg'),
            dict(target = 'y', func = 'avg'),
            dict(target = 'marker.size', func = 'sum')
        ]
      )]
)]

layout = dict(
    title = '<b>Ridgewallet loyalty triggers</b><br> For customer id 580937580618',
    yaxis = dict(
        type = 'log'
    )
)

fig_dict = dict(data=data, layout=layout)

pio.show(fig_dict, validate=False)

In [None]:
#loyalty triggers for all customers: one marker per product (average frequency / average loyalty score),
#marker area proportional to the number of line items of that product
#NOTE(review): plotly documents transforms as deprecated from plotly v5 - confirm the installed version still renders them

#largest number of line items recorded for a single product; scales the marker area
largest_group = max(1, product_loyalty_df['product_name'].value_counts().max())

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = product_loyalty_df['frequency'],
  y = product_loyalty_df['loyalty_score'],
  text = product_loyalty_df['product_name'],
  hoverinfo = 'text',
  opacity = 0.8,
  marker = dict(
      #fix: marker.size needs one value per plotted row. The previous value_counts() list had one
      #entry per distinct product, so sizes landed on unrelated points. Each row now weighs 1 and
      #the aggregate transform below sums the weights into a purchase count.
      size = [1] * len(product_loyalty_df),
      sizemode = 'area',
      #bubble scaling: the largest marker is about 40 px wide
      sizeref = 2.0 * float(largest_group) / (40.0 ** 2),
      sizemin = 4,
  ),
  transforms = [
      dict(
        type = 'groupby',
        groups = product_loyalty_df['product_name'],
        #styles = opts
      ),
      dict(
        type = 'aggregate',
        groups = product_loyalty_df['product_name'],
        aggregations = [
            dict(target = 'x', func = 'avg'),
            dict(target = 'y', func = 'avg'),
            dict(target = 'marker.size', func = 'sum')
        ]
      )]
)]

layout = dict(
    title = '<b>Ridgewallet loyalty triggers and symptoms</b><br> For entire customerbase',
    yaxis = dict(
        type = 'log'
    )
)

fig_dict = dict(data=data, layout=layout)

pio.show(fig_dict, validate=False)

In [None]:
#loyalty triggers for all customers: one marker per product (average order date / average loyalty score),
#marker area proportional to the number of line items of that product
#NOTE(review): plotly documents transforms as deprecated from plotly v5 - confirm the installed version still renders them

#largest number of line items recorded for a single product; scales the marker area
largest_group = max(1, product_loyalty_df['product_name'].value_counts().max())

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = product_loyalty_df['order_date'],
  y = product_loyalty_df['loyalty_score'],
  text = product_loyalty_df['product_name'],
  hoverinfo = 'text',
  opacity = 0.8,
  marker = dict(
      #fix: marker.size needs one value per plotted row. The previous value_counts() list had one
      #entry per distinct product, so sizes landed on unrelated points. Each row now weighs 1 and
      #the aggregate transform below sums the weights into a purchase count.
      size = [1] * len(product_loyalty_df),
      sizemode = 'area',
      #bubble scaling: the largest marker is about 40 px wide
      sizeref = 2.0 * float(largest_group) / (40.0 ** 2),
      sizemin = 4,
  ),
  transforms = [
      dict(
        type = 'groupby',
        groups = product_loyalty_df['product_name'],
        #styles = opts
      ),
      dict(
        type = 'aggregate',
        groups = product_loyalty_df['product_name'],
        aggregations = [
            dict(target = 'x', func = 'avg'),
            dict(target = 'y', func = 'avg'),
            dict(target = 'marker.size', func = 'sum')
        ]
      )]
)]

layout = dict(
    title = '<b>Ridgewallet loyalty triggers and symptoms</b><br> For entire customerbase',
    yaxis = dict(
        type = 'log'
    )
)

fig_dict = dict(data=data, layout=layout)

pio.show(fig_dict, validate=False)

In [None]:
#reload the saved results, drop rows with placeholder names, keep rows with frequency above 3
product_loyalty_df = pd.read_csv('product_loyalty_df.csv')
for name_col in ('product_name', 'category_name'):
    for marker_text in ("#", "None", "nan"):
        #rows where the name is missing do not satisfy "== False" and are dropped as well
        keep = product_loyalty_df[name_col].str.contains(marker_text) == False
        product_loyalty_df = product_loyalty_df[keep]

product_loyalty_df = product_loyalty_df[product_loyalty_df.frequency > 3]
product_loyalty_df = product_loyalty_df.reset_index(drop=True)

#loyalty triggers for all customers: one marker per product (average order date / average loyalty score)
#NOTE(review): plotly documents transforms as deprecated from plotly v5 - confirm the installed version still renders them

#largest number of line items recorded for a single product; scales the marker area
largest_group = max(1, product_loyalty_df['product_name'].value_counts().max())

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = product_loyalty_df['order_date'],
  y = product_loyalty_df['loyalty_score'],
  text = product_loyalty_df['product_name'],
  hoverinfo = 'text',
  opacity = 0.8,
  marker = dict(
      #fix: marker.size needs one value per plotted row. The previous value_counts() list had one
      #entry per distinct product, so sizes landed on unrelated points. Each row now weighs 1 and
      #the aggregate transform below sums the weights into a purchase count.
      size = [1] * len(product_loyalty_df),
      sizemode = 'area',
      #bubble scaling: the largest marker is about 40 px wide
      sizeref = 2.0 * float(largest_group) / (40.0 ** 2),
      sizemin = 4,
  ),
  transforms = [
      dict(
        type = 'groupby',
        groups = product_loyalty_df['product_name'],
        #styles = opts
      ),
      dict(
        type = 'aggregate',
        groups = product_loyalty_df['product_name'],
        aggregations = [
            dict(target = 'x', func = 'avg'),
            dict(target = 'y', func = 'avg'),
            dict(target = 'marker.size', func = 'sum')
        ]
      )]
)]

layout = dict(
    title = '<b>Ridgewallet loyalty triggers and symptoms</b><br> For entire customerbase',
    yaxis = dict(
        type = 'log'
    )
)

fig_dict = dict(data=data, layout=layout)

pio.show(fig_dict, validate=False)

In [None]:
#reload the saved results, drop rows with placeholder names, keep rows with frequency above 3
product_loyalty_df = pd.read_csv('product_loyalty_df.csv')
for name_col in ('product_name', 'category_name'):
    for marker_text in ("#", "None", "nan"):
        #rows where the name is missing do not satisfy "== False" and are dropped as well
        keep = product_loyalty_df[name_col].str.contains(marker_text) == False
        product_loyalty_df = product_loyalty_df[keep]

product_loyalty_df = product_loyalty_df[product_loyalty_df.frequency > 3]
product_loyalty_df = product_loyalty_df.reset_index(drop=True)

#every purchase as its own marker (no aggregation), grouped by product
#NOTE(review): plotly documents transforms as deprecated from plotly v5 - confirm the installed version still renders them

#fix: marker.size needs one value per plotted row. The previous value_counts() list had one entry per
#distinct product, so sizes landed on unrelated points. Each row now carries the number of line items
#recorded for its own product.
product_counts = product_loyalty_df['product_name'].value_counts()
point_size = product_loyalty_df['product_name'].map(product_counts).tolist()
largest_group = max(1, product_counts.max())

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = product_loyalty_df['order_date'],
  y = product_loyalty_df['loyalty_score'],
  text = product_loyalty_df['product_name'],
  hoverinfo = 'text',
  opacity = 0.8,
  marker = dict(
      size = point_size,
      sizemode = 'area',
      #bubble scaling: the largest marker is about 40 px wide
      sizeref = 2.0 * float(largest_group) / (40.0 ** 2),
      sizemin = 4,
  ),
  transforms = [
      dict(
        type = 'groupby',
        groups = product_loyalty_df['product_name'],
        #styles = opts
      ),
      #the aggregate transform (avg x / avg y / summed marker.size per product) is disabled in this variant
  ]
)]

layout = dict(
    title = '<b>Ridgewallet loyalty triggers and symptoms</b><br> For entire customerbase',
    yaxis = dict(
        type = 'log'
    )
)

fig_dict = dict(data=data, layout=layout)

pio.show(fig_dict, validate=False)

In [None]:
#spot check: all rows of one product (product_id compared as an integer)
product_loyalty_df.loc[product_loyalty_df['product_id']==1602562687050]

In [None]:
#spot check: all rows of another product (product_id compared as an integer)
product_loyalty_df.loc[product_loyalty_df['product_id']==3798739451978]

In [None]:
#number of rows per distinct product_name as a plain list (one entry per product, in value_counts order)
z = product_loyalty_df['product_name'].value_counts().values.tolist()


In [None]:
#journey of one customer over time, with the purchased products marked on the line.
#sorted by order_date because a 'lines' trace connects points in row order
test = product_loyalty_df.loc[product_loyalty_df['customer_id']==580937580618].sort_values(by='order_date')
fig = go.Figure()

fig.add_trace(go.Scatter(
    x = test['order_date'],
    y = test['loyalty_score'],
    mode = 'lines',
    name = 'customer_id 580937580618',
))

#fix: the marker trace used product_name for both x and y, which cannot be placed on the date / score
#axes set up by the line trace. Markers now sit on the journey line and show the product name on hover.
fig.add_trace(go.Scatter(
    x = test['order_date'],
    y = test['loyalty_score'],
    text = test['product_name'],
    hoverinfo = 'text',
    mode = 'markers',
    marker = dict(symbol='circle-dot', size = 10),
    name = 'Product',
))

fig.show()

In [None]:
#journey of one customer against purchase number, with a flag marker per purchased product
test = product_loyalty_df.loc[product_loyalty_df['customer_id']==580937580618]
fig = go.Figure()

fig.add_trace(go.Scatter(
    x = test.frequency,
    y = test.loyalty_score,
    mode = 'lines',
    name = 'customer_journeys',
))

#fix: update_traces() rewrites the properties of the existing line trace instead of adding a second
#trace, so the journey line was replaced by the markers. add_trace() keeps both. The markers sit on
#the line (product names cannot be placed on the numeric y axis) and show the product on hover.
fig.add_trace(go.Scatter(
    x = test.frequency,
    y = test.loyalty_score,
    text = test['product_name'],
    hoverinfo = 'text',
    mode = 'markers',
    marker = dict(symbol='triangle-down', size = 16),
    name = 'Flag',
))

fig.show()

In [None]:
#journey of a second customer against purchase number, with a marker per purchased product
test = product_loyalty_df.loc[product_loyalty_df['customer_id']==424578318350]
fig = go.Figure()

fig.add_trace(go.Scatter(
    x = test.frequency,
    y = test.loyalty_score,
    mode = 'lines',
    name = 'customer_journeys',
))

#fix: the markers used product_name as y value, which cannot be placed on the numeric y axis set up
#by the line trace. They now sit on the journey line and show the product name on hover.
fig.add_trace(go.Scatter(
    x = test.frequency,
    y = test.loyalty_score,
    text = test['product_name'],
    hoverinfo = 'text',
    mode = 'markers',
    marker = dict(symbol='triangle-down', size = 16),
    name = 'Product',
))

fig.show()

## Other

In [None]:
#small toy frame for trying out per-group numbering
df = pd.DataFrame(
    {
        "a": ["a", "a", "b", "a"],
        "b": [12, 12, 12.3, 13],
        "c": [12, 33., 12.3, 1],
    }
)
df

In [None]:
#number the distinct values of "b" inside each "a" group (1, 2, ... in sorted value order).
#group_keys=False keeps the original row index on the result; with pandas >= 2.0 the default adds the
#group key as an extra index level and the column assignment no longer lines up with df
df['freq'] = df.groupby("a", group_keys=False)["b"].apply(lambda x: x.groupby(x).ngroup() + 1)
df

In [None]:
#same numbering via factorize: distinct values of "b" are numbered in order of appearance inside each "a" group
df['freq'] = df.groupby("a")["b"].transform(lambda values: pd.factorize(values)[0] + 1)
df

In [None]:
#the toy frame again, with the expected freq column written out by hand
df = pd.DataFrame(
    {
        "a": ["a", "a", "b", "a"],
        "b": [12, 12, 12.3, 13],
        "c": [12, 33.0, 12.3, 1],
        "freq": [1, 1, 1, 2],
    }
)
df

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

# Smoothing demo: a sine wave over [0, 2*pi] with uniform noise in [0, 0.2)
# added, then smoothed with a Savitzky-Golay filter.
x = np.linspace(0, 2 * np.pi, num=100)
noise = 0.2 * np.random.random(100)
y = np.sin(x) + noise

# window size 51, polynomial order 3
yhat = savgol_filter(y, window_length=51, polyorder=3)

# Noisy signal in the default colour, smoothed signal in red.
plt.plot(x, y)
plt.plot(x, yhat, color='red')
plt.show()


In [None]:
import plotly.io as pio

import pandas as pd

# Plotly transform demo on the gapminder CSV: keep the rows where year == 2007
# (filter transform), then split the scatter by continent (groupby transform)
# with one marker colour per group.
# NOTE(review): `transforms` are marked deprecated in recent plotly releases --
# confirm the installed version still renders them.
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv")

colors = ['blue', 'orange', 'green', 'red', 'purple']

# One style entry per distinct continent, paired with a colour in order of
# first appearance (zip stops at the shorter of the two sequences).
# The original built the targets with `df['continent'][[i]].unique()`, which
# selects row i of the column rather than the i-th distinct continent, so the
# targets came from the first five rows of the file, not from the group names.
opts = [
    dict(target=continent, value=dict(marker=dict(color=color)))
    for continent, color in zip(df['continent'].unique(), colors)
]

data = [dict(
  type = 'scatter',
  mode = 'markers',
  x = df['lifeExp'],
  y = df['gdpPercap'],
  text = df['continent'],
  hoverinfo = 'text',
  opacity = 0.8,
  marker = dict(
      size = df['pop'],
      sizemode = 'area',
      sizeref = 200000
  ),
  transforms = [
      dict(
        type = 'filter',
        target = df['year'],
        orientation = '=',
        value = 2007
      ),
      dict(
        type = 'groupby',
        groups = df['continent'],
        styles = opts
    )]
)]

# Log-scaled y axis for the gdpPercap values.
layout = dict(
    yaxis = dict(
        type = 'log'
    )
)

# The figure is passed as a plain dict; validate=False skips plotly's
# figure validation when showing it.
fig_dict = dict(data=data, layout=layout)
pio.show(fig_dict, validate=False)

In [None]:
# Pull out the 'pop' column (used above as the marker size) and display it
# as the cell's last expression.
f =df['pop']
f

In [None]:
import plotly.io as pio
import pandas as pd

# Plotly transform demo on the gapminder CSV: keep the rows where year == 2007,
# then aggregate per continent (average x and y, summed marker size).
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv")

# Transform 1: filter on the year column.
year_filter = {
    'type': 'filter',
    'target': df['year'],
    'orientation': '=',
    'value': 2007,
}

# Transform 2: one aggregated point per continent.
continent_aggregate = {
    'type': 'aggregate',
    'groups': df['continent'],
    'aggregations': [
        {'target': target, 'func': func}
        for target, func in (('x', 'avg'), ('y', 'avg'), ('marker.size', 'sum'))
    ],
}

bubble_trace = {
    'type': 'scatter',
    'mode': 'markers',
    'x': df['lifeExp'],
    'y': df['gdpPercap'],
    'text': df['continent'],
    'hoverinfo': 'text',
    'opacity': 0.8,
    'marker': {
        'size': df['pop'],
        'sizemode': 'area',
        'sizeref': 200000,
    },
    'transforms': [year_filter, continent_aggregate],
}

data = [bubble_trace]

# Log-scaled y axis for the gdpPercap values.
layout = {'yaxis': {'type': 'log'}}

# The figure is passed as a plain dict; validate=False skips plotly's
# figure validation when showing it.
fig_dict = {'data': data, 'layout': layout}

pio.show(fig_dict, validate=False)

In [None]:
# Print the summary that DataFrame.info() reports for product_loyalty_df.
product_loyalty_df.info()

#Other commands
# Bare last expression: the notebook displays the current value of `opts`.
opts