# 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from scipy.stats import ttest_ind, chi2_contingency
from scipy.stats import ttest_ind

In [2]:
import sys
# Make modules located in ../src importable from this notebook.
sys.path.append('../src')
# NOTE(review): the wildcard import hides which helpers the notebook relies on;
# convert_data_types (called further down) presumably comes from here — confirm
# and consider importing the needed names explicitly.
from functions import *

In [3]:
# Load the cleaned, combined dataset used by every cell below.
# '../src' is already appended to sys.path in the previous cell (which also
# provides `import sys`), so it is not appended a second time here.
combined_df = pd.read_csv('../Data/Cleaned_Data/combined_df.csv')
# Show every column when a DataFrame is displayed instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

# 2. Data Preprocessing
### Ensure that the data types are correctly assigned, especially for categorical variables.

In [4]:
# Run the project helper that assigns the column data types. The result is bound
# back to combined_df, so the frame read from CSV is replaced from here on.
# NOTE(review): convert_data_types is presumably provided by `from functions import *` — confirm.
combined_df = convert_data_types(combined_df)

# 3. Categorical vs Categorical Analysis
# FOR EXAMPLE THE Crosstab and Chi-square Tests
# HERE GOES WHAT REETU DID

# Categorical vs Continuous Analysis
# HERE GOES THE ANALYSIS CODE WITH VIOLIN AND VISUALIZATIONS

# Exploratory Data Analysis (EDA)


## Visualizations and Tables for Tableau

In [5]:
# Inspect the available column names before building the export tables below.
combined_df.columns

Index(['client_id', 'visit_visitor_id', 'start_time', 'step_1', 'step_2',
       'step_3', 'date', 'bt_1', 'bt_2', 'bt_3', 'total_navigation',
       'last_step', 'completion', 'total_time_visit', 'Variation',
       'clnt_tenure_yr', 'clnt_tenure_mnth', 'clnt_age', 'gendr', 'num_accts',
       'bal', 'calls_6_mnth', 'logons_6_mnth'],
      dtype='object')

In [6]:
# Tenure + Age + Completion Control
# Count the Control rows for every (clnt_tenure_yr, clnt_age, completion) combination.
tenure_age_completion_control = (
    combined_df[combined_df['Variation'] == 'Control']
    .groupby(['clnt_tenure_yr', 'clnt_age', 'completion'])
    .size()
    .reset_index(name='count')
)
# Use a '_control' suffix, like every other Control export in this notebook, so
# the file is clearly identified and never shares a path with the Test table.
tenure_age_completion_control.to_csv('../Tables/tenure_age_completion_control.csv', index=False)
tenure_age_completion_control

Unnamed: 0,clnt_tenure_yr,clnt_age,completion,count
0,2,19,True,1
1,2,22,False,1
2,2,22,True,1
3,2,25,False,6
4,2,25,True,3
...,...,...,...,...
3544,52,62,False,2
3545,52,62,True,1
3546,54,39,False,2
3547,55,48,False,1


In [7]:
# Tenure + Age + Completion Test
# Count the Test rows for every (clnt_tenure_yr, clnt_age, completion) combination.
tenure_age_completion_test = (
    combined_df[combined_df['Variation'] == 'Test']
    .groupby(['clnt_tenure_yr', 'clnt_age', 'completion'])
    .size()
    .reset_index(name='count')
)
# Give the Test table its own '_test' file, matching the other Test exports, so
# it never shares a path with (and cannot overwrite) the Control table.
tenure_age_completion_test.to_csv('../Tables/tenure_age_completion_test.csv', index=False)
tenure_age_completion_test

Unnamed: 0,clnt_tenure_yr,clnt_age,completion,count
0,2,21,True,1
1,2,22,False,1
2,2,23,False,1
3,2,23,True,4
4,2,24,False,1
...,...,...,...,...
3614,51,60,True,1
3615,55,42,False,1
3616,55,42,True,1
3617,55,45,True,1


In [8]:
# Control variation: row counts per (clnt_tenure_yr, gendr, completion) combination.
tenure_gender_completion_control = (
    combined_df.loc[combined_df['Variation'] == 'Control']
    .groupby(['clnt_tenure_yr', 'gendr', 'completion'])
    .size()
    .reset_index(name='count')
)
# Persist the table without the index column, then display it.
tenure_gender_completion_control.to_csv(
    '../Tables/tenure_gender_completion_control.csv',
    index=False,
)
tenure_gender_completion_control


Unnamed: 0,clnt_tenure_yr,gendr,completion,count
0,2,F,False,5
1,2,F,True,2
2,2,M,False,3
3,2,M,True,3
4,2,Other,False,27
...,...,...,...,...
235,52,F,False,2
236,52,F,True,1
237,54,F,False,2
238,55,F,False,1


In [9]:
# Test variation: row counts per (clnt_tenure_yr, gendr, completion) combination.
tenure_gender_completion_test = (
    combined_df.loc[combined_df['Variation'] == 'Test']
    .groupby(['clnt_tenure_yr', 'gendr', 'completion'])
    .size()
    .reset_index(name='count')
)
# Persist the table without the index column, then display it.
tenure_gender_completion_test.to_csv(
    '../Tables/tenure_gender_completion_test.csv',
    index=False,
)
tenure_gender_completion_test


Unnamed: 0,clnt_tenure_yr,gendr,completion,count
0,2,F,False,1
1,2,F,True,2
2,2,M,False,1
3,2,M,True,4
4,2,Other,False,12
...,...,...,...,...
239,51,F,False,1
240,51,F,True,3
241,55,M,False,1
242,55,M,True,2


In [10]:
# Control variation: row counts per (clnt_tenure_yr, bal, completion) combination.
balance_tenure_completion_control = (
    combined_df.loc[combined_df['Variation'] == 'Control']
    .groupby(['clnt_tenure_yr', 'bal', 'completion'])
    .size()
    .reset_index(name='count')
)
# Persist the table without the index column, then display it.
balance_tenure_completion_control.to_csv(
    '../Tables/balance_tenure_completion_control.csv',
    index=False,
)
balance_tenure_completion_control

Unnamed: 0,clnt_tenure_yr,bal,completion,count
0,2,27397.75,False,1
1,2,27397.75,True,1
2,2,29146.95,False,1
3,2,30085.17,True,1
4,2,30098.53,True,1
...,...,...,...,...
27242,52,57944.95,False,2
27243,52,57944.95,True,1
27244,54,70645.03,False,2
27245,55,157703.82,False,1


In [11]:
# Test variation: row counts per (clnt_tenure_yr, bal, completion) combination.
balance_tenure_completion_test = (
    combined_df.loc[combined_df['Variation'] == 'Test']
    .groupby(['clnt_tenure_yr', 'bal', 'completion'])
    .size()
    .reset_index(name='count')
)
# Persist the table without the index column, then display it.
balance_tenure_completion_test.to_csv(
    '../Tables/balance_tenure_completion_test.csv',
    index=False,
)
balance_tenure_completion_test

Unnamed: 0,clnt_tenure_yr,bal,completion,count
0,2,26814.29,True,2
1,2,26819.15,True,1
2,2,26903.76,True,1
3,2,29931.46,False,1
4,2,29959.24,True,1
...,...,...,...,...
30475,51,70986.53,True,1
30476,55,45780.56,False,1
30477,55,130125.49,False,1
30478,55,130125.49,True,1


In [12]:
# Control variation: row counts per (total_navigation, last_step) combination.
total_nav_last_step_control = (
    combined_df.loc[combined_df['Variation'] == 'Control']
    .groupby(['total_navigation', 'last_step'])
    .size()
    .reset_index(name='count')
)
# Persist the table without the index column, then display it.
total_nav_last_step_control.to_csv(
    '../Tables/total_nav_last_step_control.csv',
    index=False,
)
total_nav_last_step_control

Unnamed: 0,total_navigation,last_step,count
0,0,confirm,12227
1,0,start,8172
2,0,step_1,2807
3,0,step_2,1158
4,0,step_3,1767
...,...,...,...
62,15,step_1,1
63,15,step_2,1
64,17,confirm,2
65,18,step_1,1


In [13]:
# Test variation: row counts per (total_navigation, last_step) combination.
total_nav_last_step_test = (
    combined_df.loc[combined_df['Variation'] == 'Test']
    .groupby(['total_navigation', 'last_step'])
    .size()
    .reset_index(name='count')
)
# Persist the table without the index column, then display it.
total_nav_last_step_test.to_csv(
    '../Tables/total_nav_last_step_test.csv',
    index=False,
)
total_nav_last_step_test

Unnamed: 0,total_navigation,last_step,count
0,0,confirm,3362
1,0,start,5085
2,1,confirm,196
3,1,start,937
4,1,step_1,2217
...,...,...,...
102,28,start,1
103,32,confirm,1
104,47,confirm,1
105,48,start,1


In [14]:
# Control variation: row counts per (total_navigation, completion) combination.
total_nav_completion_control = (
    combined_df.loc[combined_df['Variation'] == 'Control']
    .groupby(['total_navigation', 'completion'])
    .size()
    .reset_index(name='count')
)
# Persist the table without the index column, then display it.
total_nav_completion_control.to_csv(
    '../Tables/total_nav_completion_control.csv',
    index=False,
)
total_nav_completion_control

Unnamed: 0,total_navigation,completion,count
0,0,False,13904
1,0,True,12227
2,1,False,552
3,1,True,897
4,2,False,984
5,2,True,342
6,3,False,805
7,3,True,1303
8,4,False,306
9,4,True,277


In [15]:
# Test variation: row counts per (total_navigation, completion) combination.
total_nav_completion_test = (
    combined_df.loc[combined_df['Variation'] == 'Test']
    .groupby(['total_navigation', 'completion'])
    .size()
    .reset_index(name='count')
)
# Persist the table without the index column, then display it.
total_nav_completion_test.to_csv(
    '../Tables/total_nav_completion_test.csv',
    index=False,
)
total_nav_completion_test

Unnamed: 0,total_navigation,completion,count
0,0,False,5085
1,0,True,3362
2,1,False,3208
3,1,True,196
4,2,False,2116
5,2,True,107
6,3,False,2535
7,3,True,13227
8,4,False,983
9,4,True,2054


In [16]:
# Control variation: row counts per last_step value.
steps_drop_control = (
    combined_df.loc[combined_df['Variation'] == 'Control']
    .groupby('last_step')
    .size()
    .reset_index(name='count')
)
# Persist the table without the index column, then display it.
steps_drop_control.to_csv(
    '../Tables/steps_drop_control.csv',
    index=False,
)
steps_drop_control

Unnamed: 0,last_step,count
0,confirm,15336
1,start,9639
2,step_1,3498
3,step_2,1459
4,step_3,2249


In [17]:
# Test variation: row counts per last_step value.
steps_drop_test = (
    combined_df.loc[combined_df['Variation'] == 'Test']
    .groupby('last_step')
    .size()
    .reset_index(name='count')
)
# Persist the table without the index column, then display it.
steps_drop_test.to_csv(
    '../Tables/steps_drop_test.csv',
    index=False,
)
steps_drop_test


Unnamed: 0,last_step,count
0,confirm,21435
1,start,9448
2,step_1,3136
3,step_2,1321
4,step_3,1782
