# 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from scipy.stats import ttest_ind, chi2_contingency
from scipy.stats import ttest_ind

In [None]:
import sys
sys.path.append('../src')
from functions import *

In [None]:
# Load the cleaned, combined dataset from its relative path.
# '../src' is already appended to sys.path in the import cell, so it is not
# appended again here (a second append only duplicates the entry, and this
# cell would raise NameError on `sys` if the import cell had not been run).
combined_df = pd.read_csv('../Data/Cleaned_Data/combined_df.csv')
# Display every column of wide DataFrames instead of truncating them.
pd.set_option('display.max_columns', None)

# 2. Data Preprocessing
### Ensure that the data types are correctly assigned, especially for categorical variables.

In [None]:
# Rebind combined_df to the result of the convert_data_types helper
# (not defined in this notebook; presumably provided by `from functions import *`).
# NOTE(review): assumed to cast columns (e.g. categoricals) to their intended
# dtypes, as the heading above describes — confirm against src/functions.py.
combined_df = convert_data_types(combined_df)

# 3. Define Bivariate Analysis

In [None]:
# Tenure + Age + Completion Control
# Row counts for every (tenure, age, completion) combination among rows whose
# 'Variation' is 'Control'.
tenure_age_completion_control = (
    combined_df[combined_df['Variation'] == 'Control']
    .groupby(['clnt_tenure_yr', 'clnt_age', 'completion'])
    .size()
    .reset_index(name='count')
)
# Saved with a `_control` suffix, like the other control/test tables in this
# notebook, so it does not share a filename with the test-group export.
tenure_age_completion_control.to_csv('../Tables/tenure_age_completion_control.csv', index=False)
# Last expression of the cell: the notebook renders the table.
tenure_age_completion_control

In [None]:
# Tenure + Age + Completion Control Visualization
# tenure_age_completion_control holds one row per (tenure, age, completion).
# Plotted as-is, each bar would cover several age rows and seaborn would draw
# their *mean* (plus error bars) instead of a count. Sum over age first so
# every bar is the total row count for its tenure/completion pair, which is
# what the 'Count' axis label promises.
tenure_completion_control_totals = (
    tenure_age_completion_control
    .groupby(['clnt_tenure_yr', 'completion'])['count']
    .sum()
    .reset_index()
)
sns.barplot(data=tenure_completion_control_totals, x='clnt_tenure_yr', y='count', hue='completion')
plt.title('Tenure + Age + Completion Control')
plt.xlabel('Tenure (Years)')
plt.ylabel('Count')
plt.show()

In [None]:
# Tenure + Age + Completion Test
# Row counts for every (tenure, age, completion) combination among rows whose
# 'Variation' is 'Test'.
tenure_age_completion_test = (
    combined_df[combined_df['Variation'] == 'Test']
    .groupby(['clnt_tenure_yr', 'clnt_age', 'completion'])
    .size()
    .reset_index(name='count')
)
# Saved with a `_test` suffix, like the other test tables in this notebook,
# so it does not share a filename with (and overwrite) the control-group export.
tenure_age_completion_test.to_csv('../Tables/tenure_age_completion_test.csv', index=False)
# Last expression of the cell: the notebook renders the table.
tenure_age_completion_test

In [None]:
# Tenure + Age + Completion Test Visualization
# tenure_age_completion_test holds one row per (tenure, age, completion).
# Plotted as-is, each bar would cover several age rows and seaborn would draw
# their *mean* (plus error bars) instead of a count. Sum over age first so
# every bar is the total row count for its tenure/completion pair, which is
# what the 'Count' axis label promises.
tenure_completion_test_totals = (
    tenure_age_completion_test
    .groupby(['clnt_tenure_yr', 'completion'])['count']
    .sum()
    .reset_index()
)
sns.barplot(data=tenure_completion_test_totals, x='clnt_tenure_yr', y='count', hue='completion')
plt.title('Tenure + Age + Completion Test')
plt.xlabel('Tenure (Years)')
plt.ylabel('Count')
plt.show()

In [None]:
# Control group: row counts per (tenure, gender, completion) combination.
tenure_gender_completion_control = (
    combined_df.loc[combined_df['Variation'] == 'Control']
    .groupby(['clnt_tenure_yr', 'gendr', 'completion'])
    .size()
    .reset_index(name='count')
)
# Export the table, then leave it as the final expression so the cell shows it.
tenure_gender_completion_control.to_csv(
    '../Tables/tenure_gender_completion_control.csv',
    index=False,
)
tenure_gender_completion_control


In [None]:
# Tenure + Gender + Completion Control Visualization
# tenure_gender_completion_control holds one row per (tenure, gender,
# completion). With x=tenure and hue=gender, each bar would cover one row per
# completion value and seaborn would draw their *mean* (plus error bars), so
# the bars would under-report the count. Sum over completion first so every
# bar is the total row count for its tenure/gender pair.
# NOTE(review): completion itself is not shown in this chart despite the
# title — confirm whether a per-completion split is wanted.
tenure_gender_control_totals = (
    tenure_gender_completion_control
    .groupby(['clnt_tenure_yr', 'gendr'])['count']
    .sum()
    .reset_index()
)
sns.barplot(data=tenure_gender_control_totals, x='clnt_tenure_yr', y='count', hue='gendr')
plt.title('Tenure + Gender + Completion Control')
plt.xlabel('Tenure (Years)')
plt.ylabel('Count')
plt.show()

In [None]:
# Test group: row counts per (tenure, gender, completion) combination.
tenure_gender_completion_test = (
    combined_df.loc[combined_df['Variation'] == 'Test']
    .groupby(['clnt_tenure_yr', 'gendr', 'completion'])
    .size()
    .reset_index(name='count')
)
# Export the table, then leave it as the final expression so the cell shows it.
tenure_gender_completion_test.to_csv(
    '../Tables/tenure_gender_completion_test.csv',
    index=False,
)
tenure_gender_completion_test


In [None]:
# Tenure + Gender + Completion Test Visualization
# tenure_gender_completion_test holds one row per (tenure, gender,
# completion). With x=tenure and hue=gender, each bar would cover one row per
# completion value and seaborn would draw their *mean* (plus error bars), so
# the bars would under-report the count. Sum over completion first so every
# bar is the total row count for its tenure/gender pair.
# NOTE(review): completion itself is not shown in this chart despite the
# title — confirm whether a per-completion split is wanted.
tenure_gender_test_totals = (
    tenure_gender_completion_test
    .groupby(['clnt_tenure_yr', 'gendr'])['count']
    .sum()
    .reset_index()
)
sns.barplot(data=tenure_gender_test_totals, x='clnt_tenure_yr', y='count', hue='gendr')
plt.title('Tenure + Gender + Completion Test')
plt.xlabel('Tenure (Years)')
plt.ylabel('Count')
plt.show()

In [None]:
# Control group: row counts per (tenure, balance, completion) combination.
balance_tenure_completion_control = (
    combined_df.loc[combined_df['Variation'] == 'Control']
    .groupby(['clnt_tenure_yr', 'bal', 'completion'])
    .size()
    .reset_index(name='count')
)
# Export the table, then leave it as the final expression so the cell shows it.
balance_tenure_completion_control.to_csv(
    '../Tables/balance_tenure_completion_control.csv',
    index=False,
)
balance_tenure_completion_control

In [None]:
# Scatter of balance (x) against tenure (y) for the control table,
# one point per table row, coloured by completion.
sns.scatterplot(
    data=balance_tenure_completion_control,
    x='bal',
    y='clnt_tenure_yr',
    hue='completion',
).set(
    title='Balance + Tenure + Completion Control',
    xlabel='Balance',
    ylabel='Tenure (Years)',
)
plt.show()

In [None]:
# Test group: row counts per (tenure, balance, completion) combination.
balance_tenure_completion_test = (
    combined_df.loc[combined_df['Variation'] == 'Test']
    .groupby(['clnt_tenure_yr', 'bal', 'completion'])
    .size()
    .reset_index(name='count')
)
# Export the table, then leave it as the final expression so the cell shows it.
balance_tenure_completion_test.to_csv(
    '../Tables/balance_tenure_completion_test.csv',
    index=False,
)
balance_tenure_completion_test

In [None]:
# Scatter of balance (x) against tenure (y) for the test table,
# one point per table row, coloured by completion.
sns.scatterplot(
    data=balance_tenure_completion_test,
    x='bal',
    y='clnt_tenure_yr',
    hue='completion',
).set(
    title='Balance + Tenure + Completion Test',
    xlabel='Balance',
    ylabel='Tenure (Years)',
)
plt.show()

In [None]:
# Control group: row counts per (total_navigation, last_step) combination.
total_nav_last_step_control = (
    combined_df.loc[combined_df['Variation'] == 'Control']
    .groupby(['total_navigation', 'last_step'])
    .size()
    .reset_index(name='count')
)
# Export the table, then leave it as the final expression so the cell shows it.
total_nav_last_step_control.to_csv(
    '../Tables/total_nav_last_step_control.csv',
    index=False,
)
total_nav_last_step_control

In [None]:

# Grouped bars for the control table: count by total_navigation,
# with one bar colour per last_step.
sns.barplot(
    data=total_nav_last_step_control,
    x='total_navigation',
    y='count',
    hue='last_step',
).set(
    title='Total Navigation + Last Step Control',
    xlabel='Total Navigation',
    ylabel='Count',
)
plt.show()

In [None]:
# Test group: row counts per (total_navigation, last_step) combination.
total_nav_last_step_test = (
    combined_df.loc[combined_df['Variation'] == 'Test']
    .groupby(['total_navigation', 'last_step'])
    .size()
    .reset_index(name='count')
)
# Export the table, then leave it as the final expression so the cell shows it.
total_nav_last_step_test.to_csv(
    '../Tables/total_nav_last_step_test.csv',
    index=False,
)
total_nav_last_step_test

In [None]:
# Grouped bars for the test table: count by total_navigation,
# with one bar colour per last_step.
sns.barplot(
    data=total_nav_last_step_test,
    x='total_navigation',
    y='count',
    hue='last_step',
).set(
    title='Total Navigation + Last Step Test',
    xlabel='Total Navigation',
    ylabel='Count',
)
plt.show()

In [None]:
# Control group: row counts per (total_navigation, completion) combination.
total_nav_completion_control = (
    combined_df.loc[combined_df['Variation'] == 'Control']
    .groupby(['total_navigation', 'completion'])
    .size()
    .reset_index(name='count')
)
# Export the table, then leave it as the final expression so the cell shows it.
total_nav_completion_control.to_csv(
    '../Tables/total_nav_completion_control.csv',
    index=False,
)
total_nav_completion_control

In [None]:
# Grouped bars for the control table: count by total_navigation,
# with one bar colour per completion value.
sns.barplot(
    data=total_nav_completion_control,
    x='total_navigation',
    y='count',
    hue='completion',
).set(
    title='Total Navigation and Completions Control',
    xlabel='Total Navigation',
    ylabel='Count',
)
plt.show()

In [None]:
# Test group: row counts per (total_navigation, completion) combination.
total_nav_completion_test = (
    combined_df.loc[combined_df['Variation'] == 'Test']
    .groupby(['total_navigation', 'completion'])
    .size()
    .reset_index(name='count')
)
# Export the table, then leave it as the final expression so the cell shows it.
total_nav_completion_test.to_csv(
    '../Tables/total_nav_completion_test.csv',
    index=False,
)
total_nav_completion_test

In [None]:
# Grouped bars for the test table: count by total_navigation,
# with one bar colour per completion value.
sns.barplot(
    data=total_nav_completion_test,
    x='total_navigation',
    y='count',
    hue='completion',
).set(
    title='Total Navigation and Completions Test',
    xlabel='Total Navigation',
    ylabel='Count',
)
plt.show()

In [None]:
# Control group: row counts per last_step value.
# NOTE(review): the only filter applied is Variation == 'Control', so rows of
# every completion status are counted — confirm that is the intended "drop" view.
steps_drop_control = (
    combined_df.loc[combined_df['Variation'] == 'Control']
    .groupby('last_step')
    .size()
    .reset_index(name='count')
)
# Export the table, then leave it as the final expression so the cell shows it.
steps_drop_control.to_csv(
    '../Tables/steps_drop_control.csv',
    index=False,
)
steps_drop_control

In [None]:
# One bar per last_step value showing its count in the control table.
sns.barplot(
    data=steps_drop_control,
    x='last_step',
    y='count',
).set(
    title='Step Where Clients Drop Control',
    xlabel='Last Step',
    ylabel='Count',
)
plt.show()

In [None]:
# Test group: row counts per last_step value.
# NOTE(review): the only filter applied is Variation == 'Test', so rows of
# every completion status are counted — confirm that is the intended "drop" view.
steps_drop_test = (
    combined_df.loc[combined_df['Variation'] == 'Test']
    .groupby('last_step')
    .size()
    .reset_index(name='count')
)
# Export the table, then leave it as the final expression so the cell shows it.
steps_drop_test.to_csv(
    '../Tables/steps_drop_test.csv',
    index=False,
)
steps_drop_test

In [None]:
# One bar per last_step value showing its count in the test table.
sns.barplot(
    data=steps_drop_test,
    x='last_step',
    y='count',
).set(
    title='Step Where Clients Drop Test',
    xlabel='Last Step',
    ylabel='Count',
)
plt.show()


# 4. Calculate Correlation Coefficients