# A/B Test Analysis for a New Routing Algorithm

## 1. Load the Data

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import ttest_ind

# Read the delivery records for the experiment into a DataFrame. The path is
# relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data/delivery_data.csv')

# Preview the first five rows as the cell output.
df.head(n=5)

## 2. Exploratory Data Analysis (EDA)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) as the cell output.
# With pandas defaults, a mixed-dtype frame is summarized over its numeric
# columns only.
df.describe()

## 3. Hypothesis Testing

### Fuel Consumption

**Null Hypothesis (H0):** The mean fuel consumption is the same for the control and eco algorithms.

**Alternative Hypothesis (H1):** The mean fuel consumption differs between the control and eco algorithms.

In [None]:
# Split fuel consumption by experiment arm. `.loc` with a (row mask, column)
# pair selects in one step instead of chained indexing. The two Series are
# reused later by the financial analysis.
control_fuel = df.loc[df['algorithm'] == 'control', 'fuel_consumption']
eco_fuel = df.loc[df['algorithm'] == 'eco', 'fuel_consumption']

# Two-sided Welch's t-test. `equal_var=False` drops the pooled-variance
# assumption of the default Student's t-test, which this notebook never
# checks and which is unreliable when the arms differ in spread or size.
fuel_ttest = ttest_ind(control_fuel, eco_fuel, equal_var=False)
print(f'Fuel Consumption t-test p-value: {fuel_ttest.pvalue}')

### Delivery Time

**Null Hypothesis (H0):** The mean delivery time is the same for the control and eco algorithms.

**Alternative Hypothesis (H1):** The mean delivery time differs between the control and eco algorithms.

In [None]:
# Split delivery time by experiment arm. `.loc` with a (row mask, column)
# pair selects in one step instead of chained indexing. The two Series are
# reused later by the financial analysis.
control_time = df.loc[df['algorithm'] == 'control', 'delivery_time']
eco_time = df.loc[df['algorithm'] == 'eco', 'delivery_time']

# Two-sided Welch's t-test. `equal_var=False` drops the pooled-variance
# assumption of the default Student's t-test, which this notebook never
# checks and which is unreliable when the arms differ in spread or size.
time_ttest = ttest_ind(control_time, eco_time, equal_var=False)
print(f'Delivery Time t-test p-value: {time_ttest.pvalue}')

## 4. Data Visualization

In [None]:
sns.set_style('darkgrid')

# One figure with two side-by-side panels. The explicit Figure/Axes interface
# keeps each plot bound to its own axes instead of relying on pyplot's
# "current axes" state.
fig, (fuel_ax, time_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: fuel consumption histogram with a KDE overlay, colored by arm.
sns.histplot(data=df, x='fuel_consumption', hue='algorithm', kde=True, ax=fuel_ax)
fuel_ax.set_title('Fuel Consumption (Liters) Distribution')

# Right panel: delivery time histogram with a KDE overlay, colored by arm.
sns.histplot(data=df, x='delivery_time', hue='algorithm', kde=True, ax=time_ax)
time_ax.set_title('Delivery Time Distribution')

# Keep axis labels and titles of the two panels from overlapping.
fig.tight_layout()

# savefig does not create missing directories, so make sure the output folder
# exists; otherwise a fresh checkout fails here with FileNotFoundError.
output_path = Path('images/distributions.png')
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(output_path)
plt.show()

## 5. Financial Analysis

In [None]:
# --- Assumptions behind the projection (monetary values in dollars) ---
fuel_cost_per_liter = 1.5
driver_cost_per_hour = 20
deliveries_per_year = 100_000

# --- Per-delivery differences observed in the sample ---
# Both are oriented so that a positive number matches the expected effect of
# the eco algorithm: less fuel used, more time taken.
fuel_difference = control_fuel.mean() - eco_fuel.mean()
time_difference = eco_time.mean() - control_time.mean()

# --- Scale the per-delivery differences to a full year ---
annual_fuel_savings = fuel_difference * deliveries_per_year * fuel_cost_per_liter

# Dividing by 60 converts the time gap to hours so it can be priced at the
# hourly driver rate (assumes delivery_time is recorded in minutes — TODO confirm).
extra_hours_per_delivery = time_difference / 60
annual_driver_cost_increase = extra_hours_per_delivery * deliveries_per_year * driver_cost_per_hour

# Positive means the fuel savings outweigh the added driver cost.
net_annual_impact = annual_fuel_savings - annual_driver_cost_increase

print(f'Projected Annual Fuel Savings: ${annual_fuel_savings:,.2f}')
print(f'Projected Annual Driver Cost Increase: ${annual_driver_cost_increase:,.2f}')
print(f'Net Annual Financial Impact: ${net_annual_impact:,.2f}')

## 6. Conclusion

In [None]:
# Significance level used to judge both t-tests.
alpha = 0.05

# The t-tests are two-sided, so a small p-value only says the means differ.
# Take the direction from the sign of the observed mean differences:
#   fuel_difference = control - eco  -> negative means eco used MORE fuel
#   time_difference = eco - control  -> negative means eco was FASTER
# The original wording is kept unless the data clearly point the other way.
fuel_direction = 'increase' if fuel_difference < 0 else 'decrease'
time_direction = 'decrease' if time_difference < 0 else 'increase'

print('Statistical Significance:')
if fuel_ttest.pvalue < alpha:
    print(f'- The {fuel_direction} in fuel consumption is statistically significant.')
else:
    print(f'- The {fuel_direction} in fuel consumption is not statistically significant.')

if time_ttest.pvalue < alpha:
    print(f'- The {time_direction} in delivery time is statistically significant.')
else:
    print(f'- The {time_direction} in delivery time is not statistically significant.')

# The leading newline must be the escape sequence \n; a literal line break
# inside a single-quoted string is a SyntaxError.
print('\nFinancial Impact:')
if net_annual_impact > 0:
    print(f'- The new algorithm is projected to save the company ${net_annual_impact:,.2f} annually.')
else:
    # Negate so the reported cost is shown as a positive dollar amount.
    print(f'- The new algorithm is projected to cost the company ${-net_annual_impact:,.2f} annually.')