<a href="https://colab.research.google.com/github/HalyaIss/Python-Projects/blob/main/A_B_Teat_Results_Analisys.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Project Objective:** To analyze A/B testing results using statistical methods in Python and create visualizations that demonstrate key conversion metrics.




In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.stats import chi2_contingency
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Connecting Google Drive
# Mount Google Drive inside the Colab runtime so files stored there can be read.
from google.colab import drive
drive.mount("/content/drive")
# IPython magic (not plain Python): change the working directory to the project folder.
%cd /content/drive/MyDrive/Python
# Load the raw A/B test data; the relative path resolves against the directory set above.
df = pd.read_csv("results.csv")

Mounted at /content/drive
/content/drive/MyDrive/Python


## **Data Overview**

In [None]:
# Preview the first rows to check the column layout (one row per event record).
df.head()

Unnamed: 0,date,country,device,continent,channel,test,test_group,event_name,value
0,2020-11-01,Lithuania,mobile,Europe,Organic Search,2,2,new account,1
1,2020-11-01,El Salvador,desktop,Americas,Social Search,2,1,new account,1
2,2020-11-01,Slovakia,mobile,Europe,Paid Search,2,2,new account,1
3,2020-11-01,Lithuania,desktop,Europe,Paid Search,2,2,new account,1
4,2020-11-02,North Macedonia,desktop,Europe,Direct,2,1,new account,1


In [None]:
# Print column dtypes, non-null counts and memory usage to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800996 entries, 0 to 800995
Data columns (total 9 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   date        800996 non-null  object
 1   country     800996 non-null  object
 2   device      800996 non-null  object
 3   continent   800996 non-null  object
 4   channel     800996 non-null  object
 5   test        800996 non-null  int64 
 6   test_group  800996 non-null  int64 
 7   event_name  800996 non-null  object
 8   value       800996 non-null  int64 
dtypes: int64(3), object(6)
memory usage: 55.0+ MB



## **Calculating statistical significance**

In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0


def calculate_ab_test_significance(df, metrics_config, *, alpha=0.05, correction=True,
                                   control_label=1, test_label=2):
    """
    Calculate statistical significance for A/B test results.

    For every test number and every configured metric, the numerator and denominator
    columns are summed per test group, and the control and test proportions are compared.

    Args:
        df: DataFrame containing test data with columns: test, test_group, and metric columns
        metrics_config: Dictionary mapping metric names to (numerator_col, denominator_col) tuples
        alpha: Significance threshold; a result is flagged significant when p_value < alpha
        correction: Forwarded to scipy's chi2_contingency. With the default (True) the
            p-value uses Yates' continuity correction, while z_stat is always the
            uncorrected pooled two-proportion z-score, so the two differ slightly.
            Pass False to make p_value the exact two-sided p-value of z_stat.
        control_label: Value of test_group that identifies the control group
        test_label: Value of test_group that identifies the test (variant) group

    Returns:
        DataFrame with one row per (test, metric) holding the raw counts plus:
            conversion_rate: relative lift, (test_rate - control_rate) / control_rate
            metric_change: absolute difference, test_rate - control_rate
            z_stat, p_value, significant
        When nothing could be evaluated, an empty DataFrame with these columns is returned.
    """
    result_columns = [
        'test_number', 'metric',
        'numerator_control', 'denominator_control',
        'numerator_test', 'denominator_test',
        'conversion_rate', 'metric_change',
        'z_stat', 'p_value', 'significant',
    ]
    results = []

    for test_num in df['test'].unique():
        # Group once per test; the same grouping is reused for every metric.
        per_group = df[df['test'] == test_num].groupby('test_group')

        for metric_name, (numerator_col, denominator_col) in metrics_config.items():
            # Rows are indexed by test_group, which allows label-based lookup below.
            totals = per_group.agg({
                numerator_col: 'sum',
                denominator_col: 'sum'
            })

            if len(totals) != 2:
                print(f"Warning: Test {test_num} doesn't have two groups for metric {metric_name}")
                continue

            # Two groups alone is not enough: they must be the expected control/test
            # labels, otherwise the lookups below would fail.
            if control_label not in totals.index or test_label not in totals.index:
                print(
                    f"Warning: Test {test_num} is missing group {control_label} or "
                    f"{test_label} for metric {metric_name}"
                )
                continue

            control_row = totals.loc[control_label]
            test_row = totals.loc[test_label]

            control_numerator = control_row[numerator_col]
            control_denominator = control_row[denominator_col]
            test_numerator = test_row[numerator_col]
            test_denominator = test_row[denominator_col]

            control_rate = _safe_ratio(control_numerator, control_denominator)
            test_rate = _safe_ratio(test_numerator, test_denominator)

            metric_change = test_rate - control_rate
            # Stored under the historical column name 'conversion_rate', but this is
            # the relative lift of the test group over control.
            conversion_rate = _safe_ratio(metric_change, control_rate)

            # 2x2 table of successes / failures for control and test.
            contingency_table = np.array([
                [control_numerator, control_denominator - control_numerator],
                [test_numerator, test_denominator - test_numerator]
            ])

            try:
                _, p_value, _, _ = chi2_contingency(contingency_table, correction=correction)
            except ValueError:
                # Degenerate table (e.g. an empty row/column or negative counts):
                # report "no evidence" instead of failing the whole run.
                p_value = 1.0
                z_stat = 0
            else:
                # Pooled two-proportion z-score (no continuity correction).
                pooled_rate = (control_numerator + test_numerator) / (control_denominator + test_denominator)
                se = np.sqrt(pooled_rate * (1 - pooled_rate) * (1 / control_denominator + 1 / test_denominator))
                z_stat = (test_rate - control_rate) / se if se > 0 else 0

            results.append({
                'test_number': test_num,
                'metric': metric_name,
                'numerator_control': control_numerator,
                'denominator_control': control_denominator,
                'numerator_test': test_numerator,
                'denominator_test': test_denominator,
                'conversion_rate': conversion_rate,
                'metric_change': metric_change,
                'z_stat': z_stat,
                'p_value': p_value,
                'significant': p_value < alpha
            })

    # Passing the column list keeps the schema stable even when there are no rows.
    results_df = pd.DataFrame(results, columns=result_columns)

    # Round the float columns for presentation.
    if not results_df.empty:
        results_df = results_df.round({
            'conversion_rate': 8,
            'metric_change': 8,
            'z_stat': 8,
            'p_value': 8
        })

    return results_df

In [None]:
# Create metrics
metrics = {
    'add_payment_info / session': ('add_payment_info', 'session'),
    'add_shipping_info / session': ('add_shipping_info', 'session'),
    'begin_checkout / session': ('begin_checkout', 'session'),
    'new_accounts / session': ('new account', 'session')
}

In [None]:
def prepare_data_for_analysis(df):
    """
    Prepare data from event_name/value format to analysis format.

    The long-format input (one row per dimension combination and event) is pivoted so
    that every distinct event_name becomes its own column holding the summed 'value'.
    Combinations with no record for an event get 0.

    Args:
        df: DataFrame with columns date, country, device, continent, channel, test,
            test_group, event_name and value

    Returns:
        Wide DataFrame with the dimension columns plus one column per event. A
        'session' column is always present: if the data has no 'session' event, it is
        approximated by the sum of all event columns in the row (a rough proxy only).
    """
    dimension_cols = ['date', 'country', 'device', 'continent', 'channel', 'test', 'test_group']

    # Create pivot table to transform event_name into columns
    pivot_df = df.pivot_table(
        index=dimension_cols,
        columns='event_name',
        values='value',
        aggfunc='sum',
        fill_value=0
    ).reset_index()

    if 'session' not in pivot_df.columns:
        # Collect the event columns BEFORE adding 'session', so the proxy sums only
        # real events and is not inflated by a placeholder value.
        event_columns = [col for col in pivot_df.columns if col not in dimension_cols]
        if event_columns:
            pivot_df['session'] = pivot_df[event_columns].sum(axis=1)
        else:
            # No events at all: fall back to treating each row as one session.
            pivot_df['session'] = 1

    return pivot_df

## **View The Results**

In [None]:
# Pivot the raw event rows into one column per event, then test every configured
# metric for each test number.
prepared_df = prepare_data_for_analysis(df)
results = calculate_ab_test_significance(prepared_df, metrics)
# Display up to the first 16 result rows.
results.head(16)

Unnamed: 0,test_number,metric,numerator_control,denominator_control,numerator_test,denominator_test,conversion_rate,metric_change,z_stat,p_value,significant
0,1,add_payment_info / session,1988,45362,2229,45193,0.12542,0.005497,3.924884,9.3e-05,True
1,1,add_shipping_info / session,3034,45362,3221,45193,0.065605,0.004388,2.603571,0.009585,True
2,1,begin_checkout / session,3784,45362,4021,45193,0.066606,0.005556,2.978783,0.003008,True
3,1,new_accounts / session,3823,45362,3681,45193,-0.033543,-0.002827,-1.542883,0.125812,False
4,2,add_payment_info / session,2344,50637,2409,50244,0.035769,0.001656,1.240994,0.220148,False
5,2,add_shipping_info / session,3480,50637,3510,50244,0.01651,0.001135,0.709557,0.485703,False
6,2,begin_checkout / session,4262,50637,4313,50244,0.019882,0.001673,0.952898,0.346393,False
7,2,new_accounts / session,4165,50637,4184,50244,0.012419,0.001022,0.588793,0.563692,False
8,3,add_payment_info / session,3623,70047,3697,70439,0.014746,0.000763,0.643172,0.527931,False
9,3,add_shipping_info / session,5298,70047,5188,70439,-0.026212,-0.001983,-1.413727,0.160445,False


## **Saving The Results**

In [None]:
# Add a human-readable label derived from the boolean 'significant' flag.
results['significance_label'] = results['significant'].map({True: 'Significant', False: 'Not Significant'})
def categorize_p_value(p_val):
    """Map a p-value to a descriptive significance tier.

    Tiers are checked from strictest to loosest; the first upper bound that
    p_val falls strictly below decides the label. Anything else is reported
    as not significant.
    """
    tiers = (
        (0.001, 'Highly Significant (p < 0.001)'),
        (0.01, 'Very Significant (p < 0.01)'),
        (0.05, 'Significant (p < 0.05)'),
    )
    for upper_bound, label in tiers:
        if p_val < upper_bound:
            return label
    return 'Not Significant (p >= 0.05)'

# Attach the descriptive significance tier for every row's p-value.
results['p_value_category'] = results['p_value'].apply(categorize_p_value)
# Write the labelled results to an Excel workbook, omitting the DataFrame index.
results.to_excel('ab_test_results_enhanced.xlsx', index=False)

### **A/B Test Statistical Analysis Tool**
This code automates statistical significance testing for A/B experiments across multiple conversion metrics. It processes test data to determine whether observed differences between control and test groups are statistically meaningful or due to random variation.
**Key Features**

* **Multi-metric analysis**: Evaluates conversion rates for payment, shipping, checkout, and account creation events
* **Statistical validation**: Uses chi-square testing and z-statistics to calculate p-values and significance at the 95% confidence level
* **Data processing**: Transforms raw event data into analysis-ready format with proper group segmentation
* **Automated reporting**: Outputs conversion lift percentages, statistical significance flags, and confidence metrics

**Business Impact**
Eliminates guesswork in experiment evaluation by providing statistically robust evidence for product decisions. Ensures only validated improvements with genuine user impact proceed to full implementation, reducing risk of deploying ineffective changes.
The tool delivers clear go/no-go decisions with quantified conversion improvements and confidence levels necessary for stakeholder approval.

**Project links**
* [CSV with significant results](https://drive.google.com/file/d/1WHqpFMR-6rAXozuwGWSspe1NlvO_6dRp/view?usp=sharing)
* [Tableau Dashboard](https://public.tableau.com/views/ABTest_17480984647880/ABTest?:language=en-US&publish=yes&:sid=&:redirect=auth&:display_count=n&:origin=viz_share_link)