In [7]:
import pandas as pd
import numpy as np
df = pd.read_csv('online_shoppers_enriched_V4.csv')

In [8]:
from scipy.stats import ttest_ind

print("\nT-TESTS: Do means differ between buyers and non-buyers?\n")
for feature in ['Engagement_Score', 'Avg_Time_Per_Page', 'Product_Ratio', 'BounceRates', 'ExitRates']:
    buyers = df[df['Revenue']==1][feature]
    nonbuyers = df[df['Revenue']==0][feature]
    stat, pval = ttest_ind(buyers, nonbuyers, equal_var=False)
    print(f"{feature}: t={stat:.2f}, p={pval:.4f}, buyers mean={buyers.mean():.2f}, non-buyers mean={nonbuyers.mean():.2f}")



T-TESTS: Do means differ between buyers and non-buyers?

Engagement_Score: t=0.01, p=0.9893, buyers mean=18.14, non-buyers mean=18.12
Avg_Time_Per_Page: t=-1.68, p=0.0955, buyers mean=31.01, non-buyers mean=34.64
Product_Ratio: t=-6.43, p=0.0000, buyers mean=0.79, non-buyers mean=0.92
BounceRates: t=-2.30, p=0.0227, buyers mean=0.01, non-buyers mean=0.01
ExitRates: t=-0.20, p=0.8453, buyers mean=0.03, non-buyers mean=0.03


In [9]:
from scipy.stats import chi2_contingency

# Returning visitor vs Revenue
table = pd.crosstab(df['Is_Returning'], df['Revenue'])
chi2, p, dof, expected = chi2_contingency(table)
print(f"\nCHI-SQUARE TEST: Is_Returning vs Revenue: chi2={chi2:.2f}, p={p:.4f}")

# Weekend vs Revenue
table2 = pd.crosstab(df['Weekend'], df['Revenue'])
chi2, p, dof, expected = chi2_contingency(table2)
print(f"CHI-SQUARE TEST: Weekend vs Revenue: chi2={chi2:.2f}, p={p:.4f}")



CHI-SQUARE TEST: Is_Returning vs Revenue: chi2=47.91, p=0.0000
CHI-SQUARE TEST: Weekend vs Revenue: chi2=5.62, p=0.0177


In [10]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Ensure Revenue is int (0 or 1)
df['Revenue'] = df['Revenue'].astype(int)

anova = ols('Revenue ~ C(Month)', data=df).fit()
anova_table = sm.stats.anova_lm(anova, typ=2)
print("ANOVA for conversion by month:")
print(anova_table)


anova = ols('Revenue ~ C(Month)', data=df).fit()
anova_table = sm.stats.anova_lm(anova, typ=2)
print("\nANOVA for conversion by month:\n", anova_table)


ANOVA for conversion by month:
              sum_sq      df          F        PR(>F)
C(Month)    6.944418     9.0  26.050044  2.133423e-44
Residual  153.757370  5191.0        NaN           NaN

ANOVA for conversion by month:
               sum_sq      df          F        PR(>F)
C(Month)    6.944418     9.0  26.050044  2.133423e-44
Residual  153.757370  5191.0        NaN           NaN


In [11]:
import pandas as pd
from scipy.stats import chi2_contingency

# Create contingency table
table = pd.crosstab(df['VisitorType'], df['Revenue'])

# Chi-square test
chi2, p, dof, expected = chi2_contingency(table)
print(f"Chi-square: {chi2:.2f}, p-value: {p:.4f}")


Chi-square: 51.29, p-value: 0.0000
