<h1 style="color:lightblue; text-align:center;">Hypothesis Testing</h1>


In [1]:
import numpy as np
import pandas as pd
from scipy.stats import spearmanr

In [2]:
# Read the preprocessed dataset from the Kaggle input directory
dataset_path = "/kaggle/input/online-purchase-intentions-during-crises/preprocessed_dataset.csv"
df = pd.read_csv(dataset_path)

# List every column of the loaded frame, one name per line
print("Column names in the dataset:")
for column_name in df.columns:
    print(column_name)

Column names in the dataset:
age
education
used_before
profession
EaseofNavigation1
EaseofNavigation2
LearnCurve1
LearnCurve2
ClarityofInstructions1
ClarityofInstructions2
ResponseTime1
ResponseTime2
ErrorHandling1
ErrorHandling2
ProductAvailability1
ProductAvailability2
Convenience1
Convenience2
Cost-effectiveness1
Cost-effectiveness2
InformationAccessibility1
InformationAccessibility2
Personalization1
Personalization2
PrivacyProtection1
PrivacyProtection2
PaymentSecurity1
PaymentSecurity2
TransparentPolicies1
TransparentPolicies2
WordofMouthandRecommendations1
WordofMouthandRecommendations2
SocialMediaPosts/Influence1
SocialMediaPosts/Influence2
OnlineReviewsandRatings1
OnlineReviewsandRatings2
SocialProof1
SocialProof2
NormativePressure1
NormativePressure2
InformationSharing1
InformationSharing2
Attitude1
Attitude2
PerceivedRisk1
PerceivedRisk2
IntentiontoPurchaseOnline1
IntentiontoPurchaseOnline2
IntentiontoPurchaseOnline3
IntentiontoPurchaseOnline4
gender_Female
gender_Male
marita

# Perceived ease of use has no impact on intention to purchase.

In [3]:
# H₀: Perceived ease of use has no impact on intention to purchase.

# Item columns for Perceived Ease of Use (PEOU): two items per sub-factor
peou_columns = [
    f"{subfactor}{item}"
    for subfactor in (
        'EaseofNavigation',
        'LearnCurve',
        'ClarityofInstructions',
        'ResponseTime',
        'ErrorHandling',
    )
    for item in (1, 2)
]

# Item columns for Intention to Purchase Online (items 1 through 4)
intention_columns = [f"IntentiontoPurchaseOnline{item}" for item in range(1, 5)]

# Row-wise mean of the items gives one composite score per participant
df['PEOU'] = df[peou_columns].mean(axis=1)
df['Intention'] = df[intention_columns].mean(axis=1)

# Keep only the rows where both composite scores are present
correlation_df = df[['PEOU', 'Intention']].dropna()

# Spearman rank correlation between the two composites
spearman_corr, p_value = spearmanr(correlation_df['PEOU'], correlation_df['Intention'])

# Report the statistic and the decision at the 5% level
print("Spearman's rho:", spearman_corr)
print("p-value:", p_value)
print(
    "Result: Reject H₀ → Significant relationship exists."
    if p_value < 0.05
    else "Result: Fail to reject H₀ → No significant relationship."
)

Spearman's rho: 0.19360578301736173
p-value: 2.0754481478398974e-08
Result: Reject H₀ → Significant relationship exists.


# Perceived usefulness has no impact on the intention to purchase online.

In [4]:
# H₀: Perceived usefulness has no impact on the intention to purchase online.

# Perceived Usefulness (PU) items; names are case-sensitive and each sub-factor has items 1 and 2
pu_columns = [
    f"{subfactor}{item}"
    for subfactor in (
        'ProductAvailability',
        'Convenience',
        'Cost-effectiveness',
        'InformationAccessibility',
        'Personalization',
    )
    for item in (1, 2)
]

# Composite scores: per-participant mean over the item columns
df['PU'] = df[pu_columns].mean(axis=1)
df['Intention'] = df[intention_columns].mean(axis=1)

# Complete cases only
correlation_df = df.loc[:, ['PU', 'Intention']].dropna()

# Spearman rank correlation of PU against Intention
spearman_corr, p_value = spearmanr(correlation_df['PU'], correlation_df['Intention'])

# Print the statistic, the p-value and the 5%-level verdict
print("Spearman's rho:", spearman_corr)
print("p-value:", p_value)

if not p_value < 0.05:
    print("Result: Fail to reject H₀ → No significant relationship.")
else:
    print("Result: Reject H₀ → Significant relationship exists.")

Spearman's rho: 0.19027769499254027
p-value: 3.6413919408969154e-08
Result: Reject H₀ → Significant relationship exists.


# Having structural assurance has no impact on the intention to make an online purchase.

In [5]:
# H₀: Having structural assurance has no impact on the intention to make an online purchase.

# Structural Assurance (SA) items: two per sub-factor
sa_columns = [
    f"{subfactor}{item}"
    for subfactor in ('PrivacyProtection', 'PaymentSecurity', 'TransparentPolicies')
    for item in (1, 2)
]

# Per-participant composite scores (mean across item columns)
df['SA'] = df[sa_columns].mean(axis=1)
df['Intention'] = df[intention_columns].mean(axis=1)

# Restrict to rows where both composites are available
correlation_df = df[['SA', 'Intention']].dropna()

# Spearman rank correlation of SA against Intention
spearman_corr, p_value = spearmanr(
    correlation_df['SA'],
    correlation_df['Intention'],
)

# Report the statistic and its p-value
print("Spearman's rho (SA vs Intention):", spearman_corr)
print("p-value:", p_value)

# Decision at the 5% significance level
print(
    "Result: Reject H₀ → Significant relationship exists between Structural Assurance and Intention to Purchase."
    if p_value < 0.05
    else "Result: Fail to reject H₀ → No significant relationship."
)

Spearman's rho (SA vs Intention): 0.18097510121525173
p-value: 1.6632496435783662e-07
Result: Reject H₀ → Significant relationship exists between Structural Assurance and Intention to Purchase.


# Social influence has no impact on the intention to purchase online.

In [6]:
# H₀: Social influence has no impact on the intention to purchase online.

# Social Influence items: six sub-factors, items 1 and 2 of each
social_influence_columns = [
    f"{subfactor}{item}"
    for subfactor in (
        'WordofMouthandRecommendations',
        'SocialMediaPosts/Influence',
        'OnlineReviewsandRatings',
        'SocialProof',
        'NormativePressure',
        'InformationSharing',
    )
    for item in (1, 2)
]

# Composite score per participant = mean of the item columns
df['SocialInfluence'] = df[social_influence_columns].mean(axis=1)
df['Intention'] = df[intention_columns].mean(axis=1)

# Discard rows where either composite is missing
correlation_df = df.loc[:, ['SocialInfluence', 'Intention']].dropna()

# Spearman rank correlation between the two composites
spearman_corr, p_value = spearmanr(
    correlation_df['SocialInfluence'],
    correlation_df['Intention'],
)

# Show the statistic and p-value
print("Spearman's rho (Social Influence vs Intention):", spearman_corr)
print("p-value:", p_value)

# Verdict at the 5% level
if not p_value < 0.05:
    print("Result: Fail to reject H₀ → No significant relationship.")
else:
    print("Result: Reject H₀ → Significant relationship between Social Influence and Intention to Purchase.")

Spearman's rho (Social Influence vs Intention): 0.257216811279178
p-value: 6.218716323440867e-14
Result: Reject H₀ → Significant relationship between Social Influence and Intention to Purchase.


# The attitude towards online shopping does not mediate the relationship between perceived ease of use and the intention to purchase online.

In [7]:
import statsmodels.formula.api as smf
from math import sqrt
from scipy.stats import norm

# H₀: The attitude towards online shopping does not mediate the relationship
# between perceived ease of use and the intention to purchase online.
#
# Approach: three OLS regressions (total effect, path a, paths b and c′)
# followed by a Sobel z-test on the indirect effect a × b.

# Define columns for each construct
peou_columns = [
    'EaseofNavigation1', 'EaseofNavigation2',
    'LearnCurve1', 'LearnCurve2',
    'ClarityofInstructions1', 'ClarityofInstructions2',
    'ResponseTime1', 'ResponseTime2',
    'ErrorHandling1', 'ErrorHandling2'
]

attitude_columns = ['Attitude1', 'Attitude2']

# Compute construct means (one composite score per participant)
df['PEOU'] = df[peou_columns].mean(axis=1)
df['Attitude'] = df[attitude_columns].mean(axis=1)
df['Intention'] = df[intention_columns].mean(axis=1)

# Drop rows with missing values so all three models use the same observations
df_clean = df[['PEOU', 'Attitude', 'Intention']].dropna()

# Step 1: Total effect (c) — Intention ~ PEOU
model_c = smf.ols('Intention ~ PEOU', data=df_clean).fit()

# Step 2: Path a — Attitude ~ PEOU
model_a = smf.ols('Attitude ~ PEOU', data=df_clean).fit()

# Step 3: Path b and c′ — Intention ~ PEOU + Attitude
model_b_cprime = smf.ols('Intention ~ PEOU + Attitude', data=df_clean).fit()

# Sobel test for indirect effect (a × b):
# z = a·b / sqrt(b²·sa² + a²·sb²), with sa and sb the standard errors of a and b
a = model_a.params['PEOU']
sa = model_a.bse['PEOU']
b = model_b_cprime.params['Attitude']
sb = model_b_cprime.bse['Attitude']
sobel_z = (a * b) / sqrt(b**2 * sa**2 + a**2 * sb**2)
# Use the survival function rather than 1 - cdf: for large |z| the subtraction
# cancels to exactly 0.0, whereas sf keeps the tiny tail probability.
sobel_p = 2 * norm.sf(abs(sobel_z))

# Print all results
print("\nStep 1: Total Effect (c) — Intention ~ PEOU")
print(model_c.summary())

print("\nStep 2: Path a — Attitude ~ PEOU")
print(model_a.summary())

print("\nStep 3: Path b and c′ — Intention ~ PEOU + Attitude")
print(model_b_cprime.summary())

print("\nSobel Test for Indirect Effect (a × b):")
print(f"Z = {sobel_z:.4f}")
# Significant-digit formatting so a very small p-value is not shown as 0.0000
print(f"p-value = {sobel_p:.4g}")

# Final Verdict at the 5% level
if sobel_p < 0.05:
    print("Reject H₀: Attitude significantly mediates the relationship between PEOU and Intention.")
else:
    print("Fail to reject H₀: No significant mediation effect of Attitude.")



Step 1: Total Effect (c) — Intention ~ PEOU
                            OLS Regression Results                            
Dep. Variable:              Intention   R-squared:                       0.075
Model:                            OLS   Adj. R-squared:                  0.074
Method:                 Least Squares   F-statistic:                     66.41
Date:                Tue, 13 May 2025   Prob (F-statistic):           1.36e-15
Time:                        20:42:07   Log-Likelihood:                -597.56
No. Observations:                 825   AIC:                             1199.
Df Residuals:                     823   BIC:                             1209.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Interce

# Ease of navigation has no impact on the intention to purchase online.

In [8]:
# H₀: Ease of navigation has no impact on the intention to purchase online.

# The two Ease of Navigation items
navigation_columns = [f"EaseofNavigation{item}" for item in (1, 2)]

# Per-participant composite scores
df['EaseOfNavigation'] = df[navigation_columns].mean(axis=1)
df['Intention'] = df[intention_columns].mean(axis=1)

# Complete cases only
correlation_df = df[['EaseOfNavigation', 'Intention']].dropna()

# Spearman rank correlation between the composites
spearman_corr, p_value = spearmanr(
    correlation_df['EaseOfNavigation'],
    correlation_df['Intention'],
)

# Report the statistic and p-value
print("Spearman's rho (Ease of Navigation vs Intention):", spearman_corr)
print("p-value:", p_value)

# Verdict at the 5% level
print(
    "Result: Reject H₀ → Ease of Navigation significantly correlates with Intention to Purchase."
    if p_value < 0.05
    else "Result: Fail to reject H₀ → No significant relationship detected."
)

Spearman's rho (Ease of Navigation vs Intention): 0.14202818873951203
p-value: 4.2393032685419935e-05
Result: Reject H₀ → Ease of Navigation significantly correlates with Intention to Purchase.


# Clarity of instructions has no impact on the intention to purchase online.

In [9]:
# H₀: Clarity of instructions has no impact on the intention to purchase online.

# Re-read the preprocessed dataset; this replaces `df`, so composite columns
# added by earlier cells are no longer present afterwards
df = pd.read_csv(
    "/kaggle/input/online-purchase-intentions-during-crises/preprocessed_dataset.csv"
)

# The two Clarity of Instructions items
clarity_columns = [f"ClarityofInstructions{item}" for item in (1, 2)]

# Per-participant composite scores (Intention must be rebuilt after the reload)
df['Clarity'] = df[clarity_columns].mean(axis=1)
df['Intention'] = df[intention_columns].mean(axis=1)

# Keep only rows with both composites present
correlation_df = df.loc[:, ['Clarity', 'Intention']].dropna()

# Spearman rank correlation between the composites
spearman_corr, p_value = spearmanr(correlation_df['Clarity'], correlation_df['Intention'])

# Report the statistic and p-value
print("Spearman's rho (Clarity vs Intention):", spearman_corr)
print("p-value:", p_value)

# Verdict at the 5% level
if not p_value < 0.05:
    print("Fail to reject H₀: No significant relationship.")
else:
    print("Reject H₀: Clarity of instructions significantly impacts intention to purchase.")

Spearman's rho (Clarity vs Intention): 0.20584246611822185
p-value: 2.4102278405661193e-09
Reject H₀: Clarity of instructions significantly impacts intention to purchase.


# Response Time has no impact on intention to purchase online.

In [10]:
# H₀: Response Time has no impact on intention to purchase online.

# The two Response Time items
response_time_columns = [f"ResponseTime{item}" for item in (1, 2)]

# One composite score per participant for each construct
df['ResponseTime'] = df[response_time_columns].mean(axis=1)
df['Intention'] = df[intention_columns].mean(axis=1)

# Complete cases only
correlation_df = df[['ResponseTime', 'Intention']].dropna()

# Spearman rank correlation between the composites
spearman_corr, p_value = spearmanr(
    correlation_df['ResponseTime'],
    correlation_df['Intention'],
)

# Report the statistic and p-value
print("Spearman's rho (Response Time vs Intention):", spearman_corr)
print("p-value:", p_value)

# Verdict at the 5% level
print(
    "Result: Reject H₀ → Significant relationship exists between Response Time and Intention."
    if p_value < 0.05
    else "Result: Fail to reject H₀ → No significant relationship."
)

Spearman's rho (Response Time vs Intention): 0.07607996063588794
p-value: 0.028882628417328946
Result: Reject H₀ → Significant relationship exists between Response Time and Intention.


# Error handling has no impact on intention to purchase online

In [11]:
# H₀: Error handling has no impact on intention to purchase online

# The two Error Handling items
error_handling_columns = [f"ErrorHandling{item}" for item in (1, 2)]

# Per-participant composite scores
df['ErrorHandling'] = df[error_handling_columns].mean(axis=1)
df['Intention'] = df[intention_columns].mean(axis=1)

# Rows with a missing composite are excluded
correlation_df = df.loc[:, ['ErrorHandling', 'Intention']].dropna()

# Spearman rank correlation between the composites
spearman_corr, p_value = spearmanr(
    correlation_df['ErrorHandling'],
    correlation_df['Intention'],
)

# Report the statistic and p-value
print("Spearman's rho (Error Handling vs Intention):", spearman_corr)
print("p-value:", p_value)

# Verdict at the 5% level
if not p_value < 0.05:
    print("Result: Fail to reject H₀ → No significant relationship.")
else:
    print("Result: Reject H₀ → Significant relationship exists between Error Handling and Intention to Purchase.")

Spearman's rho (Error Handling vs Intention): 0.14893676915712514
p-value: 1.7439795709379758e-05
Result: Reject H₀ → Significant relationship exists between Error Handling and Intention to Purchase.


# Part (b)

In addition to the above hypotheses, create your own 10 hypotheses based on the given conceptual diagram and validate them using suitable statistical tests.

In [12]:
# Intention items shared by every hypothesis below (items 1 through 4)
intention_columns = [f"IntentiontoPurchaseOnline{item}" for item in range(1, 5)]

# Composite Intention score: per-participant mean of the four items
df['Intention'] = df[intention_columns].mean(axis=1)

# Define a reusable function
def test_spearman_correlation(factor_name, columns):
    """
    Computes and prints Spearman correlation between a given factor and purchase intention.
    
    Parameters:
    - factor_name: str, name of the new column to be used for the factor
    - columns: list of str, names of the columns representing the subfactor items
    """
    df[factor_name] = df[columns].mean(axis=1)
    corr_df = df[[factor_name, 'Intention']].dropna()
    rho, p = spearmanr(corr_df[factor_name], corr_df['Intention'])
    print(f"\n--- {factor_name} ---")
    print(f"Spearman's rho: {rho:.3f}")
    print(f"p-value       : {p:.4g}")
    if p < 0.05:
        print("✅ Significant relationship (reject H₀)")
    else:
        print("❌ Not significant (fail to reject H₀)")

### 1. Personalization has no impact on the intention to purchase online.

In [13]:
test_spearman_correlation(factor_name="Personalization", columns=['Personalization1', 'Personalization2'])


--- Personalization ---
Spearman's rho: 0.060
p-value       : 0.08348
❌ Not significant (fail to reject H₀)


### 2. Word of mouth has no impact on the intention to purchase online.

In [14]:
test_spearman_correlation(
    factor_name="Word of Mouth",
    columns=['WordofMouthandRecommendations1', 'WordofMouthandRecommendations2'],
)


--- Word of Mouth ---
Spearman's rho: 0.195
p-value       : 1.609e-08
✅ Significant relationship (reject H₀)


### 3. Normative Pressure has no impact on the intention to purchase online

In [15]:
test_spearman_correlation(
    factor_name="Normative Pressure",
    columns=['NormativePressure1', 'NormativePressure2'],
)


--- Normative Pressure ---
Spearman's rho: 0.111
p-value       : 0.001367
✅ Significant relationship (reject H₀)


### 4. Learning Curve has no impact on the intention to purchase online

In [16]:
test_spearman_correlation(factor_name="Learning Curve", columns=['LearnCurve1', 'LearnCurve2'])


--- Learning Curve ---
Spearman's rho: 0.221
p-value       : 1.409e-10
✅ Significant relationship (reject H₀)


### 5. Privacy Protection has no impact on the intention to purchase online

In [17]:
test_spearman_correlation(
    factor_name="Privacy Protection",
    columns=['PrivacyProtection1', 'PrivacyProtection2'],
)


--- Privacy Protection ---
Spearman's rho: 0.107
p-value       : 0.00207
✅ Significant relationship (reject H₀)


### 6. Perceived risk does not mediate the relationship between social influence and intention to purchase online.

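Mediation is assessed with three regressions:

- Total effect (c): Intention = β₀ + c·SocialInfluence + error
- Path a: Risk = β₀ + a·SocialInfluence + error
- Paths b and c′: Intention = β₀ + c′·SocialInfluence + b·Risk + error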

In [18]:
from statsmodels.formula.api import ols

# H₀: Perceived risk does not mediate the relationship between social influence
# and intention to purchase online.

# Item columns per construct
social_influence_cols = [
    f"{subfactor}{item}"
    for subfactor in (
        'WordofMouthandRecommendations',
        'SocialMediaPosts/Influence',
        'OnlineReviewsandRatings',
        'SocialProof',
        'NormativePressure',
        'InformationSharing',
    )
    for item in (1, 2)
]

risk_cols = [f"PerceivedRisk{item}" for item in (1, 2)]

intention_cols = [f"IntentiontoPurchaseOnline{item}" for item in range(1, 5)]

# Composite score per participant = mean of the construct's items
df['SocialInfluence'] = df[social_influence_cols].mean(axis=1)
df['Risk'] = df[risk_cols].mean(axis=1)
df['Intention'] = df[intention_cols].mean(axis=1)

# Complete cases across all three composites
med_df = df.loc[:, ['SocialInfluence', 'Risk', 'Intention']].dropna()

# Fit the three regressions:
#   model1 — total effect (c):        Intention ~ SocialInfluence
#   model2 — path a:                  Risk ~ SocialInfluence
#   model3 — direct (c') and path b:  Intention ~ SocialInfluence + Risk
model1 = ols("Intention ~ SocialInfluence", data=med_df).fit()
model2 = ols("Risk ~ SocialInfluence", data=med_df).fit()
model3 = ols("Intention ~ SocialInfluence + Risk", data=med_df).fit()

# Coefficients and p-values of each path
c_path, c_pval = model1.params['SocialInfluence'], model1.pvalues['SocialInfluence']
a_path, a_pval = model2.params['SocialInfluence'], model2.pvalues['SocialInfluence']
c_prime, c_prime_pval = model3.params['SocialInfluence'], model3.pvalues['SocialInfluence']
b_path, b_pval = model3.params['Risk'], model3.pvalues['Risk']

# Output results
print("----- Mediation Test Summary -----")
print(f"Step 1 (c):  SocialInfluence → Intention   coef = {c_path:.3f},  p = {c_pval:.4g}")
print(f"Step 2 (a):  SocialInfluence → Risk        coef = {a_path:.3f},  p = {a_pval:.4g}")
print(f"Step 3 (b):  Risk → Intention               coef = {b_path:.3f},  p = {b_pval:.4g}")
print(f"Step 3 (c’): SocialInfluence → Intention   coef = {c_prime:.3f}, p = {c_prime_pval:.4g}")

# Verdict: both a and b must be significant at 5%; then the direct effect c'
# decides between full mediation (c' not significant) and partial mediation
# (c' significant but smaller in magnitude than c).
if not (a_pval < 0.05 and b_pval < 0.05):
    print("\n❌ No mediation — Fail to reject H₀")
elif c_prime_pval >= 0.05:
    print("\n✅ Full mediation — Reject H₀")
elif c_prime_pval < 0.05 and abs(c_prime) < abs(c_path):
    print("\n✅ Partial mediation — Reject H₀")
else:
    print("\n❌ No mediation — Fail to reject H₀")


----- Mediation Test Summary -----
Step 1 (c):  SocialInfluence → Intention   coef = 0.304,  p = 9.868e-24
Step 2 (a):  SocialInfluence → Risk        coef = 0.271,  p = 8.842e-12
Step 3 (b):  Risk → Intention               coef = 0.342,  p = 6.533e-44
Step 3 (c’): SocialInfluence → Intention   coef = 0.211, p = 1.21e-14

✅ Partial mediation — Reject H₀


###  7. Age does not moderate the relationship between Perceived Usefulness and Intention to Purchase Online.

In [19]:
import statsmodels.formula.api as smf

# H₀: Age does not moderate the relationship between Perceived Usefulness (PU)
# and Intention to Purchase Online.

# Item columns per construct
pu_columns = [
    f"{subfactor}{item}"
    for subfactor in (
        'ProductAvailability',
        'Convenience',
        'Cost-effectiveness',
        'InformationAccessibility',
        'Personalization',
    )
    for item in (1, 2)
]
intention_columns = [f"IntentiontoPurchaseOnline{item}" for item in range(1, 5)]

# Per-participant composite scores
df['PU'] = df[pu_columns].mean(axis=1)
df['Intention'] = df[intention_columns].mean(axis=1)

# Numeric copy of `age`; values that cannot be parsed become NaN
df['Age'] = pd.to_numeric(df['age'], errors='coerce')

# Complete cases plus the PU × Age product term used to test moderation
mod_df = (
    df[['PU', 'Age', 'Intention']]
    .dropna()
    .assign(PUxAge=lambda frame: frame['PU'] * frame['Age'])
)

# OLS with both main effects and the interaction term
model = smf.ols("Intention ~ PU + Age + PUxAge", data=mod_df).fit()

# Moderation is judged by the interaction term's p-value at the 5% level
pval_interaction = model.pvalues['PUxAge']
print(pval_interaction)
print(
    "\n✅ Age significantly moderates the relationship (Reject H₀)"
    if pval_interaction < 0.05
    else "\n❌ Age does NOT significantly moderate the relationship (Fail to reject H₀)"
)

0.01327596536818294

✅ Age significantly moderates the relationship (Reject H₀)


### 8. Marital status does not moderate the relationship between Perceived Ease of Use (PEOU) and Intention to Purchase Online.

In [20]:
# H₀: Marital status does not moderate the relationship between Perceived Ease
# of Use (PEOU) and Intention to Purchase Online.
# Model: Intention = β₀ + β₁·PEOU + β₂·Married + β₃·(PEOU × Married) + error

# Define PEOU columns
peou_cols = [
    'EaseofNavigation1', 'EaseofNavigation2',
    'LearnCurve1', 'LearnCurve2',
    'ClarityofInstructions1', 'ClarityofInstructions2',
    'ResponseTime1', 'ResponseTime2',
    'ErrorHandling1', 'ErrorHandling2'
]

# Intention columns
intention_cols = [
    'IntentiontoPurchaseOnline1',
    'IntentiontoPurchaseOnline2',
    'IntentiontoPurchaseOnline3',
    'IntentiontoPurchaseOnline4'
]

# Compute averages (one composite score per participant)
df['PEOU'] = df[peou_cols].mean(axis=1)
df['Intention'] = df[intention_cols].mean(axis=1)

# Encode marital status only when the dummy column is not already present.
# Missing marital status stays NaN (instead of being coded 0 = "not married")
# so that the dropna() below excludes those rows rather than misclassifying them.
if 'marital_status_Married' not in df.columns:
    df['marital_status_Married'] = (
        df['marital_status']
        .eq('Married')
        .astype(float)
        .where(df['marital_status'].notna())
    )
    # Alias column kept because the original cell also created it
    df['Married'] = df['marital_status_Married']

# Drop missing
mod_df = df[['PEOU', 'marital_status_Married', 'Intention']].dropna()

# Create interaction term
mod_df['PEOUxMarried'] = mod_df['PEOU'] * mod_df['marital_status_Married']

# Fit the moderation model
model = smf.ols("Intention ~ PEOU + marital_status_Married + PEOUxMarried", data=mod_df).fit()

# Moderation is judged by the interaction term's p-value at the 5% level
pval_interaction = model.pvalues['PEOUxMarried']
print("pval:" ,pval_interaction)
if pval_interaction < 0.05:
    print("\n✅ Marital status significantly moderates the relationship (Reject H₀)")
else:
    print("\n❌ No significant moderation by marital status (Fail to reject H₀)")

pval: 0.1491320561538677

❌ No significant moderation by marital status (Fail to reject H₀)


### 9. Gender does not moderate the relationship between PU and Intention.

In [21]:
# H₀: Gender does not moderate the relationship between PU and Intention.
# Model: Intention = β₀ + β₁·PU + β₂·Male + β₃·(PU × Male) + error

# Define columns for Perceived Usefulness (PU)
pu_cols = [
    'ProductAvailability1', 'ProductAvailability2',
    'Convenience1', 'Convenience2',
    'Cost-effectiveness1', 'Cost-effectiveness2',
    'InformationAccessibility1', 'InformationAccessibility2',
    'Personalization1', 'Personalization2'
]

# Define Intention columns
intention_cols = [
    'IntentiontoPurchaseOnline1',
    'IntentiontoPurchaseOnline2',
    'IntentiontoPurchaseOnline3',
    'IntentiontoPurchaseOnline4'
]

# Calculate averages (one composite score per participant)
df['PU'] = df[pu_cols].mean(axis=1)
df['Intention'] = df[intention_cols].mean(axis=1)

# Ensure gender is binary coded (Male = 1, otherwise 0) when the dummy column
# is not already present. Missing gender stays NaN so the dropna() below
# excludes those rows; the vectorised .str accessor also avoids the
# AttributeError that calling .lower() on a NaN value would raise.
if 'gender_Male' not in df.columns:
    df['gender_Male'] = (
        df['gender']
        .str.lower()
        .eq('male')
        .astype(float)
        .where(df['gender'].notna())
    )

# Drop missing
mod_df = df[['PU', 'Intention', 'gender_Male']].dropna()

# Create interaction term
mod_df['PUxGender'] = mod_df['PU'] * mod_df['gender_Male']

# Fit the moderation model
model = smf.ols("Intention ~ PU + gender_Male + PUxGender", data=mod_df).fit()

# Interpret the interaction term; the p-value is printed, as in the other
# moderation cells, so the verdict can be checked against the number
pval_interaction = model.pvalues['PUxGender']
print("pval:", pval_interaction)
if pval_interaction < 0.05:
    print("\n✅ Gender significantly moderates the relationship (Reject H₀)")
else:
    print("\n❌ No significant moderation by gender (Fail to reject H₀)")


❌ No significant moderation by gender (Fail to reject H₀)


### 10. Convenience has no impact on the intention to purchase online

In [22]:
test_spearman_correlation(factor_name="Convenience", columns=['Convenience1', 'Convenience2'])


--- Convenience ---
Spearman's rho: 0.240
p-value       : 2.991e-12
✅ Significant relationship (reject H₀)
