# In [None]:
# Step 1: Import required libraries
import pandas as pd
import numpy as np
import scipy.stats as stats
from statsmodels.stats.weightstats import ztest
from scipy.stats import chi2_contingency, ttest_ind, f_oneway
# Step 2: Load the dataset
# The path is relative, so the CSV is resolved against the current working directory.
df = pd.read_csv('Titanic-Dataset.csv')
# Step 3: Explore dataset
print("Dataset Head:")
print(df.head())
# Step 4: Check for missing values and basic info
# Per-column count of missing (NaN/None) entries.
print("\nMissing Values:\n", df.isnull().sum())
# DataFrame.info() writes its summary directly to stdout and returns None,
# so wrapping it in print() would emit the summary *before* the header and
# then print a stray "None". Print the header first, then call info() alone.
print("\nDataset Info:")
df.info()

# Step 5: Perform Z-Test
# Question: does the mean 'Age' differ between male and female passengers?
# Select the 'Age' column for each sex and discard rows where age is missing.
male_ages = df.loc[df['Sex'] == 'male', 'Age'].dropna()
female_ages = df.loc[df['Sex'] == 'female', 'Age'].dropna()

# Two-sample z-test on the two age samples (statsmodels defaults).
z_stat, p_value_z = ztest(male_ages, female_ages)

print("\nZ-Test Results:")
print(f"Z-Statistic: {z_stat:.4f}, P-Value: {p_value_z:.4f}")

# Interpretation: reject the null hypothesis when the p-value is below alpha.
alpha = 0.05
print(
    "Conclusion: Significant difference in mean age between males and females."
    if p_value_z < alpha
    else "Conclusion: No significant difference in mean age between males and females."
)

# Step 6: Perform T-Test (p-test)
# Same question as the z-test: compare mean 'Age' of males vs. females.
# The samples are rebuilt here so this step does not rely on an earlier one.
is_male = df['Sex'].eq('male')
is_female = df['Sex'].eq('female')
male_ages = df.loc[is_male, 'Age'].dropna()      # drop passengers with unknown age
female_ages = df.loc[is_female, 'Age'].dropna()

# Independent two-sample t-test with scipy's default settings.
t_stat, p_value_t = ttest_ind(male_ages, female_ages)

print("\nT-Test (p-test) Results:")
print(f"T-Statistic: {t_stat:.4f}, P-Value: {p_value_t:.4f}")

# Interpretation at the 5% significance level.
alpha = 0.05
print(
    "Conclusion: Significant difference in mean age between males and females."
    if p_value_t < alpha
    else "Conclusion: No significant difference in mean age between males and females."
)

# Step 7: Perform Chi-Square Test
# Question: is "Survived" independent of "Sex"?
# Build the observed-frequency table: rows = Survived values, columns = Sex values.
contingency_table = pd.crosstab(index=df["Survived"], columns=df["Sex"])

# chi2_contingency returns (statistic, p-value, degrees of freedom, expected counts).
(chi2_stat, p_value_chi, dof, expected) = chi2_contingency(contingency_table)

print("\nChi-Square Test Results:")
print(f"Chi-Square Statistic: {chi2_stat:.4f}, P-Value: {p_value_chi:.4f}")
print(f"Degrees of Freedom: {dof}")

# Interpretation: a p-value below alpha rejects the independence hypothesis.
alpha = 0.05
print(
    "Conclusion: 'Survived' and 'Sex' are dependent (statistically significant relationship)."
    if p_value_chi < alpha
    else "Conclusion: 'Survived' and 'Sex' are independent (no statistically significant relationship)."
)


# Step 8: Perform ANOVA Test
# Question: does the mean 'Age' differ across passenger classes (Pclass 1, 2, 3)?
# One age sample per class, with missing ages removed.
pclass_1, pclass_2, pclass_3 = (
    df.loc[df["Pclass"] == pclass, "Age"].dropna() for pclass in (1, 2, 3)
)

# One-way ANOVA across the three age samples.
anova_stat, p_value_anova = f_oneway(pclass_1, pclass_2, pclass_3)

print("\nANOVA Test Results:")
print(f"F-Statistic: {anova_stat:.4f}, P-Value: {p_value_anova:.4f}")

# Interpretation at the 5% significance level.
alpha = 0.05
print(
    "Conclusion: Significant difference in mean age across Passenger Classes (Pclass)."
    if p_value_anova < alpha
    else "Conclusion: No significant difference in mean age across Passenger Classes (Pclass)."
)