In [12]:
# Import necessary libraries
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols

# -----------------------------
# Step 1: Create sample dataset
# -----------------------------
# Draw every column from one explicitly seeded generator so that
# Restart Kernel -> Run All reproduces the same synthetic sample (and hence
# the same statistics in the cells that consume `df`).
SEED = 42
N_SAMPLES = 50
rng = np.random.default_rng(SEED)

data = {
    'gender': rng.choice(['Male', 'Female'], N_SAMPLES),
    # Continuous rating drawn uniformly from [1, 5).
    'teaching_evaluation': rng.uniform(1, 5, N_SAMPLES),
    # Upper bound is exclusive, so ages fall in 25..59.
    'age': rng.integers(25, 60, N_SAMPLES),
    # Continuous score drawn uniformly from [1, 10).
    'beauty': rng.uniform(1, 10, N_SAMPLES),
    'tenure': rng.choice(['Yes', 'No'], N_SAMPLES)
}

# One row per synthetic teacher; columns follow the key order of `data`.
df = pd.DataFrame(data)



CSV file created: teachers_rating.csv

Q1. T-Test result:
T-statistic: -1.6875282010996
P-value: 0.09799056575202363

Q2. ANOVA result:
                  sum_sq    df         F    PR(>F)
C(age_group)    9.295262   3.0  0.451499  0.717465
Residual      315.675885  46.0       NaN       NaN

Q3. Chi-Square result:
Chi2: 0.08116883116883117
P-value: 0.7757196518444691
Degrees of Freedom: 1
Expected Frequencies:
 [[11. 11.]
 [14. 14.]]

Q4. Correlation result:
Correlation coefficient: -0.041879798961458825
P-value: 0.772756246128844


In [14]:
# Persist the generated sample next to the notebook (index column omitted).
df.to_csv("teachers_rating.csv", index=False)
print("CSV file created: teachers_rating.csv")

# ----------------------------------
# Q1. Independent two-sample t-test:
#     is mean teaching evaluation different for Male vs Female?
# ----------------------------------
# Select each gender's evaluation scores with a single .loc lookup.
male_eval = df.loc[df['gender'] == 'Male', 'teaching_evaluation']
female_eval = df.loc[df['gender'] == 'Female', 'teaching_evaluation']

# Default scipy settings: two-sided test assuming equal group variances.
ttest_result = stats.ttest_ind(male_eval, female_eval)
t_stat = ttest_result.statistic
p_value = ttest_result.pvalue

print("\nQ1. T-Test result:")
print("T-statistic:", t_stat)
print("P-value:", p_value)



CSV file created: teachers_rating.csv

Q1. T-Test result:
T-statistic: -1.6875282010996
P-value: 0.09799056575202363


In [15]:
# ----------------------------------
# Q2. One-way ANOVA: does mean beauty score differ across age groups?
# ----------------------------------
# Bucket ages into four ten-year bands. pd.cut uses right-closed intervals by
# default, so the bands are (20, 30], (30, 40], (40, 50], (50, 60].
age_edges = [20, 30, 40, 50, 60]
age_labels = ['20-30', '30-40', '40-50', '50-60']
df['age_group'] = pd.cut(df['age'], bins=age_edges, labels=age_labels)

# Fit beauty on the categorical age band, then derive the Type II ANOVA table.
anova_formula = 'beauty ~ C(age_group)'
anova_model = ols(anova_formula, data=df).fit()
anova_table = sm.stats.anova_lm(anova_model, typ=2)

print("\nQ2. ANOVA result:")
print(anova_table)




Q2. ANOVA result:
                  sum_sq    df         F    PR(>F)
C(age_group)    9.295262   3.0  0.451499  0.717465
Residual      315.675885  46.0       NaN       NaN


In [16]:
# ----------------------------------
# Q3. Chi-square test of independence: tenure status vs gender
# ----------------------------------
# Rows are tenure categories, columns are gender categories (observed counts).
contingency_table = pd.crosstab(index=df['tenure'], columns=df['gender'])

# Returns the statistic, its p-value, the degrees of freedom and the table of
# counts expected under independence, in that order.
chi2, p, dof, expected = stats.chi2_contingency(contingency_table)

print("\nQ3. Chi-Square result:")
print("Chi2:", chi2)
print("P-value:", p)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)




Q3. Chi-Square result:
Chi2: 0.08116883116883117
P-value: 0.7757196518444691
Degrees of Freedom: 1
Expected Frequencies:
 [[11. 11.]
 [14. 14.]]


In [17]:
# ----------------------------------
# Q4. Pearson correlation: teaching evaluation vs beauty score
# ----------------------------------
# Pull both columns out first so the test call reads as "x against y".
eval_scores = df['teaching_evaluation']
beauty_scores = df['beauty']

# pearsonr yields the correlation coefficient followed by its p-value.
correlation, p_corr = stats.pearsonr(eval_scores, beauty_scores)

print("\nQ4. Correlation result:")
print("Correlation coefficient:", correlation)
print("P-value:", p_corr)



Q4. Correlation result:
Correlation coefficient: -0.041879798961458825
P-value: 0.772756246128844
