In [4]:
# 1.
import seaborn as sns
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols
# Load seaborn dataset
df = sns.load_dataset("tips")
df = df.rename(columns={
    "sex": "gender",
    "total_bill": "beauty",     
    "tip": "eval",              
    "size": "age"               
})

df.head()

Unnamed: 0,beauty,eval,gender,smoker,day,time,age
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [5]:
# Encode gender (male=0, female=1)
df["gender"] = df["gender"].map({"Male": 0, "Female": 1})

# Regression model: eval ~ gender
model = smf.ols("eval ~ gender", data=df).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                   eval   R-squared:                       0.008
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     1.926
Date:                Mon, 27 Oct 2025   Prob (F-statistic):              0.166
Time:                        22:56:49   Log-Likelihood:                -423.98
No. Observations:                 244   AIC:                             852.0
Df Residuals:                     242   BIC:                             859.0
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept       3.0896      0.110     28.032      

In [8]:
# 2.
# Create age groups
df["age_group"] = pd.cut(df["age"], bins=[1, 2, 4, 6], labels=["Young", "Middle", "Old"])

# ANOVA model
anova_model = ols("beauty ~ C(age_group)", data=df).fit()
anova_table = sm.stats.anova_lm(anova_model, typ=2)
print(anova_table)

                    sum_sq     df          F        PR(>F)
C(age_group)   5923.939294    2.0  55.416408  1.799984e-20
Residual      12667.490306  237.0        NaN           NaN


In [12]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
data = pd.DataFrame({
    'eval': [4.8, 4.6, 4.3, 4.5, 3.8, 4.0, 4.7, 4.2, 3.9, 4.4],
    'beauty': [8.0, 7.5, 6.0, 6.8, 4.5, 5.2, 7.1, 6.2, 4.9, 6.6]
})

model = smf.ols('eval ~ beauty', data=data).fit()
print(model.summary())

corr = data['eval'].corr(data['beauty'])
print(f"\nCorrelation between evaluation and beauty score: {corr:.3f}")

print("\nConclusion:")
if model.pvalues['beauty'] < 0.05:
    print(f"Beauty score significantly affects teaching evaluation (p = {model.pvalues['beauty']:.4f})")
    print(f"For every 1 unit increase in beauty score, evaluation increases by {model.params['beauty']:.3f} points.")
else:
    print(f"No significant relationship (p = {model.pvalues['beauty']:.4f})")

                            OLS Regression Results                            
Dep. Variable:                   eval   R-squared:                       0.962
Model:                            OLS   Adj. R-squared:                  0.957
Method:                 Least Squares   F-statistic:                     200.7
Date:                Tue, 28 Oct 2025   Prob (F-statistic):           6.00e-07
Time:                        15:15:58   Log-Likelihood:                 13.357
No. Observations:                  10   AIC:                            -22.71
Df Residuals:                       8   BIC:                            -22.11
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      2.4789      0.132     18.794      0.0

