In [1]:
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [4]:
# Load seaborn's example "tips" dataset into a DataFrame.
tips = sns.load_dataset(name="tips")

# Preview the first five rows to sanity-check the columns and values.
tips.head(n=5)



Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [5]:
# Count the missing entries in every column (one total per column).
tips.isna().sum()

total_bill    0
tip           0
sex           0
smoker        0
day           0
time          0
size          0
dtype: int64

In [23]:

# Target is the tip amount; every other column is a candidate feature.
y = tips['tip']
X = tips.drop(columns='tip')

# Feature groups, routed to different preprocessing branches below.
numerical_cols = ['total_bill', 'size']
categorical_cols = ['sex', 'smoker', 'day', 'time']

# Numeric branch: fill any gaps with the column mean (no scaling is applied).
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
])

# Categorical branch: fill gaps with the most frequent level, then one-hot
# encode. Levels not seen during fit are ignored at transform time instead of
# raising an error.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Apply each branch to its own column group and concatenate the results.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
])

In [24]:

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [25]:
from sklearn.base import clone
from sklearn.tree import DecisionTreeRegressor

# Candidate regressors, keyed by the label used in the report below.
# The decision tree is seeded so that its metrics are reproducible from run to
# run; without a fixed random_state its results can vary between executions.
models = {'Linear Regression': LinearRegression(),
          'Ridge Regression': Ridge(),
          'Lasso Regression': Lasso(),
          'Decision Tree Regressor': DecisionTreeRegressor(random_state=42)}

# Create the result containers here so the cell works on a fresh kernel
# (they were previously assumed to exist already) and so that re-running the
# cell never mixes in entries left over from an earlier run.
trained_models = {}
evaluations = {}

# Train models
for name, model in models.items():
    model_pipeline = Pipeline(steps=[
        # Each pipeline gets its own unfitted copy of the preprocessor, so the
        # fitted pipelines stored below do not share transformer state.
        ('preprocessor', clone(preprocessor)),
        ('model', model)
    ])
    model_pipeline.fit(X_train, y_train)
    trained_models[name] = model_pipeline

# Evaluate models on the held-out test set
for name, model_pipeline in trained_models.items():
    y_pred = model_pipeline.predict(X_test)
    evaluations[name] = {
        'Mean Squared Error': mean_squared_error(y_test, y_pred),
        'Mean Absolute Error': mean_absolute_error(y_test, y_pred),
        'R^2 Score': r2_score(y_test, y_pred),
    }

# Display evaluation metrics, one block per model
for name, metrics in evaluations.items():
    print(f"{name}:")
    for metric, value in metrics.items():
        print(f"{metric}: {value}")
    print()


Linear Regression:
Mean Squared Error: 0.7033566017436106
Mean Absolute Error: 0.6671331480264896
R^2 Score: 0.43730181943482493

Ridge Regression:
Mean Squared Error: 0.7017429745852671
Mean Absolute Error: 0.6666715818996598
R^2 Score: 0.4385927507545275

Lasso Regression:
Mean Squared Error: 0.5665976098709371
Mean Absolute Error: 0.6256100264339348
R^2 Score: 0.5467115210170849

Decision Tree Regressor:
Mean Squared Error: 1.2309306122448982
Mean Absolute Error: 0.8424489795918367
R^2 Score: 0.015232935618815557

