In [2]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder


In [3]:

# Location of the tips dataset inside the runtime's filesystem.
tips_csv_path = '/content/tips.csv'
data = pd.read_csv(tips_csv_path)

# Plain-text preview of the first five rows.
print(data.head(n=5))

   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4


In [5]:
# Columns that are standardised.
numerical_features = ["total_bill", "size"]

# Columns that are one-hot encoded; any candidate missing from `data` is skipped.
categorical_features = list(
    filter(lambda col: col in data.columns, ["sex", "smoker", "day", "time"])
)

# Route each column group through its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_features),
        (
            "cat",
            OneHotEncoder(handle_unknown="ignore", drop="first"),
            categorical_features,
        ),
    ]
)


In [6]:

# Target: the tip amount.
y = data["tip"]
# Features: every remaining column of the dataset.
X = data.drop("tip", axis=1)


In [10]:

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Preprocess the data.
# The preprocessor is fitted on the training split only; the same fitted
# preprocessor is then applied to the test split, so no statistics are learned
# from the test rows.
# NOTE(review): both names are rebound to the transformed output, so running
# this cell a second time without re-running the split passes already
# transformed data back into the preprocessor. Confirm whether distinct names
# (and a matching change in the model cell) are wanted here.
X_train = preprocessor.fit_transform(X_train) # Fit and transform on training data
X_test = preprocessor.transform(X_test) # Transform test data using the fitted preprocessor

In [13]:

# Each model is fitted on the preprocessed training split and then used to
# predict tips for the preprocessed test split.

# Ordinary least squares baseline.
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)  # Train the model
lr_pred = lr_model.predict(X_test)  # Make predictions


# Linear model with an L2 penalty.
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)
ridge_pred = ridge_model.predict(X_test)


# Linear model with an L1 penalty.
lasso_model = Lasso(alpha=0.1)
lasso_model.fit(X_train, y_train)
lasso_pred = lasso_model.predict(X_test)


# Depth-limited regression tree. random_state is pinned because scikit-learn
# permutes the features at every split, so equally good splits could otherwise
# be resolved differently from one run to the next.
tree_model = DecisionTreeRegressor(max_depth=5, random_state=42)
tree_model.fit(X_train, y_train)
tree_pred = tree_model.predict(X_test)


In [15]:
# Function to evaluate models
def evaluate_model(y_true, y_pred, model_name):
    """Print the mean absolute error and mean squared error for one model.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values to compare against ``y_true``.
        model_name: Label placed at the start of the printed line.

    Returns:
        None. The two metrics are only printed, each with two decimals.
    """
    abs_error = mean_absolute_error(y_true, y_pred)
    sq_error = mean_squared_error(y_true, y_pred)
    report = "{} - MAE: {:.2f}, MSE: {:.2f}".format(model_name, abs_error, sq_error)
    print(report)



# Evaluate all models
# Report every model against the same held-out targets, in a fixed order.
for predictions, label in (
    (lr_pred, "Linear Regression"),
    (ridge_pred, "Ridge Regression"),
    (lasso_pred, "Lasso Regression"),
    (tree_pred, "Decision Tree"),
):
    evaluate_model(y_test, predictions, label)

Linear Regression - MAE: 0.67, MSE: 0.70
Ridge Regression - MAE: 0.67, MSE: 0.70
Lasso Regression - MAE: 0.65, MSE: 0.61
Decision Tree - MAE: 0.74, MSE: 0.98
