# In [7]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# End-to-end preparation and model fitting for the tips dataset:
# load -> one-hot encode categoricals -> split -> scale numerics -> fit models.
tips = pd.read_csv("/content/tips.csv")

# Feature Engineering: One-Hot Encoding
categorical_cols = ['sex', 'smoker', 'day', 'time']
encoder = OneHotEncoder(drop='first', sparse_output=False)  # Drop first to avoid multicollinearity
encoded_cols = encoder.fit_transform(tips[categorical_cols])
# Reuse the source index so the column-wise concat below aligns row-for-row
# even when `tips` does not carry a default RangeIndex.
encoded_df = pd.DataFrame(
    encoded_cols,
    columns=encoder.get_feature_names_out(categorical_cols),
    index=tips.index,
)
tips = pd.concat([tips.drop(categorical_cols, axis=1), encoded_df], axis=1)

# Splitting Data: 70% train, then the remaining 30% is halved into
# validation and test (15% each).
X = tips.drop('tip', axis=1)
y = tips['tip']
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
# NOTE: X_val / y_val are not consumed by the models fitted in this section.

# Scaling Numerical Features
# The scaler is fit on the training split only and then applied to the
# validation and test splits, so held-out statistics never influence training
# (fitting on the full dataset before splitting leaks information).
# `tips` and `X` are intentionally left unscaled.
numerical_cols = ['total_bill', 'size']
scaler = StandardScaler()
# Work on explicit copies so the column assignments below do not write into
# (or warn about) slices derived from `X`.
X_train, X_val, X_test = X_train.copy(), X_val.copy(), X_test.copy()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_val[numerical_cols] = scaler.transform(X_val[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])


# Linear Regression
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
lr_pred = linear_model.predict(X_test)  # Make predictions

# Ridge Regression (default regularisation strength)
ridge_model = Ridge()
ridge_model.fit(X_train, y_train)
ridge_pred = ridge_model.predict(X_test)

# Lasso Regression (default regularisation strength)
lasso_model = Lasso()
lasso_model.fit(X_train, y_train)
lasso_pred = lasso_model.predict(X_test)

# Decision Tree Regressor (seeded for reproducible fits)
tree_model = DecisionTreeRegressor(random_state=42)
tree_model.fit(X_train, y_train)
tree_pred = tree_model.predict(X_test)

# Function to evaluate models
def evaluate_model(y_true, y_pred, model_name):
    """Print the MAE and MSE of ``y_pred`` against ``y_true``.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values to score against ``y_true``.
        model_name: Label placed at the start of the printed line.

    Returns:
        None. The scores are written to stdout, rounded to two decimals.
    """
    # Compute both metrics in a fixed order: absolute error, then squared error.
    mae, mse = (
        metric(y_true, y_pred)
        for metric in (mean_absolute_error, mean_squared_error)
    )
    print(f"{model_name} - MAE: {mae:.2f}, MSE: {mse:.2f}")



# Score every model's test-set predictions, one printed line per model,
# in the same order they were fitted.
for model_name, predictions in (
    ("Linear Regression", lr_pred),
    ("Ridge Regression", ridge_pred),
    ("Lasso Regression", lasso_pred),
    ("Decision Tree", tree_pred),
):
    evaluate_model(y_test, predictions, model_name)



# Recorded output of the cell above:
# Linear Regression - MAE: 0.71, MSE: 0.95
# Ridge Regression - MAE: 0.70, MSE: 0.94
# Lasso Regression - MAE: 0.89, MSE: 1.10
# Decision Tree - MAE: 0.85, MSE: 1.29
