In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv("50_Startups.csv")

# Split the data into features (X) and target variable (Y)
X = data.drop(columns="Profit")
Y = data["Profit"]

# One-hot encode any non-numeric feature columns so every estimator below
# receives a purely numeric matrix; without this, a string/categorical column
# would make `fit` raise. Columns that are already numeric pass through
# unchanged, so this is a no-op for an all-numeric CSV.
# `drop_first=True` drops one level per encoded column to avoid perfectly
# collinear dummy columns in the linear models.
# NOTE(review): public copies of this dataset often carry a categorical
# "State" column -- confirm whether this copy does.
X = pd.get_dummies(X, drop_first=True, dtype=float)

# Divide the data into train and test sets (80/20, fixed seed for repeatability)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Construct different regression algorithms.
# The tree-based estimators use randomness while fitting (feature permutation,
# bootstrap sampling), so they are given a fixed seed: without it their metrics
# -- and potentially the model picked as "best" -- change on every run.
MODEL_SEED = 42
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "Decision Tree Regression": DecisionTreeRegressor(random_state=MODEL_SEED),
    "Random Forest Regression": RandomForestRegressor(random_state=MODEL_SEED),
    "Gradient Boosting Regression": GradientBoostingRegressor(random_state=MODEL_SEED),
}

# Train and evaluate different regression models.
# `results` maps each model name to a dict of its hold-out metrics
# (keys: "MAE", "MSE", "RMSE", "R-squared").
results = {}
for model_name, model in models.items():
    model.fit(X_train, Y_train)
    Y_pred = model.predict(X_test)
    mae = mean_absolute_error(Y_test, Y_pred)
    mse = mean_squared_error(Y_test, Y_pred)
    # Derive RMSE from the MSE already computed instead of calling
    # mean_squared_error(..., squared=False): that keyword was deprecated in
    # scikit-learn 1.4 and removed in 1.6, where it raises a TypeError.
    rmse = mse ** 0.5
    r2 = r2_score(Y_test, Y_pred)
    results[model_name] = {"MAE": mae, "MSE": mse, "RMSE": rmse, "R-squared": r2}

# Report each model's metrics: a header line, one "label: value" line per
# metric in a fixed order, then a blank separator line.
for model_name, metrics in results.items():
    print(f"{model_name}:\n")
    for metric_label in ("MAE", "MSE", "RMSE", "R-squared"):
        print(f"{metric_label}:", metrics[metric_label])
    print()

# Select the model with the lowest RMSE; on a tie the earliest entry in
# `results` wins, since min() keeps the first minimum it encounters.
best_model = min(results.items(), key=lambda entry: entry[1]["RMSE"])[0]
print("Best model:", best_model)


Linear Regression:

MAE: 6979.152252370404
MSE: 80926321.22295162
RMSE: 8995.905803361416
R-squared: 0.900065308303732

Ridge Regression:

MAE: 6979.152252428603
MSE: 80926321.22368819
RMSE: 8995.905803402356
R-squared: 0.9000653083028225

Lasso Regression:

MAE: 6979.152235475121
MSE: 80926320.76116833
RMSE: 8995.905777695114
R-squared: 0.9000653088739812

Decision Tree Regression:

MAE: 9982.975999999995
MSE: 161851110.35489988
RMSE: 12722.071779191465
R-squared: 0.800132508563502

Random Forest Regression:

MAE: 6614.176119999947
MSE: 84918899.85618095
RMSE: 9215.145134840848
R-squared: 0.8951349332569578

Gradient Boosting Regression:

MAE: 8585.515904177995
MSE: 87825328.42481425
RMSE: 9371.516868939321
R-squared: 0.8915458285187923

Best model: Lasso Regression
