In [1]:
import pandas as pd
import numpy as np

In [2]:
# Location of the startups CSV. The '/content/drive/MyDrive' prefix looks like
# a Colab-mounted Google Drive -- NOTE(review): confirm the drive is mounted
# before this cell runs, since nothing visible in this notebook mounts it.
file_path = '/content/drive/MyDrive/ajay_project/50_Startups.csv'

# Read the whole file into a DataFrame; later cells refer to it as `data`.
data = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Preview the first five rows to sanity-check that the file loaded as expected.
data.head(n=5)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,Profit
0,165349.2,136897.8,471784.1,192261.83
1,162597.7,151377.59,443898.53,191792.06
2,153441.51,101145.55,407934.54,191050.39
3,144372.41,118671.85,383199.62,182901.99
4,142107.34,91391.77,366168.42,166187.94


In [4]:
# Preview the last five rows to check the end of the file was read cleanly.
data.tail(n=5)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,Profit
45,1000.23,124153.04,1903.93,64926.08
46,1315.46,115816.21,297114.46,49490.75
47,0.0,135426.92,0.0,42559.73
48,542.05,51743.15,0.0,35673.41
49,0.0,116983.8,45173.06,14681.4


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Predictors are the three spend columns; the regression target is Profit.
feature_columns = ['R&D Spend', 'Administration', 'Marketing Spend']
X = data[feature_columns]
y = data['Profit']

# Hold out 20% of the rows for evaluation. The fixed seed keeps the split
# (and therefore every score below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate regressors, keyed by the label used in the results dict.
# The tree-based models are seeded so their fits are deterministic.
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree Regressor": DecisionTreeRegressor(random_state=42),
    "Random Forest Regressor": RandomForestRegressor(
        random_state=42,
        n_estimators=100,
    ),
}

# Fit each candidate on the training split and score it on the held-out split.
# `results` maps model label -> {"MSE", "MAE", "R2"}.
results = {}
for name, model in models.items():
    # scikit-learn's fit() returns the estimator itself, so the calls chain.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Evaluate the three scorers in a fixed order: MSE, MAE, R2.
    mse, mae, r2 = (
        score(y_test, y_pred)
        for score in (mean_squared_error, mean_absolute_error, r2_score)
    )

    results[name] = dict(MSE=mse, MAE=mae, R2=r2)

# Bare last expression so the notebook displays the per-model metrics.
results


{'Linear Regression': {'MSE': 80926321.22295158,
  'MAE': 6979.152252370402,
  'R2': 0.9000653083037321},
 'Decision Tree Regressor': {'MSE': 400026479.25494,
  'MAE': 13755.663999999995,
  'R2': 0.5060133431179846},
 'Random Forest Regressor': {'MSE': 72625008.62306513,
  'MAE': 6437.497739999977,
  'R2': 0.9103164738430438}}

In [14]:
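# CONCLUSION:
# On the held-out test split, the Random Forest Regressor performs best, with the lowest MSE and MAE
# and the highest R² score (about 0.910). Linear Regression is a close second (R² about 0.900),
# while the Decision Tree Regressor is clearly worse (R² about 0.506).
# Of the three models compared, Random Forest is therefore the most suitable for predicting profit
# on this dataset. Note that its margin over Linear Regression is small and comes from a single
# 20% hold-out split, so it should be confirmed (e.g. with cross-validation) before relying on it.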