In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,r2_score

from sklearn.tree import DecisionTreeRegressor

pd.set_option("display.max_columns",None)

# dataset

In [33]:
data = pd.read_csv("/content/dataset.csv")

In [34]:
df = data.copy()

In [35]:
df.head()

Unnamed: 0,age,experience,income
0,25,1,30450
1,30,3,35670
2,47,2,31580
3,32,5,40130
4,43,10,47830


In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   age         20 non-null     int64
 1   experience  20 non-null     int64
 2   income      20 non-null     int64
dtypes: int64(3)
memory usage: 608.0 bytes


In [37]:
df.isnull().sum()

Unnamed: 0,0
age,0
experience,0
income,0


# dataset split

In [38]:
x = df.drop("income",axis=1)
y = df["income"]

In [39]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

# model

In [60]:
from sklearn.tree import DecisionTreeRegressor
model = AdaBoostRegressor(estimator=DecisionTreeRegressor())

In [41]:
model.fit(x_train,y_train)

In [42]:
pred = model.predict(x_test)

In [43]:
print("mse:- ",mean_squared_error(y_test,pred))
print("r2_score:- ",r2_score(y_test,pred))

mse:-  8635900.0
r2_score:-  0.29782687463379354


In [44]:
param_grid = {
    'n_estimators': [50, 100, 200, 300, 400, 500],
    'learning_rate': [0.01, 0.1, 0.5, 1.0],
    'estimator__max_depth': [1, 2, 3, 4, 5, 6, 7],
    'loss': ['linear', 'square', 'exponential']
}


In [45]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

In [46]:
searchgrid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3,scoring="r2")

In [47]:
searchgrid.fit(x_train,y_train)

In [48]:
searchgrid.best_params_

{'estimator__max_depth': 4,
 'learning_rate': 0.5,
 'loss': 'square',
 'n_estimators': 500}

In [49]:
pred = model.predict(x_test)

In [50]:
print("mse:- ",mean_squared_error(y_test,pred))
print("r2_score:- ",r2_score(y_test,pred))

mse:-  8635900.0
r2_score:-  0.29782687463379354


In [59]:
from sklearn.ensemble import RandomForestRegressor

rf_regressor = RandomForestRegressor()
rf_regressor.fit(x_train, y_train)

y_pred = rf_regressor.predict(x_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.4f}")
print(f"R-squared Score: {r2:.4f}")


Mean Squared Error: 2096909.4175
R-squared Score: 0.8295
