In [1]:
import pandas as pd

In [2]:
import seaborn as sns

In [3]:
# List the names of the example datasets seaborn can load by name.
sns.get_dataset_names()

['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic']

In [5]:
# Load the health-expenditure example dataset
# (columns: Year, Country, Spending_USD, Life_Expectancy).
healthexp = sns.load_dataset(name="healthexp")

In [7]:
# Peek at the last ten rows (the most recent years in the frame).
healthexp.tail(n=10)

Unnamed: 0,Year,Country,Spending_USD,Life_Expectancy
264,2019,France,5167.839,82.9
265,2019,Great Britain,4385.463,81.4
266,2019,Japan,4610.794,84.4
267,2019,USA,10855.517,78.8
268,2020,Canada,5828.324,81.7
269,2020,Germany,6938.983,81.1
270,2020,France,5468.418,82.3
271,2020,Great Britain,5018.7,80.4
272,2020,Japan,4665.641,84.7
273,2020,USA,11859.179,77.0


In [8]:
# One-hot encode the categorical column(s); `Country` becomes one boolean
# `Country_<name>` column per country.
# NOTE(review): this rebinds `healthexp`, so the raw frame with the original
# `Country` column is no longer reachable after this cell runs.
healthexp = pd.get_dummies(data=healthexp)

In [9]:
# Check the encoded frame: first five rows.
healthexp.head(n=5)

Unnamed: 0,Year,Spending_USD,Life_Expectancy,Country_Canada,Country_France,Country_Germany,Country_Great Britain,Country_Japan,Country_USA
0,1970,252.311,70.6,False,False,True,False,False,False
1,1970,192.143,72.2,False,True,False,False,False,False
2,1970,123.993,71.9,False,False,False,True,False,False
3,1970,150.437,72.0,False,False,False,False,True,False
4,1970,326.961,70.9,False,False,False,False,False,True


In [11]:
# Feature matrix: every column except the regression target.
X = healthexp.drop(columns=["Life_Expectancy"])

In [12]:
# Regression target: life expectancy, as a Series aligned with X's index.
y = healthexp.loc[:, "Life_Expectancy"]

In [13]:
# Display the target Series (pandas truncates the middle rows).
y

0      70.6
1      72.2
2      71.9
3      72.0
4      70.9
       ... 
269    81.1
270    82.3
271    80.4
272    84.7
273    77.0
Name: Life_Expectancy, Length: 274, dtype: float64

In [14]:
# Display the feature frame to confirm Life_Expectancy is gone.
X

Unnamed: 0,Year,Spending_USD,Country_Canada,Country_France,Country_Germany,Country_Great Britain,Country_Japan,Country_USA
0,1970,252.311,False,False,True,False,False,False
1,1970,192.143,False,True,False,False,False,False
2,1970,123.993,False,False,False,True,False,False
3,1970,150.437,False,False,False,False,True,False
4,1970,326.961,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...
269,2020,6938.983,False,False,True,False,False,False
270,2020,5468.418,False,True,False,False,False,False
271,2020,5018.700,False,False,False,True,False,False
272,2020,4665.641,False,False,False,False,True,False


In [15]:
from sklearn.model_selection import train_test_split

In [16]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is random over rows, so every Year range appears in
# both train and test — confirm that is the intended evaluation setup.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=19,
)

In [17]:
from sklearn.ensemble import RandomForestRegressor

In [18]:
# Baseline random-forest regressor with library-default hyperparameters;
# only the seed is pinned so results are repeatable.
rfr = RandomForestRegressor(random_state=13)

In [19]:
# Train the baseline model on the training split.
rfr.fit(X=X_train, y=y_train)

In [20]:
# Baseline predictions on the held-out rows.
y_pred = rfr.predict(X=X_test)

In [21]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [22]:
# Baseline mean absolute error on the test split.
mean_absolute_error(y_true=y_test, y_pred=y_pred)

0.25916363636361917

In [23]:
# Baseline mean squared error on the test split.
mean_squared_error(y_true=y_test, y_pred=y_pred)

0.10221141818181628

In [24]:
# Baseline coefficient of determination (R^2) on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.9910457602615238

In [26]:
# Hyperparameter search space for the random forest:
# 3 values per parameter -> 3 * 3 * 3 * 3 = 81 candidate combinations.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [29]:
from sklearn.model_selection import GridSearchCV

In [32]:
# Exhaustive grid search over `param_grid` with 3-fold cross-validation,
# parallelised with n_jobs=-1. No `scoring` is passed, so candidates are
# ranked by the estimator's own default score (see the scikit-learn
# GridSearchCV documentation).
rfr_cv = GridSearchCV(
    estimator=rfr,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
)

In [34]:
# Run the search on the training split only; the test split stays unseen.
rfr_cv.fit(X=X_train, y=y_train)

In [35]:
# Predictions from the fitted grid-search object on the held-out rows.
y_pred2 = rfr_cv.predict(X=X_test)

In [36]:
# Mean absolute error of the grid-searched model on the test split.
mean_absolute_error(y_true=y_test, y_pred=y_pred2)

0.25089696969702713

In [37]:
# Mean squared error of the grid-searched model on the test split.
mean_squared_error(y_true=y_test, y_pred=y_pred2)

0.09636595151516295

In [38]:
# R^2 of the grid-searched model on the test split.
r2_score(y_true=y_test, y_pred=y_pred2)

0.9915578528520343