In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score


In [2]:
# Load the abalone measurements from the CSV file that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="./abalone.csv")


In [3]:
# Preview the first five rows to check the column layout before modelling.
df.head(n=5)


Unnamed: 0,Sex,Length,Diameter,Height,WholeWeight,ShuckedWeight,VisceraWeight,ShellWeight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [4]:
# Encode the categorical Sex column as integers: "M" -> 1, "F" -> -1 and every
# other value (such as "I") -> 0.
# The lookup also lists the already-encoded values 1 and -1, so re-running this
# cell leaves the column unchanged. Comparing against the raw strings only would
# collapse every row to 0 on a second run, because the encoded integers match
# neither "M" nor "F".
sex_codes = {"M": 1, "F": -1, 1: 1, -1: -1}
df["Sex"] = df["Sex"].map(lambda value: sex_codes.get(value, 0))


In [5]:
# Every column except the last one is used as a model input ...
feature_names = df.columns[:-1]
# ... and the last column is the value to predict.
target = df.columns[-1]


In [6]:
# Shuffled 5-fold splitter with a fixed seed so the folds are reproducible.
# n_splits=5 is the KFold default; it is spelled out here for the reader.
cv = KFold(n_splits=5, shuffle=True, random_state=1)


In [7]:
# Candidate forest sizes for the grid search: every tree count from 1 to 50.
n_trees = range(1, 50 + 1)  # the stop value is exclusive, so +1 keeps 50 in the grid


In [8]:
# Base estimator for the grid search; the fixed seed keeps the fitted forests
# reproducible between runs.
random_forest = RandomForestRegressor(
    random_state=1,
)


In [27]:
# Search over the number of trees, scoring each candidate by cross-validated R^2
# on the folds produced by `cv`; n_jobs=-1 requests all available CPU cores.
grid_search = GridSearchCV(
    estimator=random_forest,
    param_grid={"n_estimators": n_trees},
    scoring="r2",
    cv=cv,
    n_jobs=-1,
)


In [28]:
# Run the search: the feature columns are the inputs, the target column is the label.
grid_search.fit(
    X=df[feature_names],
    y=df[target],
)


GridSearchCV(cv=KFold(n_splits=5, random_state=1, shuffle=True),
             estimator=RandomForestRegressor(random_state=1),
             param_grid={'n_estimators': range(1, 51)}, scoring='r2')

In [29]:
# Display the forest configuration selected by the grid search.
# NOTE(review): in the recorded output the selected n_estimators equals the
# largest value in n_trees, i.e. the optimum sits on the edge of the searched
# range -- confirm whether the range should be widened.
grid_search.best_estimator_


RandomForestRegressor(n_estimators=50, random_state=1)

In [30]:
# Keep only the mean cross-validated score and the parameter set of each candidate.
cv_results = (
    pd.DataFrame(grid_search.cv_results_)
    .loc[:, ["mean_test_score", "params"]]
)


In [31]:
# Show the candidates whose mean cross-validated R^2 is strictly above 0.52.
cv_results.loc[cv_results["mean_test_score"].gt(0.52)]


Unnamed: 0,mean_test_score,params
20,0.520529,{'n_estimators': 21}
21,0.520804,{'n_estimators': 22}
22,0.521743,{'n_estimators': 23}
23,0.523106,{'n_estimators': 24}
24,0.523249,{'n_estimators': 25}
25,0.524308,{'n_estimators': 26}
26,0.524639,{'n_estimators': 27}
27,0.525656,{'n_estimators': 28}
28,0.526556,{'n_estimators': 29}
29,0.527086,{'n_estimators': 30}
