# Random Forest Regressor with Pipelining and Hyperparameter Tunning

## Assignment--

In [2]:
import seaborn as sns

In [4]:
df = sns.load_dataset('tips')

In [5]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [6]:
df.columns

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')

In [10]:
# Independent and dependent feature. 
X = df.drop(labels=['total_bill'],axis=1)
y = df.total_bill

In [11]:
X.head()

Unnamed: 0,tip,sex,smoker,day,time,size
0,1.01,Female,No,Sun,Dinner,2
1,1.66,Male,No,Sun,Dinner,3
2,3.5,Male,No,Sun,Dinner,3
3,3.31,Male,No,Sun,Dinner,2
4,3.61,Female,No,Sun,Dinner,4


In [12]:
y

0      16.99
1      10.34
2      21.01
3      23.68
4      24.59
       ...  
239    29.03
240    27.18
241    22.67
242    17.82
243    18.78
Name: total_bill, Length: 244, dtype: float64

In [15]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=10)

In [16]:
numerical_cols = ['tip','size']
categorical_cols = ['sex','smoker','day','time']

In [17]:
## Feature Engineering Automation 
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer ## Handle Missing Value
from sklearn.preprocessing import StandardScaler ## Feature Scaling
from sklearn.preprocessing import OneHotEncoder ## Categorical to numerical
from sklearn.compose import ColumnTransformer

In [20]:
num_pipeline=Pipeline(
    steps=[
    ('imputer',SimpleImputer(strategy='median')),
    ('scaler',StandardScaler())
    ]
)
cat_pipeline = Pipeline(
    steps=[
        ('imputer',SimpleImputer(strategy='most_frequent')),
        ('encoder',OneHotEncoder())
    ]
)

In [21]:
processor = ColumnTransformer([('num_pipeline',num_pipeline,numerical_cols),
                               ('cat_pipeline',cat_pipeline,categorical_cols)])

In [22]:
X_train = processor.fit_transform(X_train)
X_test = processor.transform(X_test)

In [50]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

In [38]:
from sklearn.metrics import r2_score

In [53]:
## Model Traning Automation 
models = {
    'Random Regressor':RandomForestRegressor(), 
    'ridge':Ridge(), 
    'lasso':Lasso(), 
    'linear':LinearRegression(),
    'SVR':SVR()
}

In [40]:
def evaluate_model(X_train,y_train,X_test,y_test,models): 
    report = {}
    for i in range(len(models)): 
        model = list(models.values())[i]
        model.fit(X_train,y_train)

        y_test_pred = model.predict(X_test)

        # r2 score of the test model. 
        score = r2_score(y_test,y_test_pred)

        report[list(models.keys())[i]] = score
    return report

In [54]:
evaluate_model(X_train,y_train,X_test,y_test,models)

{'Random Regressor': 0.4927750635676421,
 'ridge': 0.5253933692551901,
 'lasso': 0.4687787326032977,
 'linear': 0.526481379031322,
 'SVR': 0.405632199234631}

In [55]:
classifier = Ridge()

In [58]:
# Hyperparameter Tunning 
params = {
    'solver':['auto', 'svd', 'cholesky', 'lsqr'],
    'alpha':[1,0.1,2,0.01]
}

In [59]:
from sklearn.model_selection import RandomizedSearchCV

In [62]:
cv = RandomizedSearchCV(classifier,param_distributions=params,cv=5,verbose=3)
cv.fit(X_train,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END ........alpha=0.1, solver=cholesky;, score=0.084 total time=   0.0s
[CV 2/5] END ........alpha=0.1, solver=cholesky;, score=0.585 total time=   0.0s
[CV 3/5] END ........alpha=0.1, solver=cholesky;, score=0.704 total time=   0.0s
[CV 4/5] END ........alpha=0.1, solver=cholesky;, score=0.468 total time=   0.0s
[CV 5/5] END ........alpha=0.1, solver=cholesky;, score=0.644 total time=   0.0s
[CV 1/5] END ...............alpha=2, solver=svd;, score=0.085 total time=   0.0s
[CV 2/5] END ...............alpha=2, solver=svd;, score=0.591 total time=   0.0s
[CV 3/5] END ...............alpha=2, solver=svd;, score=0.706 total time=   0.0s
[CV 4/5] END ...............alpha=2, solver=svd;, score=0.472 total time=   0.0s
[CV 5/5] END ...............alpha=2, solver=svd;, score=0.638 total time=   0.0s
[CV 1/5] END ...........alpha=0.01, solver=lsqr;, score=0.084 total time=   0.0s
[CV 2/5] END ...........alpha=0.01, solver=lsqr;

In [63]:
cv.best_params_

{'solver': 'svd', 'alpha': 2}

In [64]:
classifier1 = Ridge(solver='svd',alpha=2)

In [65]:
classifier.fit(X_train,y)
y_pred = classifier1.predict(X_test)

NotFittedError: This Ridge instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.