# How To Select Best Model Using Different Models

In [11]:
# importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from xgboost import XGBRFClassifier
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# load the data
df = sns.load_dataset('titanic')

# splitting the data into x and y
X = df[['sex','pclass','age','fare','embarked']]
y = df['survived']

# use train test spilt the data
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

# creating list of model
models = [
    ('RandomForestclassifer',RandomForestClassifier(random_state=42)),
    ('GradientBoostingClassifier',GradientBoostingClassifier(random_state=42)),
    ('XGBRFClassifier',XGBRFClassifier(42))
]

# declaring model and thier 
# accuracy
best_model = None
best_accuracy = 0.0
# itrate the model over thier performence
for name, model in models :
    # crete a pipline
    pipeline = Pipeline([
        ('imputer',SimpleImputer(strategy='most_frequent')),
        ('encoder',OneHotEncoder(handle_unknown='ignore')),
        ('model',model)
    ])
    # perform corss validation
    score = cross_val_score(pipeline,X_train,y_train,cv=5)

    # taking the mean of the score
    mean_score = score.mean()

    # fitting the pipline on the traing data
    pipeline.fit(X_train,y_train)
    # prediction
    y_pred = pipeline.predict(X_test)

    # calculating accuracy
    accuracy = accuracy_score(y_test,y_pred)
    # print the models performing
    print('model',name)
    print('cross_val_accuracy',mean_score)
    print('Test accuracy',accuracy)
    print()
    # check if the current model has best accuracy
    if accuracy > best_accuracy :
        best_accuracy = accuracy
        best_model = pipeline
        
        
# retrive the best model
print('Best model',best_model)





model RandomForestclassifer
cross_val_accuracy 0.8033586132177681
Test accuracy 0.8100558659217877

model GradientBoostingClassifier
cross_val_accuracy 0.814636068157195
Test accuracy 0.8044692737430168

model XGBRFClassifier
cross_val_accuracy 0.7738796414852752
Test accuracy 0.7653631284916201

Best model Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')),
                ('encoder', OneHotEncoder(handle_unknown='ignore')),
                ('model', RandomForestClassifier(random_state=42))])


# Her you can select the Best model Easily