In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings

warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('C:/Users/abman/OneDrive/Desktop/Complete ML Bootcamp Udemy/Data/Travel.csv')

In [3]:
df.head()

Unnamed: 0,CustomerID,ProdTaken,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfPersonVisiting,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,NumberOfChildrenVisiting,Designation,MonthlyIncome
0,200000,1,41.0,Self Enquiry,3,6.0,Salaried,Female,3,3.0,Deluxe,3.0,Single,1.0,1,2,1,0.0,Manager,20993.0
1,200001,0,49.0,Company Invited,1,14.0,Salaried,Male,3,4.0,Deluxe,4.0,Divorced,2.0,0,3,1,2.0,Manager,20130.0
2,200002,1,37.0,Self Enquiry,1,8.0,Free Lancer,Male,3,4.0,Basic,3.0,Single,7.0,1,3,0,0.0,Executive,17090.0
3,200003,0,33.0,Company Invited,1,9.0,Salaried,Female,2,3.0,Basic,3.0,Divorced,2.0,1,5,1,1.0,Executive,17909.0
4,200004,0,,Self Enquiry,1,8.0,Small Business,Male,2,3.0,Basic,4.0,Divorced,1.0,0,5,1,0.0,Executive,18468.0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4888 entries, 0 to 4887
Data columns (total 20 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   CustomerID                4888 non-null   int64  
 1   ProdTaken                 4888 non-null   int64  
 2   Age                       4662 non-null   float64
 3   TypeofContact             4863 non-null   object 
 4   CityTier                  4888 non-null   int64  
 5   DurationOfPitch           4637 non-null   float64
 6   Occupation                4888 non-null   object 
 7   Gender                    4888 non-null   object 
 8   NumberOfPersonVisiting    4888 non-null   int64  
 9   NumberOfFollowups         4843 non-null   float64
 10  ProductPitched            4888 non-null   object 
 11  PreferredPropertyStar     4862 non-null   float64
 12  MaritalStatus             4888 non-null   object 
 13  NumberOfTrips             4748 non-null   float64
 14  Passport

## Data Cleaning

#### Handling missing values

In [5]:
df.isnull().sum()

CustomerID                    0
ProdTaken                     0
Age                         226
TypeofContact                25
CityTier                      0
DurationOfPitch             251
Occupation                    0
Gender                        0
NumberOfPersonVisiting        0
NumberOfFollowups            45
ProductPitched                0
PreferredPropertyStar        26
MaritalStatus                 0
NumberOfTrips               140
Passport                      0
PitchSatisfactionScore        0
OwnCar                        0
NumberOfChildrenVisiting     66
Designation                   0
MonthlyIncome               233
dtype: int64

In [6]:
# checking values in all categorical columns

df['Gender'].value_counts()

Male       2916
Female     1817
Fe Male     155
Name: Gender, dtype: int64

In [7]:
df['MaritalStatus'].value_counts()

Married      2340
Divorced      950
Single        916
Unmarried     682
Name: MaritalStatus, dtype: int64

In [8]:
df['TypeofContact'].value_counts()

Self Enquiry       3444
Company Invited    1419
Name: TypeofContact, dtype: int64

In [9]:
df['Gender'] = df['Gender'].replace('Fe Male', 'Female')
df['MaritalStatus'] = df['MaritalStatus'].replace('Single', 'Unmarried')

In [10]:
df.head()

Unnamed: 0,CustomerID,ProdTaken,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfPersonVisiting,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,NumberOfChildrenVisiting,Designation,MonthlyIncome
0,200000,1,41.0,Self Enquiry,3,6.0,Salaried,Female,3,3.0,Deluxe,3.0,Unmarried,1.0,1,2,1,0.0,Manager,20993.0
1,200001,0,49.0,Company Invited,1,14.0,Salaried,Male,3,4.0,Deluxe,4.0,Divorced,2.0,0,3,1,2.0,Manager,20130.0
2,200002,1,37.0,Self Enquiry,1,8.0,Free Lancer,Male,3,4.0,Basic,3.0,Unmarried,7.0,1,3,0,0.0,Executive,17090.0
3,200003,0,33.0,Company Invited,1,9.0,Salaried,Female,2,3.0,Basic,3.0,Divorced,2.0,1,5,1,1.0,Executive,17909.0
4,200004,0,,Self Enquiry,1,8.0,Small Business,Male,2,3.0,Basic,4.0,Divorced,1.0,0,5,1,0.0,Executive,18468.0


In [11]:
df['Gender'].value_counts()

Male      2916
Female    1972
Name: Gender, dtype: int64

In [12]:
df['MaritalStatus'].value_counts()

Married      2340
Unmarried    1598
Divorced      950
Name: MaritalStatus, dtype: int64

In [13]:
features_with_nan = [features for features in df.columns if df[features].isnull().sum()>=1] # segregating features having null values
for feature in features_with_nan:
    print(feature, np.round(df[feature].isnull().mean()*100,5), '% missing values')

# np.round(df[feature].isnull().mean()*100,5) is multiplying mean of round off no of null values with 100 to get percentage followed by 5 decimal 

Age 4.62357 % missing values
TypeofContact 0.51146 % missing values
DurationOfPitch 5.13502 % missing values
NumberOfFollowups 0.92062 % missing values
PreferredPropertyStar 0.53191 % missing values
NumberOfTrips 2.86416 % missing values
NumberOfChildrenVisiting 1.35025 % missing values
MonthlyIncome 4.76678 % missing values


In [14]:
np.round(df['Age'].isnull().mean()*100,5)

4.62357

In [15]:
# statistics of numerical columns that have null
df[features_with_nan].select_dtypes(exclude='object').describe()

Unnamed: 0,Age,DurationOfPitch,NumberOfFollowups,PreferredPropertyStar,NumberOfTrips,NumberOfChildrenVisiting,MonthlyIncome
count,4662.0,4637.0,4843.0,4862.0,4748.0,4822.0,4655.0
mean,37.622265,15.490835,3.708445,3.581037,3.236521,1.187267,23619.853491
std,9.316387,8.519643,1.002509,0.798009,1.849019,0.857861,5380.698361
min,18.0,5.0,1.0,3.0,1.0,0.0,1000.0
25%,31.0,9.0,3.0,3.0,2.0,1.0,20346.0
50%,36.0,13.0,4.0,3.0,3.0,1.0,22347.0
75%,44.0,20.0,4.0,4.0,4.0,2.0,25571.0
max,61.0,127.0,6.0,5.0,22.0,3.0,98678.0


In [16]:
# filling the null values

df.Age.fillna(df['Age'].median(),inplace=True)

In [17]:
df['Age'].isna().sum()

0

In [23]:
df.TypeofContact.fillna(df['TypeofContact'].mode()[0],inplace=True)

In [21]:
df['TypeofContact'].mode()[0]

'Self Enquiry'

In [24]:
df.DurationOfPitch.fillna(df['DurationOfPitch'].median(),inplace=True)

0        6.0
1       14.0
2        8.0
3        9.0
4        8.0
        ... 
4883     9.0
4884    31.0
4885    17.0
4886    16.0
4887    14.0
Name: DurationOfPitch, Length: 4888, dtype: float64

In [25]:
df.NumberOfFollowups.fillna(df['NumberOfFollowups'].mode()[0],inplace=True)

In [26]:
df.PreferredPropertyStar.fillna(df['PreferredPropertyStar'].mode()[0],inplace=True)


In [27]:
df.NumberOfTrips.fillna(0, inplace=True)

In [32]:
df.DurationOfPitch.fillna(df['DurationOfPitch'].median(), inplace=True)

In [28]:
df.NumberOfChildrenVisiting.fillna(df['NumberOfChildrenVisiting'].mode()[0], inplace=True)

In [30]:
df.MonthlyIncome.fillna(df['MonthlyIncome'].median(), inplace=True)

In [33]:
df.isnull().sum()

CustomerID                  0
ProdTaken                   0
Age                         0
TypeofContact               0
CityTier                    0
DurationOfPitch             0
Occupation                  0
Gender                      0
NumberOfPersonVisiting      0
NumberOfFollowups           0
ProductPitched              0
PreferredPropertyStar       0
MaritalStatus               0
NumberOfTrips               0
Passport                    0
PitchSatisfactionScore      0
OwnCar                      0
NumberOfChildrenVisiting    0
Designation                 0
MonthlyIncome               0
dtype: int64

In [34]:
# Customer id is not required so we are dropping

df.drop('CustomerID', inplace=True, axis=1)

In [35]:
df.head()

Unnamed: 0,ProdTaken,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfPersonVisiting,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,NumberOfChildrenVisiting,Designation,MonthlyIncome
0,1,41.0,Self Enquiry,3,6.0,Salaried,Female,3,3.0,Deluxe,3.0,Unmarried,1.0,1,2,1,0.0,Manager,20993.0
1,0,49.0,Company Invited,1,14.0,Salaried,Male,3,4.0,Deluxe,4.0,Divorced,2.0,0,3,1,2.0,Manager,20130.0
2,1,37.0,Self Enquiry,1,8.0,Free Lancer,Male,3,4.0,Basic,3.0,Unmarried,7.0,1,3,0,0.0,Executive,17090.0
3,0,33.0,Company Invited,1,9.0,Salaried,Female,2,3.0,Basic,3.0,Divorced,2.0,1,5,1,1.0,Executive,17909.0
4,0,36.0,Self Enquiry,1,8.0,Small Business,Male,2,3.0,Basic,4.0,Divorced,1.0,0,5,1,0.0,Executive,18468.0


## Feature Engineering

In [36]:
# We have Noof children visiting and no of person visting we will create new column adding both drop the existing columns

df['TotalVisiting'] = df['NumberOfChildrenVisiting'] + df['NumberOfPersonVisiting']

df.drop(columns=['NumberOfChildrenVisiting', 'NumberOfPersonVisiting'], inplace=True, axis=1)

In [37]:
df.head()

Unnamed: 0,ProdTaken,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,Designation,MonthlyIncome,TotalVisiting
0,1,41.0,Self Enquiry,3,6.0,Salaried,Female,3.0,Deluxe,3.0,Unmarried,1.0,1,2,1,Manager,20993.0,3.0
1,0,49.0,Company Invited,1,14.0,Salaried,Male,4.0,Deluxe,4.0,Divorced,2.0,0,3,1,Manager,20130.0,5.0
2,1,37.0,Self Enquiry,1,8.0,Free Lancer,Male,4.0,Basic,3.0,Unmarried,7.0,1,3,0,Executive,17090.0,3.0
3,0,33.0,Company Invited,1,9.0,Salaried,Female,3.0,Basic,3.0,Divorced,2.0,1,5,1,Executive,17909.0,3.0
4,0,36.0,Self Enquiry,1,8.0,Small Business,Male,3.0,Basic,4.0,Divorced,1.0,0,5,1,Executive,18468.0,2.0


In [38]:
Num_feature = [feature for feature in df.columns if df[feature].dtype != 'O']
print('No of Numerical feature :', len(Num_feature))

No of Numerical feature : 12


In [39]:
cat_feature = [feature for feature in df.columns if df[feature].dtype == 'O']
print('No of Categorical feature :', len(cat_feature))

No of Categorical feature : 6


In [42]:
# discreate feature

discreate_feature = [feature for feature in Num_feature if len(df[feature].unique()) <=25]
print('Number of discreate feature :', len(discreate_feature))

Number of discreate feature : 9


In [43]:
# Continuous feature

Continuous_feature = [feature for feature in Num_feature if feature not in discreate_feature]
print('Number of continuous feature :', len(Continuous_feature))

Number of continuous feature : 3


In [44]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4888 entries, 0 to 4887
Data columns (total 18 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ProdTaken               4888 non-null   int64  
 1   Age                     4888 non-null   float64
 2   TypeofContact           4888 non-null   object 
 3   CityTier                4888 non-null   int64  
 4   DurationOfPitch         4888 non-null   float64
 5   Occupation              4888 non-null   object 
 6   Gender                  4888 non-null   object 
 7   NumberOfFollowups       4888 non-null   float64
 8   ProductPitched          4888 non-null   object 
 9   PreferredPropertyStar   4888 non-null   float64
 10  MaritalStatus           4888 non-null   object 
 11  NumberOfTrips           4888 non-null   float64
 12  Passport                4888 non-null   int64  
 13  PitchSatisfactionScore  4888 non-null   int64  
 14  OwnCar                  4888 non-null   

## train test split

In [46]:
from sklearn.model_selection import train_test_split

X = df.drop(['ProdTaken'], axis=1)
y = df['ProdTaken']

In [47]:
y.value_counts()

0    3968
1     920
Name: ProdTaken, dtype: int64

In [48]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.20, random_state=43)
X_train.shape, X_test.shape

((3910, 17), (978, 17))

# Encoding and Scaling for Train and Test Data

In [51]:
# segregating Numerical and categorical featured from X

cat_feature = X.select_dtypes(include='object').columns
Num_feature = X.select_dtypes(exclude='object').columns

In [52]:
cat_feature


Index(['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
       'MaritalStatus', 'Designation'],
      dtype='object')

In [53]:
Num_feature

Index(['Age', 'CityTier', 'DurationOfPitch', 'NumberOfFollowups',
       'PreferredPropertyStar', 'NumberOfTrips', 'Passport',
       'PitchSatisfactionScore', 'OwnCar', 'MonthlyIncome', 'TotalVisiting'],
      dtype='object')

In [55]:
# Performing one hot encoding for categorical columns and Scaling for Numerical columns

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

numeric_transformer = StandardScaler()
onehot_transformer = OneHotEncoder(drop='first')

In [57]:
preprocessor = ColumnTransformer(
    [('OneHotEncoder', onehot_transformer, cat_feature),
     ("StandardScaler", numeric_transformer, Num_feature)])

In [58]:
preprocessor

In [59]:
X_train = preprocessor.fit_transform(X_train)

In [60]:
X_test = preprocessor.transform(X_test)

In [61]:
pd.DataFrame(X_train)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,...,1.485145,-0.532978,0.290156,-0.714165,1.462112,-0.652661,0.679646,0.787648,-0.396451,-0.069993
1,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,1.485145,-1.022080,0.290156,-0.714165,-0.089555,-0.652661,0.679646,-1.269602,-0.008160,2.051504
2,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,...,-0.704079,-0.166151,0.290156,0.540275,1.462112,-0.652661,-0.054244,0.787648,0.734781,-0.777159
3,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,...,1.485145,-0.288426,-2.719152,-0.714165,-1.124000,-0.652661,-0.054244,0.787648,-0.727983,-1.484325
4,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,...,-0.704079,-0.410702,0.290156,-0.714165,-0.089555,-0.652661,0.679646,0.787648,-0.612586,-0.069993
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3905,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,-0.704079,1.912533,0.290156,0.540275,0.427668,-0.652661,-0.054244,0.787648,0.654153,-1.484325
3906,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,-0.704079,1.667982,-2.719152,-0.714165,-0.606777,-0.652661,-1.522024,0.787648,-1.105186,-0.777159
3907,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,-0.704079,-1.022080,0.290156,-0.714165,-0.606777,-0.652661,-1.522024,-1.269602,-0.395887,-1.484325
3908,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,-0.704079,-0.410702,0.290156,-0.714165,-0.606777,-0.652661,-1.522024,0.787648,-1.135445,-0.777159


# Model Building

In [70]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay, precision_score, recall_score, f1_score, roc_auc_score, roc_curve

In [71]:
models = {
    "RandomForestClassifier" : RandomForestClassifier(),
    "DecisionTreeClassifier" : DecisionTreeClassifier(),
    'LogisticRegression' : LogisticRegression()
}

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train,y_train) # training model


    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)


    #Training data performance
    model_train_accuracy = accuracy_score(y_train,y_train_pred)
    model_train_f1 = f1_score(y_train,y_train_pred, average="weighted")
    model_train_precision = precision_score(y_train,y_train_pred)
    model_train_recall = recall_score(y_train, y_train_pred)
    model_train_rocauc_score = roc_auc_score(y_train,y_train_pred)

    #Test Data performance
    model_test_accuracy = accuracy_score(y_test,y_test_pred)
    model_test_f1 = f1_score(y_test,y_test_pred, average="weighted")
    model_test_precision = precision_score(y_test,y_test_pred)
    model_test_recall = recall_score(y_test,y_test_pred)
    model_test_rocauc_score = roc_auc_score(y_test,y_test_pred)

    # Model Performance
    print(list(models.keys())[i])


    print('Model performance for Training Data')
    print('Accuracy : {:.4f}'.format(model_train_accuracy))
    print('F1 score : {:.4f}'.format(model_train_f1))
    print('Precision : {:.4f}'.format(model_train_precision))
    print('Recall : {:.4f}'.format(model_train_recall))
    print('Roc Auc Score: {:.4f}'.format(model_train_rocauc_score))
    
    print('_'*40)

    print('Model performance for Test Data')
    print('Accuracy : {:.4f}'.format(model_test_accuracy))
    print('F1 score : {:.4f}'.format(model_test_f1))
    print('Precision : {:.4f}'.format(model_test_precision))
    print('Recall : {:.4f}'.format(model_test_recall))
    print('Roc Auc Score: {:.4f}'.format(model_test_rocauc_score))
   
   
    print('='* 40)
    print('\n')

RandomForestClassifier
Model performance for Training Data
Accuracy : 1.0000
F1 score : 1.0000
Precision : 1.0000
Recall : 1.0000
Roc Auc Score: 1.0000
________________________________________
Model performance for Test Data
Accuracy : 0.9233
F1 score : 0.9167
Precision : 0.9496
Recall : 0.6209
Roc Auc Score: 0.8067


DecisionTreeClassifier
Model performance for Training Data
Accuracy : 1.0000
F1 score : 1.0000
Precision : 1.0000
Recall : 1.0000
Roc Auc Score: 1.0000
________________________________________
Model performance for Test Data
Accuracy : 0.9029
F1 score : 0.9023
Precision : 0.7458
Recall : 0.7253
Roc Auc Score: 0.8344


LogisticRegression
Model performance for Training Data
Accuracy : 0.8514
F1 score : 0.8281
Precision : 0.7329
Recall : 0.3347
Roc Auc Score: 0.6532
________________________________________
Model performance for Test Data
Accuracy : 0.8323
F1 score : 0.8031
Precision : 0.6184
Recall : 0.2582
Roc Auc Score: 0.6109




# Hyper Parametric Tuning

In [77]:
# hyper parametric tuning for Random Forst 

rf_params = {
    'max_depth' : [5,8,15,None,10],
    'max_features' : [5,7,'auto', 8],
    'min_samples_split' : [2,8,15,20],
    'n_estimators' : [100,200,50,1000]
}

In [78]:
rf_params

{'max_depth': [5, 8, 15, None, 10],
 'max_features': [5, 7, 'auto', 8],
 'min_samples_split': [2, 8, 15, 20],
 'n_estimators': [100, 200, 50, 1000]}

In [79]:
# Model list for Random search CV
randomcv_models = [
    ('RF', RandomForestClassifier(), rf_params)
    
]

In [80]:
from sklearn.model_selection import RandomizedSearchCV

model_param = {}
for name, model, params in randomcv_models:
    random = RandomizedSearchCV(estimator=model, 
                                param_distributions=params,
                                n_iter=100,
                                cv=3,
                                verbose=2,
                                n_jobs=-1)
    random.fit(X_train,y_train)
    model_param[name] = random.best_params_


for model_name in model_param:
    print(f"______________________ Best Params for {model_name} ______________________v ")
    print(model_param[model_name])


Fitting 3 folds for each of 100 candidates, totalling 300 fits
______________________ Best Params for RF ______________________v 
{'n_estimators': 1000, 'min_samples_split': 2, 'max_features': 8, 'max_depth': None}


In [81]:
models = {
    'Random Forest' : RandomForestClassifier(n_estimators=1000, min_samples_split=2,max_features=8, max_depth=None)

}

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train,y_train) # training model


    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)


    #Training data performance
    model_train_accuracy = accuracy_score(y_train,y_train_pred)
    model_train_f1 = f1_score(y_train,y_train_pred, average="weighted")
    model_train_precision = precision_score(y_train,y_train_pred)
    model_train_recall = recall_score(y_train, y_train_pred)
    model_train_rocauc_score = roc_auc_score(y_train,y_train_pred)

    #Test Data performance
    model_test_accuracy = accuracy_score(y_test,y_test_pred)
    model_test_f1 = f1_score(y_test,y_test_pred, average="weighted")
    model_test_precision = precision_score(y_test,y_test_pred)
    model_test_recall = recall_score(y_test,y_test_pred)
    model_test_rocauc_score = roc_auc_score(y_test,y_test_pred)

    # Model Performance
    print(list(models.keys())[i])


    print('Model performance for Training Data')
    print('Accuracy : {:.4f}'.format(model_train_accuracy))
    print('F1 score : {:.4f}'.format(model_train_f1))
    print('Precision : {:.4f}'.format(model_train_precision))
    print('Recall : {:.4f}'.format(model_train_recall))
    print('Roc Auc Score: {:.4f}'.format(model_train_rocauc_score))
    
    print('_'*40)

    print('Model performance for Test Data')
    print('Accuracy : {:.4f}'.format(model_test_accuracy))
    print('F1 score : {:.4f}'.format(model_test_f1))
    print('Precision : {:.4f}'.format(model_test_precision))
    print('Recall : {:.4f}'.format(model_test_recall))
    print('Roc Auc Score: {:.4f}'.format(model_test_rocauc_score))
   
   
    print('='* 40)
    print('\n')
    

Random Forest
Model performance for Training Data
Accuracy : 1.0000
F1 score : 1.0000
Precision : 1.0000
Recall : 1.0000
Roc Auc Score: 1.0000
________________________________________
Model performance for Test Data
Accuracy : 0.9376
F1 score : 0.9335
Precision : 0.9618
Recall : 0.6923
Roc Auc Score: 0.8430


