In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score,classification_report
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.feature_selection import RFE,RFECV
from sklearn.tree import DecisionTreeClassifier,plot_tree
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder,StandardScaler
from sklearn.impute import SimpleImputer
import warnings
warnings.filterwarnings('ignore')

In [2]:
df= pd.read_csv('horse.csv')

In [3]:
df.head()

Unnamed: 0,surgery,age,hospital_number,rectal_temp,pulse,respiratory_rate,temp_of_extremities,peripheral_pulse,mucous_membrane,capillary_refill_time,...,packed_cell_volume,total_protein,abdomo_appearance,abdomo_protein,outcome,surgical_lesion,lesion_1,lesion_2,lesion_3,cp_data
0,no,adult,530101,38.5,66.0,28.0,cool,reduced,,more_3_sec,...,45.0,8.4,,,died,no,11300,0,0,no
1,yes,adult,534817,39.2,88.0,20.0,,,pale_cyanotic,less_3_sec,...,50.0,85.0,cloudy,2.0,euthanized,no,2208,0,0,no
2,no,adult,530334,38.3,40.0,24.0,normal,normal,pale_pink,less_3_sec,...,33.0,6.7,,,lived,no,0,0,0,yes
3,yes,young,5290409,39.1,164.0,84.0,cold,normal,dark_cyanotic,more_3_sec,...,48.0,7.2,serosanguious,5.3,died,yes,2208,0,0,yes
4,no,adult,530255,37.3,104.0,35.0,,,dark_cyanotic,more_3_sec,...,74.0,7.4,,,died,no,4300,0,0,no


In [4]:
df.shape

(299, 28)

In [5]:
target=df[['outcome']]
features= df.drop(columns='outcome')

In [6]:
features.head()

Unnamed: 0,surgery,age,hospital_number,rectal_temp,pulse,respiratory_rate,temp_of_extremities,peripheral_pulse,mucous_membrane,capillary_refill_time,...,abdomen,packed_cell_volume,total_protein,abdomo_appearance,abdomo_protein,surgical_lesion,lesion_1,lesion_2,lesion_3,cp_data
0,no,adult,530101,38.5,66.0,28.0,cool,reduced,,more_3_sec,...,distend_large,45.0,8.4,,,no,11300,0,0,no
1,yes,adult,534817,39.2,88.0,20.0,,,pale_cyanotic,less_3_sec,...,other,50.0,85.0,cloudy,2.0,no,2208,0,0,no
2,no,adult,530334,38.3,40.0,24.0,normal,normal,pale_pink,less_3_sec,...,normal,33.0,6.7,,,no,0,0,0,yes
3,yes,young,5290409,39.1,164.0,84.0,cold,normal,dark_cyanotic,more_3_sec,...,,48.0,7.2,serosanguious,5.3,yes,2208,0,0,yes
4,no,adult,530255,37.3,104.0,35.0,,,dark_cyanotic,more_3_sec,...,,74.0,7.4,,,no,4300,0,0,no


In [7]:
features.isnull().sum()

surgery                    0
age                        0
hospital_number            0
rectal_temp               60
pulse                     24
respiratory_rate          58
temp_of_extremities       56
peripheral_pulse          69
mucous_membrane           47
capillary_refill_time     32
pain                      55
peristalsis               44
abdominal_distention      56
nasogastric_tube         104
nasogastric_reflux       106
nasogastric_reflux_ph    246
rectal_exam_feces        102
abdomen                  118
packed_cell_volume        29
total_protein             33
abdomo_appearance        165
abdomo_protein           198
surgical_lesion            0
lesion_1                   0
lesion_2                   0
lesion_3                   0
cp_data                    0
dtype: int64

In [8]:
num_col= [i for i in features.columns if features[i].dtype!='O']
print(num_col)
cat_col= [i for i in features.columns if features[i].dtype=='O']
print(cat_col)

['hospital_number', 'rectal_temp', 'pulse', 'respiratory_rate', 'nasogastric_reflux_ph', 'packed_cell_volume', 'total_protein', 'abdomo_protein', 'lesion_1', 'lesion_2', 'lesion_3']
['surgery', 'age', 'temp_of_extremities', 'peripheral_pulse', 'mucous_membrane', 'capillary_refill_time', 'pain', 'peristalsis', 'abdominal_distention', 'nasogastric_tube', 'nasogastric_reflux', 'rectal_exam_feces', 'abdomen', 'abdomo_appearance', 'surgical_lesion', 'cp_data']


In [9]:
features[num_col]

Unnamed: 0,hospital_number,rectal_temp,pulse,respiratory_rate,nasogastric_reflux_ph,packed_cell_volume,total_protein,abdomo_protein,lesion_1,lesion_2,lesion_3
0,530101,38.5,66.0,28.0,,45.0,8.4,,11300,0,0
1,534817,39.2,88.0,20.0,,50.0,85.0,2.0,2208,0,0
2,530334,38.3,40.0,24.0,,33.0,6.7,,0,0,0
3,5290409,39.1,164.0,84.0,5.0,48.0,7.2,5.3,2208,0,0
4,530255,37.3,104.0,35.0,,74.0,7.4,,4300,0,0
...,...,...,...,...,...,...,...,...,...,...,...
294,533886,,120.0,70.0,,55.0,65.0,,3205,0,0
295,527702,37.2,72.0,24.0,,44.0,,3.3,2208,0,0
296,529386,37.5,72.0,30.0,,60.0,6.8,,3205,0,0
297,530612,36.5,100.0,24.0,,50.0,6.0,3.4,2208,0,0


In [10]:

features[cat_col]

Unnamed: 0,surgery,age,temp_of_extremities,peripheral_pulse,mucous_membrane,capillary_refill_time,pain,peristalsis,abdominal_distention,nasogastric_tube,nasogastric_reflux,rectal_exam_feces,abdomen,abdomo_appearance,surgical_lesion,cp_data
0,no,adult,cool,reduced,,more_3_sec,extreme_pain,absent,severe,,,decreased,distend_large,,no,no
1,yes,adult,,,pale_cyanotic,less_3_sec,mild_pain,absent,slight,,,absent,other,cloudy,no,no
2,no,adult,normal,normal,pale_pink,less_3_sec,mild_pain,hypomotile,none,,,normal,normal,,no,yes
3,yes,young,cold,normal,dark_cyanotic,more_3_sec,depressed,absent,severe,none,less_1_liter,decreased,,serosanguious,yes,yes
4,no,adult,,,dark_cyanotic,more_3_sec,,,,,,,,,no,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,yes,adult,cold,,pale_cyanotic,more_3_sec,depressed,absent,,,,,distend_large,,no,no
295,no,adult,cool,increased,pale_cyanotic,more_3_sec,severe_pain,hypomotile,moderate,significant,none,absent,distend_small,serosanguious,yes,yes
296,yes,adult,cold,reduced,pale_cyanotic,less_3_sec,severe_pain,absent,moderate,slight,none,decreased,distend_large,,yes,no
297,yes,adult,cool,reduced,pale_pink,less_3_sec,mild_pain,hypomotile,moderate,significant,none,absent,distend_small,serosanguious,yes,yes


In [11]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder,StandardScaler

In [12]:
Ordinal=make_pipeline(OrdinalEncoder())

In [13]:
transformer=ColumnTransformer([('ord',Ordinal,cat_col)])

In [14]:
transformer.get_feature_names_out

<bound method ColumnTransformer.get_feature_names_out of ColumnTransformer(transformers=[('ord',
                                 Pipeline(steps=[('ordinalencoder',
                                                  OrdinalEncoder())]),
                                 ['surgery', 'age', 'temp_of_extremities',
                                  'peripheral_pulse', 'mucous_membrane',
                                  'capillary_refill_time', 'pain',
                                  'peristalsis', 'abdominal_distention',
                                  'nasogastric_tube', 'nasogastric_reflux',
                                  'rectal_exam_feces', 'abdomen',
                                  'abdomo_appearance', 'surgical_lesion',
                                  'cp_data'])])>

In [15]:
df1=pd.DataFrame(transformer.fit_transform(features[cat_col]),columns=transformer.get_feature_names_out())
df1

Unnamed: 0,ord__surgery,ord__age,ord__temp_of_extremities,ord__peripheral_pulse,ord__mucous_membrane,ord__capillary_refill_time,ord__pain,ord__peristalsis,ord__abdominal_distention,ord__nasogastric_tube,ord__nasogastric_reflux,ord__rectal_exam_feces,ord__abdomen,ord__abdomo_appearance,ord__surgical_lesion,ord__cp_data
0,0.0,0.0,1.0,3.0,,2.0,2.0,0.0,2.0,,,1.0,0.0,,0.0,0.0
1,1.0,0.0,,,4.0,1.0,3.0,0.0,3.0,,,0.0,4.0,1.0,0.0,0.0
2,0.0,0.0,2.0,2.0,5.0,1.0,3.0,2.0,1.0,,,3.0,3.0,,0.0,1.0
3,1.0,1.0,0.0,2.0,2.0,2.0,1.0,0.0,2.0,0.0,0.0,1.0,,2.0,1.0,1.0
4,0.0,0.0,,,2.0,2.0,,,,,,,,,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,1.0,0.0,0.0,,4.0,2.0,1.0,0.0,,,,,0.0,,0.0,0.0
295,0.0,0.0,1.0,1.0,4.0,2.0,4.0,2.0,0.0,1.0,2.0,0.0,1.0,2.0,1.0,1.0
296,1.0,0.0,0.0,3.0,4.0,1.0,4.0,0.0,0.0,2.0,2.0,1.0,0.0,,1.0,0.0
297,1.0,0.0,1.0,3.0,5.0,1.0,3.0,2.0,0.0,1.0,2.0,0.0,1.0,2.0,1.0,1.0


In [16]:
simple_impute= SimpleImputer(missing_values=np.nan,strategy='most_frequent')

In [17]:
df1=pd.DataFrame(simple_impute.fit_transform(df1),columns=df1.columns)
df1

Unnamed: 0,ord__surgery,ord__age,ord__temp_of_extremities,ord__peripheral_pulse,ord__mucous_membrane,ord__capillary_refill_time,ord__pain,ord__peristalsis,ord__abdominal_distention,ord__nasogastric_tube,ord__nasogastric_reflux,ord__rectal_exam_feces,ord__abdomen,ord__abdomo_appearance,ord__surgical_lesion,ord__cp_data
0,0.0,0.0,1.0,3.0,3.0,2.0,2.0,0.0,2.0,2.0,2.0,1.0,0.0,1.0,0.0,0.0
1,1.0,0.0,1.0,2.0,4.0,1.0,3.0,0.0,3.0,2.0,2.0,0.0,4.0,1.0,0.0,0.0
2,0.0,0.0,2.0,2.0,5.0,1.0,3.0,2.0,1.0,2.0,2.0,3.0,3.0,1.0,0.0,1.0
3,1.0,1.0,0.0,2.0,2.0,2.0,1.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,1.0,1.0
4,0.0,0.0,1.0,2.0,2.0,2.0,3.0,2.0,1.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,1.0,0.0,0.0,2.0,4.0,2.0,1.0,0.0,1.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0
295,0.0,0.0,1.0,1.0,4.0,2.0,4.0,2.0,0.0,1.0,2.0,0.0,1.0,2.0,1.0,1.0
296,1.0,0.0,0.0,3.0,4.0,1.0,4.0,0.0,0.0,2.0,2.0,1.0,0.0,1.0,1.0,0.0
297,1.0,0.0,1.0,3.0,5.0,1.0,3.0,2.0,0.0,1.0,2.0,0.0,1.0,2.0,1.0,1.0


In [18]:
features[num_col]
simple_impute1= SimpleImputer(missing_values=np.nan,strategy='median')
df2=pd.DataFrame(simple_impute1.fit_transform(features[num_col]),columns=num_col)
df2

Unnamed: 0,hospital_number,rectal_temp,pulse,respiratory_rate,nasogastric_reflux_ph,packed_cell_volume,total_protein,abdomo_protein,lesion_1,lesion_2,lesion_3
0,530101.0,38.5,66.0,28.0,5.0,45.0,8.4,2.3,11300.0,0.0,0.0
1,534817.0,39.2,88.0,20.0,5.0,50.0,85.0,2.0,2208.0,0.0,0.0
2,530334.0,38.3,40.0,24.0,5.0,33.0,6.7,2.3,0.0,0.0,0.0
3,5290409.0,39.1,164.0,84.0,5.0,48.0,7.2,5.3,2208.0,0.0,0.0
4,530255.0,37.3,104.0,35.0,5.0,74.0,7.4,2.3,4300.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
294,533886.0,38.2,120.0,70.0,5.0,55.0,65.0,2.3,3205.0,0.0,0.0
295,527702.0,37.2,72.0,24.0,5.0,44.0,7.5,3.3,2208.0,0.0,0.0
296,529386.0,37.5,72.0,30.0,5.0,60.0,6.8,2.3,3205.0,0.0,0.0
297,530612.0,36.5,100.0,24.0,5.0,50.0,6.0,3.4,2208.0,0.0,0.0


In [19]:
std_sca=StandardScaler()

In [20]:
df2=pd.DataFrame(std_sca.fit_transform(df2),columns=df2.columns)

In [21]:
df2

Unnamed: 0,hospital_number,rectal_temp,pulse,respiratory_rate,nasogastric_reflux_ph,packed_cell_volume,total_protein,abdomo_protein,lesion_1,lesion_2,lesion_3
0,-0.364592,0.496498,-0.194771,-0.087726,0.062143,-0.119170,-0.533355,-0.209831,1.415021,-0.139371,-0.057928
1,-0.361509,1.565603,0.604982,-0.588538,0.062143,0.385530,2.380057,-0.461797,-0.268864,-0.139371,-0.057928
2,-0.364440,0.191039,-1.139933,-0.338132,0.062143,-1.330449,-0.598013,-0.209831,-0.677797,-0.139371,-0.057928
3,2.747804,1.412873,3.367763,3.417962,0.062143,0.183650,-0.578996,2.309828,-0.268864,-0.139371,-0.057928
4,-0.364491,-1.336253,1.186620,0.350485,0.062143,2.808088,-0.571389,-0.209831,0.118585,-0.139371,-0.057928
...,...,...,...,...,...,...,...,...,...,...,...
294,-0.362117,0.038310,1.768258,2.541540,0.062143,0.890229,1.619375,-0.209831,-0.084214,-0.139371,-0.057928
295,-0.366161,-1.488983,0.023343,-0.338132,0.062143,-0.220110,-0.567586,0.630055,-0.268864,-0.139371,-0.057928
296,-0.365060,-1.030795,0.023343,0.037477,0.062143,1.394929,-0.594210,-0.209831,-0.084214,-0.139371,-0.057928
297,-0.364258,-2.558088,1.041210,-0.338132,0.062143,0.385530,-0.624637,0.714044,-0.268864,-0.139371,-0.057928


In [22]:
df_new=pd.concat([df1,df2],axis=1)

In [23]:
df_new.head()

Unnamed: 0,ord__surgery,ord__age,ord__temp_of_extremities,ord__peripheral_pulse,ord__mucous_membrane,ord__capillary_refill_time,ord__pain,ord__peristalsis,ord__abdominal_distention,ord__nasogastric_tube,...,rectal_temp,pulse,respiratory_rate,nasogastric_reflux_ph,packed_cell_volume,total_protein,abdomo_protein,lesion_1,lesion_2,lesion_3
0,0.0,0.0,1.0,3.0,3.0,2.0,2.0,0.0,2.0,2.0,...,0.496498,-0.194771,-0.087726,0.062143,-0.11917,-0.533355,-0.209831,1.415021,-0.139371,-0.057928
1,1.0,0.0,1.0,2.0,4.0,1.0,3.0,0.0,3.0,2.0,...,1.565603,0.604982,-0.588538,0.062143,0.38553,2.380057,-0.461797,-0.268864,-0.139371,-0.057928
2,0.0,0.0,2.0,2.0,5.0,1.0,3.0,2.0,1.0,2.0,...,0.191039,-1.139933,-0.338132,0.062143,-1.330449,-0.598013,-0.209831,-0.677797,-0.139371,-0.057928
3,1.0,1.0,0.0,2.0,2.0,2.0,1.0,0.0,2.0,0.0,...,1.412873,3.367763,3.417962,0.062143,0.18365,-0.578996,2.309828,-0.268864,-0.139371,-0.057928
4,0.0,0.0,1.0,2.0,2.0,2.0,3.0,2.0,1.0,2.0,...,-1.336253,1.18662,0.350485,0.062143,2.808088,-0.571389,-0.209831,0.118585,-0.139371,-0.057928


In [24]:
df_new.shape

(299, 27)

In [25]:
import scipy.stats as stats

In [26]:
def outlier_remove_z_score(df,columns):
    thershold= 3
    z_score= stats.zscore(df[columns])
    return df[(abs(z_score)<thershold).all(axis=1)]

In [27]:
df_new1=outlier_remove_z_score(df_new,df_new.columns)

In [28]:
df_new1

Unnamed: 0,ord__surgery,ord__age,ord__temp_of_extremities,ord__peripheral_pulse,ord__mucous_membrane,ord__capillary_refill_time,ord__pain,ord__peristalsis,ord__abdominal_distention,ord__nasogastric_tube,...,rectal_temp,pulse,respiratory_rate,nasogastric_reflux_ph,packed_cell_volume,total_protein,abdomo_protein,lesion_1,lesion_2,lesion_3
0,0.0,0.0,1.0,3.0,3.0,2.0,2.0,0.0,2.0,2.0,...,0.496498,-0.194771,-0.087726,0.062143,-0.119170,-0.533355,-0.209831,1.415021,-0.139371,-0.057928
1,1.0,0.0,1.0,2.0,4.0,1.0,3.0,0.0,3.0,2.0,...,1.565603,0.604982,-0.588538,0.062143,0.385530,2.380057,-0.461797,-0.268864,-0.139371,-0.057928
2,0.0,0.0,2.0,2.0,5.0,1.0,3.0,2.0,1.0,2.0,...,0.191039,-1.139933,-0.338132,0.062143,-1.330449,-0.598013,-0.209831,-0.677797,-0.139371,-0.057928
4,0.0,0.0,1.0,2.0,2.0,2.0,3.0,2.0,1.0,2.0,...,-1.336253,1.186620,0.350485,0.062143,2.808088,-0.571389,-0.209831,0.118585,-0.139371,-0.057928
5,0.0,0.0,3.0,2.0,5.0,1.0,1.0,2.0,3.0,2.0,...,0.038310,-0.267476,-0.275531,0.062143,-0.119170,-0.567586,-0.209831,-0.677797,-0.139371,-0.057928
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,1.0,0.0,0.0,2.0,4.0,2.0,1.0,0.0,1.0,2.0,...,0.038310,1.768258,2.541540,0.062143,0.890229,1.619375,-0.209831,-0.084214,-0.139371,-0.057928
295,0.0,0.0,1.0,1.0,4.0,2.0,4.0,2.0,0.0,1.0,...,-1.488983,0.023343,-0.338132,0.062143,-0.220110,-0.567586,0.630055,-0.268864,-0.139371,-0.057928
296,1.0,0.0,0.0,3.0,4.0,1.0,4.0,0.0,0.0,2.0,...,-1.030795,0.023343,0.037477,0.062143,1.394929,-0.594210,-0.209831,-0.084214,-0.139371,-0.057928
297,1.0,0.0,1.0,3.0,5.0,1.0,3.0,2.0,0.0,1.0,...,-2.558088,1.041210,-0.338132,0.062143,0.385530,-0.624637,0.714044,-0.268864,-0.139371,-0.057928


In [29]:
target.shape

(299, 1)

In [30]:
df_new2=pd.merge(df_new1,target,left_index=True,right_index=True)

In [31]:
df_new2

Unnamed: 0,ord__surgery,ord__age,ord__temp_of_extremities,ord__peripheral_pulse,ord__mucous_membrane,ord__capillary_refill_time,ord__pain,ord__peristalsis,ord__abdominal_distention,ord__nasogastric_tube,...,pulse,respiratory_rate,nasogastric_reflux_ph,packed_cell_volume,total_protein,abdomo_protein,lesion_1,lesion_2,lesion_3,outcome
0,0.0,0.0,1.0,3.0,3.0,2.0,2.0,0.0,2.0,2.0,...,-0.194771,-0.087726,0.062143,-0.119170,-0.533355,-0.209831,1.415021,-0.139371,-0.057928,died
1,1.0,0.0,1.0,2.0,4.0,1.0,3.0,0.0,3.0,2.0,...,0.604982,-0.588538,0.062143,0.385530,2.380057,-0.461797,-0.268864,-0.139371,-0.057928,euthanized
2,0.0,0.0,2.0,2.0,5.0,1.0,3.0,2.0,1.0,2.0,...,-1.139933,-0.338132,0.062143,-1.330449,-0.598013,-0.209831,-0.677797,-0.139371,-0.057928,lived
4,0.0,0.0,1.0,2.0,2.0,2.0,3.0,2.0,1.0,2.0,...,1.186620,0.350485,0.062143,2.808088,-0.571389,-0.209831,0.118585,-0.139371,-0.057928,died
5,0.0,0.0,3.0,2.0,5.0,1.0,1.0,2.0,3.0,2.0,...,-0.267476,-0.275531,0.062143,-0.119170,-0.567586,-0.209831,-0.677797,-0.139371,-0.057928,lived
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,1.0,0.0,0.0,2.0,4.0,2.0,1.0,0.0,1.0,2.0,...,1.768258,2.541540,0.062143,0.890229,1.619375,-0.209831,-0.084214,-0.139371,-0.057928,euthanized
295,0.0,0.0,1.0,1.0,4.0,2.0,4.0,2.0,0.0,1.0,...,0.023343,-0.338132,0.062143,-0.220110,-0.567586,0.630055,-0.268864,-0.139371,-0.057928,euthanized
296,1.0,0.0,0.0,3.0,4.0,1.0,4.0,0.0,0.0,2.0,...,0.023343,0.037477,0.062143,1.394929,-0.594210,-0.209831,-0.084214,-0.139371,-0.057928,died
297,1.0,0.0,1.0,3.0,5.0,1.0,3.0,2.0,0.0,1.0,...,1.041210,-0.338132,0.062143,0.385530,-0.624637,0.714044,-0.268864,-0.139371,-0.057928,lived


In [32]:
features_new= df_new2.drop(columns='outcome')
target_new=df_new2[['outcome']]

In [33]:
x_train,x_test,y_train,y_test= train_test_split(features_new,target_new,train_size=0.75,random_state=100)

In [34]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(167, 27)
(56, 27)
(167, 1)
(56, 1)


In [35]:
## Naive_Bayes

In [36]:
from sklearn.naive_bayes import BernoulliNB

In [37]:
bern= BernoulliNB()

In [38]:
bern.fit(x_train,y_train)

In [39]:
y_pred= bern.predict(x_test)

In [40]:
accuracy_score(y_test,y_pred)

0.6428571428571429

In [41]:
## Bagging_Classifier

In [42]:
from sklearn.ensemble import BaggingClassifier

In [43]:
bag_class= BaggingClassifier(estimator=DecisionTreeClassifier(),n_estimators=50,max_samples=1.0,max_features=0.9,bootstrap=True)

In [44]:
bag_class.fit(x_train,y_train)

In [45]:
y_pred= bag_class.predict(x_test)

In [46]:
accuracy_score(y_test,y_pred)

0.75

In [47]:
## Random_Forest
from sklearn.ensemble import RandomForestClassifier

In [48]:
rand_forest= RandomForestClassifier(n_estimators=50,bootstrap=True,max_samples=1.0,max_features='sqrt')

In [49]:
rand_forest.fit(x_train,y_train)

In [50]:
y_pred= rand_forest.predict(x_test)

In [51]:
accuracy_score(y_test,y_pred)

0.75

In [52]:
x_train.shape

(167, 27)

In [53]:
167*0.9

150.3

In [54]:
## Gradient_Boosting

In [55]:
from sklearn.ensemble import GradientBoostingClassifier

In [56]:
grad_boost= GradientBoostingClassifier(n_estimators=100, learning_rate=0.5, max_depth=2,max_features=1.0,subsample=0.9)

In [57]:
grad_boost.fit(x_train,y_train)

In [58]:
y_pred= grad_boost.predict(x_test)

In [59]:
accuracy_score(y_test,y_pred)

0.6964285714285714

## xgboost

In [60]:
!pip install xgboost



In [61]:
import xgboost as xgb

In [62]:
xgb= xgb.XGBClassifier(n_estimators=100,min_child_weight=2, learning_rate=0.5,reg_lambda=1,reg_alpha=2)

In [63]:
from sklearn.preprocessing import LabelEncoder
lab_enc= LabelEncoder()
y_train= lab_enc.fit_transform(y_train)
y_test= lab_enc.fit_transform(y_test)

In [64]:
xgb.fit(x_train,y_train)

In [65]:
y_pred= xgb.predict(x_test)

In [66]:
accuracy_score(y_test,y_pred)

0.75

## LightGBM

In [67]:
!pip install lightgbm

Collecting lightgbm
  Using cached lightgbm-4.5.0-py3-none-win_amd64.whl.metadata (17 kB)
Using cached lightgbm-4.5.0-py3-none-win_amd64.whl (1.4 MB)
Installing collected packages: lightgbm
Successfully installed lightgbm-4.5.0


In [68]:
import lightgbm as lgb

In [69]:
lgb=lgb.LGBMClassifier(n_estimator=100,min_child_weight=2,learning_rate=0.4,subsample=0.9,max_bins=30,max_depth=3,boosting_type='goss',top_rate=0.3,other_rate=0.2)

In [70]:
lgb.fit(x_train,y_train)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000185 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 167, number of used features: 23
[LightGBM] [Info] Using GOSS
[LightGBM] [Info] Start training from score -1.454432
[LightGBM] [Info] Start training from score -1.982500
[LightGBM] [Info] Start training from score -0.464033


In [71]:
y_pred= lgb.predict(x_test)



In [72]:
accuracy_score(y_test,y_pred)

0.7142857142857143