# Indian Liver Patient Records

In [1]:
import pandas as pd
import numpy as np

### Import data from UCI

In [2]:
# Load the patient records from the CSV file in the working directory
df = pd.read_csv('indian_liver_patient.csv')

# Preview ten rows from the middle of the table (positions 310-319, every column)
print(df.iloc[310:320])


     Age  Gender  Total_Bilirubin  Direct_Bilirubin  Alkaline_Phosphotase  \
310   51    Male              0.8               0.2                   175   
311   54  Female             23.2              12.6                   574   
312   27    Male              1.3               0.6                   106   
313   30  Female              0.8               0.2                   158   
314   26    Male              2.0               0.9                   195   
315   22    Male              0.9               0.3                   179   
316   44    Male              0.9               0.2                   182   
317   35    Male              0.7               0.2                   198   
318   38    Male              3.7               2.2                   216   
319   14    Male              0.9               0.3                   310   

     Alamine_Aminotransferase  Aspartate_Aminotransferase  Total_Protiens  \
310                        48                          22             8.1  

### Investigate the DF

In [3]:
# Column dtypes, non-null counts and memory footprint
df.info()

# Number of missing entries in each column
print(df.isna().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 583 entries, 0 to 582
Data columns (total 11 columns):
Age                           583 non-null int64
Gender                        583 non-null object
Total_Bilirubin               583 non-null float64
Direct_Bilirubin              583 non-null float64
Alkaline_Phosphotase          583 non-null int64
Alamine_Aminotransferase      583 non-null int64
Aspartate_Aminotransferase    583 non-null int64
Total_Protiens                583 non-null float64
Albumin                       583 non-null float64
Albumin_and_Globulin_Ratio    579 non-null float64
Dataset                       583 non-null int64
dtypes: float64(5), int64(5), object(1)
memory usage: 50.2+ KB
Age                           0
Gender                        0
Total_Bilirubin               0
Direct_Bilirubin              0
Alkaline_Phosphotase          0
Alamine_Aminotransferase      0
Aspartate_Aminotransferase    0
Total_Protiens                0
Albumin                   

### Prepare the dataset for ML

In [4]:
# Drop rows containing NaNs. dropna() returns a new frame, so the raw `df`
# stays untouched and this cell can be re-run safely.
Data = df.dropna()

# Convert categorical variables into dummy/indicator variables.
# drop_first=True keeps a single indicator for Gender ('Gender_Male')
# instead of two redundant columns.
Data = pd.get_dummies(Data, drop_first=True)

# Recode the target so that the value 2 becomes 0, giving a 0/1 label.
# NOTE: the previous code passed the *set* {2, 1} to replace(), which is not a
# {old: new} mapping -- depending on the pandas version that either raises or
# leaves the column unchanged, so the recoding never happened. It was also an
# in-place call on a column of a derived frame; plain assignment avoids that.
# NOTE(review): presumably 1 = liver patient and 2 = no liver disease, per the
# dataset description -- confirm before interpreting the label.
Data['Dataset'] = Data['Dataset'].replace({2: 0})

# Rename columns for ease of use (reassign rather than mutate in place)
Data = Data.rename(columns={'Gender_Male': 'Is_male', 'Dataset': 'Liver_disease'})

# Define ML input (X: every column except the target) and output (y: the target)
X = Data.drop('Liver_disease', axis=1)
y = Data.Liver_disease


### Build Train and Test data

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

### Define the bagging classifier


In [13]:
# Import DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier

# Import BaggingClassifier
from sklearn.ensemble import BaggingClassifier

# Instantiate dt: the tree used as the template estimator for the ensemble
dt = DecisionTreeClassifier(random_state=1)

# Instantiate bc: a bagging ensemble of 50 estimators built from dt.
# The template is passed positionally on purpose: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the first positional argument works on both.
bc = BaggingClassifier(dt, n_estimators=50, random_state=1)

### Decision Tree Accuracy

In [17]:
# Import DecisionTreeClassifier from sklearn.tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Instantiate a DecisionTreeClassifier 'dt' with a maximum depth of 6.
# Rebinding the name `dt` here does not alter `bc`, which keeps a reference to
# the estimator object it was constructed with.
dt = DecisionTreeClassifier(max_depth=6, random_state=10)

# Fit dt to the training set (fit() trains in place, so no reassignment is needed)
dt.fit(X_train, y_train)

# Predict test set labels
y_pred = dt.predict(X_test)

# Evaluate the single tree. The label now says 'dt': it previously said 'bc',
# which mislabelled this baseline score as the bagging classifier's result.
acc_test = accuracy_score(y_test, y_pred)
print('Test set accuracy of dt: {:.2f}'.format(acc_test))

Test set accuracy of bc: 0.68


### Evaluate Bagging performance

In [18]:
# Train the bagging ensemble on the training split
bc.fit(X_train, y_train)

# Label the held-out rows with the fitted ensemble
y_pred = bc.predict(X_test)

# Share of test rows classified correctly, reported to two decimals
acc_test = accuracy_score(y_test, y_pred)
print('Test set accuracy of bc: {:.2f}'.format(acc_test))

Test set accuracy of bc: 0.74
