## **1- Importing Libraries**


In [26]:
import pandas as pd
import numpy as np
from sklearn import datasets
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [27]:
import warnings
warnings.filterwarnings('ignore')

## **2- Loading the Data**


In [76]:
df = pd.read_csv('liver_disease.csv')
df.head()

Unnamed: 0,Age,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Dataset
0,50,7.3,3.7,92,44,236,6.8,1.6,0.3,Yes
1,26,6.8,3.2,140,37,19,3.6,0.9,0.3,Yes
2,35,26.3,12.1,108,168,630,9.2,2.0,0.3,Yes
3,46,1.4,0.4,298,509,623,3.6,1.0,0.3,Yes
4,65,1.1,0.5,686,16,46,5.7,1.5,0.35,Yes


## **3-Data Exploration**

In [77]:
df.describe()


Unnamed: 0,Age,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio
count,583.0,583.0,583.0,583.0,583.0,583.0,583.0,583.0,579.0
mean,44.746141,3.298799,1.486106,290.576329,80.713551,109.910806,6.48319,3.141852,0.947064
std,16.189833,6.209522,2.808498,242.937989,182.620356,288.918529,1.085451,0.795519,0.319592
min,4.0,0.4,0.1,63.0,10.0,10.0,2.7,0.9,0.3
25%,33.0,0.8,0.2,175.5,23.0,25.0,5.8,2.6,0.7
50%,45.0,1.0,0.3,208.0,35.0,42.0,6.6,3.1,0.93
75%,58.0,2.6,1.3,298.0,60.5,87.0,7.2,3.8,1.1
max,90.0,75.0,19.7,2110.0,2000.0,4929.0,9.6,5.5,2.8


### **Check for null values**

In [78]:
df.isnull().sum()

Age                           0
Total_Bilirubin               0
Direct_Bilirubin              0
Alkaline_Phosphotase          0
Alamine_Aminotransferase      0
Aspartate_Aminotransferase    0
Total_Protiens                0
Albumin                       0
Albumin_and_Globulin_Ratio    4
Dataset                       0
dtype: int64

### **Set the empty values to zero**

In [79]:
df["Albumin_and_Globulin_Ratio"].fillna(0, inplace = True)

In [80]:
df.isnull().sum()


Age                           0
Total_Bilirubin               0
Direct_Bilirubin              0
Alkaline_Phosphotase          0
Alamine_Aminotransferase      0
Aspartate_Aminotransferase    0
Total_Protiens                0
Albumin                       0
Albumin_and_Globulin_Ratio    0
Dataset                       0
dtype: int64

## **4-Preparing the Data**

### **Feature Selection**

In [81]:
df.columns

Index(['Age', 'Total_Bilirubin', 'Direct_Bilirubin', 'Alkaline_Phosphotase',
       'Alamine_Aminotransferase', 'Aspartate_Aminotransferase',
       'Total_Protiens', 'Albumin', 'Albumin_and_Globulin_Ratio', 'Dataset'],
      dtype='object')

**We have label as 'Dataset' and rest other as features**

In [82]:
features = ['Age', 'Total_Bilirubin', 'Direct_Bilirubin', 'Alkaline_Phosphotase',
       'Alamine_Aminotransferase', 'Aspartate_Aminotransferase',
       'Total_Protiens', 'Albumin', 'Albumin_and_Globulin_Ratio']
X = df[features]
y = df['Dataset']

### **Split into train and test data**

In [83]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

## **5-Train the Algorithm - Linear SVM**

In [84]:
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, y_train)

SVC(kernel='linear')

### **Making the Prediction**

In [85]:
y_pred = svclassifier.predict(X_test)

## **6-Evaluating the Algorithm- Linear SVM**

### **Accuracy**

In [86]:
accuracy_score(y_test, y_pred)

0.7328767123287672

### **Confusion matrix**

In [87]:
print(confusion_matrix(y_test,y_pred))

[[  0  39]
 [  0 107]]


In [88]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

          No       0.00      0.00      0.00        39
         Yes       0.73      1.00      0.85       107

    accuracy                           0.73       146
   macro avg       0.37      0.50      0.42       146
weighted avg       0.54      0.73      0.62       146



## **7-Train the Algorithm - Polynomial Kernel**

In [90]:
svclassifier = SVC(kernel='poly', degree=9)
svclassifier.fit(X_train, y_train)

SVC(degree=9, kernel='poly')

**Making the Prediction**

In [91]:
y_pred = svclassifier.predict(X_test)

## **8-Evaluate the Algorithm - Polynomial Kernel**

### **Accuracy**

In [93]:
accuracy_score(y_test, y_pred)

0.726027397260274

### **Confusion Matrix**

In [94]:
print(confusion_matrix(y_test,y_pred))

[[  0  39]
 [  1 106]]


In [95]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

          No       0.00      0.00      0.00        39
         Yes       0.73      0.99      0.84       107

    accuracy                           0.73       146
   macro avg       0.37      0.50      0.42       146
weighted avg       0.54      0.73      0.62       146



## **9-Train the Alogrithm - Gaussian Kernel**

In [96]:
svclassifier = SVC(kernel='rbf')
svclassifier.fit(X_train, y_train)

SVC()

**Making the Prediction**

In [97]:
y_pred = svclassifier.predict(X_test)

## **10-Evalue the Algorithm - Gaussian Kernel**

### **Accuracy**

In [99]:
accuracy_score(y_test, y_pred)

0.7328767123287672

### **Confusion Matrix**

In [100]:
print(confusion_matrix(y_test,y_pred))

[[  0  39]
 [  0 107]]


In [101]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

          No       0.00      0.00      0.00        39
         Yes       0.73      1.00      0.85       107

    accuracy                           0.73       146
   macro avg       0.37      0.50      0.42       146
weighted avg       0.54      0.73      0.62       146



## **11- Train the Algorithm - Sigmoid Kernel**

In [102]:
svclassifier = SVC(kernel='sigmoid')
svclassifier.fit(X_train, y_train)

SVC(kernel='sigmoid')

**Making the Prediction**

In [103]:
y_pred = svclassifier.predict(X_test)

## **12-Evaluate the Alogrithm - Sigmoid Kernel**

### **Accuracy**

In [105]:
accuracy_score(y_test, y_pred)

0.678082191780822

### **Confusion Matrix**

In [106]:
print(confusion_matrix(y_test,y_pred))

[[13 26]
 [21 86]]


In [107]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

          No       0.38      0.33      0.36        39
         Yes       0.77      0.80      0.79       107

    accuracy                           0.68       146
   macro avg       0.58      0.57      0.57       146
weighted avg       0.66      0.68      0.67       146

