## IMPORT THE LIBRARIES

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn 
import flask
import pickle

## READ THE DATASET

In [None]:
# Load the liver-patient CSV into the working DataFrame `df`.
# NOTE(review): the absolute "/content/..." path looks like a Google Colab
# upload location — confirm, and adjust it when running elsewhere.
df = pd.read_csv("/content/indian_liver_patient.csv")

In [None]:
# Preview the first rows of the dataset (head() shows 5 rows by default).
df.head()

## VISUALIZATION

## UNIVARIATE ANALYSIS

In [None]:
# Count how many records fall under each Gender value.
df.Gender.value_counts()

In [None]:
# Bar chart of the number of records per Gender value.
# The counts are computed once and reused for both axes.
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so the positional form raises TypeError there.
gender_counts = df.Gender.value_counts()
sns.barplot(x=gender_counts.index, y=gender_counts)

In [None]:
# Pie chart of the Gender distribution; the first wedge is pulled out by 0.2.
# NOTE(review): the 'M'/'F' labels are hard-coded and assume value_counts()
# returns the male count first — confirm against the data.
plt.pie(
    df.Gender.value_counts(),
    explode=[0.2, 0],
    labels=['M', 'F'],
    autopct="%1.1f%%",
    colors=['blue', 'red'],
)

## BIVARIATE ANALYSIS

In [None]:
# Line plot of the Dataset column against Age on a small 5x3 figure.
plt.figure(figsize=(5, 3))
# Columns are selected by name with keyword arguments: seaborn >= 0.12 no
# longer accepts x and y positionally (it raises TypeError).
sns.lineplot(data=df, x="Age", y="Dataset")

## MULTI-VARIATE ANALYSIS

In [None]:
# Grid of pairwise plots over the columns of df.
sns.pairplot(data=df)

In [None]:
# Same pairwise grid, coloured by the Dataset column and drawn with
# regression fits (kind='reg').
sns.pairplot(
    data=df,
    hue='Dataset',
    kind='reg',
)

## CORRELATION BETWEEN NUMERICAL COLUMNS THROUGH HEAT MAP

In [None]:
# Summary statistics for the columns of df, as a reference for the
# correlation heat map that follows.
df.describe()

In [None]:
# Annotated heat map of the pairwise correlations between numeric columns.
plt.figure(figsize=(10, 10))
# Restrict to numeric columns before calling corr(): Gender holds text, and
# pandas >= 2.0 raises on non-numeric columns instead of silently dropping
# them as older versions did.
numeric_corr = df.select_dtypes(include="number").corr()
sns.heatmap(numeric_corr, annot=True)
plt.show()

## CHECKING NULL VALUES AND REPLACING THEM 

In [None]:
# Per column: does it contain at least one missing value?
df.isna().any()

In [None]:
# Per column: how many values are missing?
df.isna().sum()

In [None]:
# Replace missing Albumin_and_Globulin_Ratio values with the column mean.
# The result is assigned back to the column rather than using
# fillna(inplace=True) on the column selection: the in-place form works on an
# intermediate object, is deprecated in pandas 2.x, and does not update df
# under Copy-on-Write.
ratio_col = 'Albumin_and_Globulin_Ratio'
df[ratio_col] = df[ratio_col].fillna(df[ratio_col].mean())

In [None]:
# Re-check every column for missing values after the fill step.
df.isna().any()

## SPLITTING THE DATASET INTO X = INDEPENDENT VARIABLES AND y = DEPENDENT VARIABLE

In [None]:
# Features: every column except the last one.
# .copy() gives X its own data, so the Gender encoding applied to X later
# does not write into a slice of df (which triggers SettingWithCopyWarning
# and is unreliable).
X = df.iloc[:, :-1].copy()
# Target: the last column (presumably the `Dataset` label — confirm).
y = df.iloc[:, -1]

## PERFORM ENCODING

In [None]:
from sklearn.preprocessing import LabelEncoder
# Encoder used to turn the text Gender column into integer codes.
le=LabelEncoder()

In [None]:
# Fit the encoder on the Gender column and replace the text values with
# their integer codes.
X['Gender'] = le.fit_transform(X['Gender'])

In [None]:
# Preview the feature table after Gender has been encoded.
X.head()

## SCALING

In [None]:
from sklearn.preprocessing import scale

In [None]:
# Standardize the feature columns with sklearn's scale() and wrap the result
# in a DataFrame that keeps the original column names.
# NOTE(review): x_scaled is not referenced again in the visible cells — the
# train/test split uses the unscaled X. Confirm whether that is intended.
x_scaled = pd.DataFrame(data=scale(X), columns=X.columns)
x_scaled.head()

## SPLIT THE DATA INTO TRAINING AND TESTING

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed seed makes the split
# repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=47,
)

## MODEL BUILDING

In [None]:
from sklearn.ensemble import RandomForestClassifier 

In [None]:
# Random forest classifier with scikit-learn's default hyperparameters.
# NOTE(review): no random_state is set, so the fitted trees (and the accuracy
# figures) can differ from run to run — consider fixing a seed.
RFmodel = RandomForestClassifier()

In [None]:
# Train the forest on the training split.
RFmodel.fit(X=x_train, y=y_train)

In [None]:
# Predicted labels for the held-out test rows, then display them.
y_predict = RFmodel.predict(X=x_test)
y_predict

In [None]:
# Predict on the training rows as well, so training and testing accuracy can
# be compared.
y_predict_train = RFmodel.predict(x_train)
# Display the training-set predictions that were just computed.
y_predict_train

## MODEL EVALUATION

In [None]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [None]:
# Report accuracy on the held-out rows first, then on the rows the model was
# trained on; a large gap between the two suggests overfitting.
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_predict)
print('Testing accuracy = ', test_accuracy)
train_accuracy = accuracy_score(y_true=y_train, y_pred=y_predict_train)
print("Training accuracy= ", train_accuracy)

## CONFUSION MATRIX

In [None]:
# Confusion matrix for the test split: actual labels as rows, predicted
# labels as columns.
pd.crosstab(index=y_test, columns=y_predict)

In [None]:
# Test-split confusion matrix: actual labels as rows, predicted as columns.
# NOTE(review): this cell repeats the test-split table; presumably a
# training-split table was intended here — confirm.
pd.crosstab(index=y_test, columns=y_predict)

## USER INPUT

In [None]:
# Read one patient record from the keyboard, one value per prompt.
# Each prompt names the field being requested, so the user does not have to
# remember the order of ten otherwise unlabeled inputs.
# int()/float() raise ValueError when an entry is not a valid number.
Age = int(input("Age: "))
# Surrounding whitespace is stripped so it cannot alter the category label.
# NOTE(review): presumably the value must be spelled exactly as in the
# dataset's Gender column — confirm.
Gender = input("Gender: ").strip()
Total_Bilirubin = float(input("Total_Bilirubin: "))
Direct_Bilirubin = float(input("Direct_Bilirubin: "))
Alkaline_Phosphotase = int(input("Alkaline_Phosphotase: "))
Alamine_Aminotransferase = int(input("Alamine_Aminotransferase: "))
Aspartate_Aminotransferase = int(input("Aspartate_Aminotransferase: "))
Total_Protiens = float(input("Total_Protiens: "))
Albumin = float(input("Albumin: "))
Albumin_and_Globulin_Ratio = float(input("Albumin_and_Globulin_Ratio: "))

In [None]:
# Assemble the entered values into a single-row table (a list containing one
# list), in the same order as the column names assigned to it afterwards.
data = [
    [
        Age,
        Gender,
        Total_Bilirubin,
        Direct_Bilirubin,
        Alkaline_Phosphotase,
        Alamine_Aminotransferase,
        Aspartate_Aminotransferase,
        Total_Protiens,
        Albumin,
        Albumin_and_Globulin_Ratio,
    ]
]

In [None]:
# Turn the single-row list into a DataFrame (columns are still numbered).
data = pd.DataFrame(data=data)

In [None]:
# Display the one-row DataFrame built from the user's input.
data

In [None]:
# Name the columns of the user's row; the order mirrors the order in which
# the values were placed in the row.
data.columns = [
    'Age',
    'Gender',
    'Total_Bilirubin',
    'Direct_Bilirubin',
    'Alkaline_Phosphotase',
    'Alamine_Aminotransferase',
    'Aspartate_Aminotransferase',
    'Total_Protiens',
    'Albumin',
    'Albumin_and_Globulin_Ratio',
]

In [None]:
# Display the user's row again, now that its columns have names.
data

In [None]:
# Inspect the Gender value the user entered, before it is encoded.
data.Gender

## ENCODING

In [None]:
# Encode the user's Gender with the mapping the encoder already learned from
# the training data.
# transform() is used instead of fit_transform(): re-fitting on this single
# row would renumber the classes from scratch (a lone value always becomes
# 0), so the code fed to the model could disagree with the one it was
# trained on.
# transform() raises ValueError for a label the encoder has not seen.
data.Gender = le.transform(data.Gender)

In [None]:
# Run the trained forest on the user's encoded row.
P = RFmodel.predict(X=data)

In [None]:
# Display the predicted class label for the entered record.
P
# actual output is : 1
# NOTE(review): unclear whether the line above records the record's true
# label or the model's output from the author's run — confirm.

In [None]:
import joblib

# Persist the trained forest to 'model.pkl' in the working directory.
# NOTE(review): only the classifier is saved; the fitted LabelEncoder is not,
# so whatever loads this file must encode Gender the same way.
joblib.dump(value=RFmodel, filename='model.pkl')