# Liver Cirrhosis Stage Detection Project

Cell 1 - Importing necessary Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score,recall_score,confusion_matrix
import pickle

Cell 2 - Reading and Importing Data

In [None]:
# Load the liver cirrhosis records from the CSV file (path is relative to
# the current working directory).
dataset = pd.read_csv(filepath_or_buffer='liver_cirrhosis.csv')
# Uncomment to preview the first few rows:
# print(dataset.head())

Cell 3 - Checking for null values and names of features

In [None]:
# Exploratory checks, left commented out: the first lists the column names,
# the second counts null values per column.
# print(dataset.columns.tolist())

# print(dataset.isnull().sum()) # No empty values

Cell 4 - Storing feature names and dataset features

In [None]:
# Columns handed to the one-hot encoder in the preprocessing step.
categorical_columns = [
    'Status',
    'Drug',
    'Sex',
    'Ascites',
    'Hepatomegaly',
    'Spiders',
    'Edema',
]

# Columns handed to the standard scaler in the preprocessing step.
numerical_columns = [
    'N_Days',
    'Age',
    'Bilirubin',
    'Cholesterol',
    'Albumin',
    'Copper',
    'Alk_Phos',
    'SGOT',
    'Tryglicerides',
    'Platelets',
    'Prothrombin',
]

# 'Stage' is the prediction target; every other column is a model input.
y = dataset['Stage']
X = dataset.drop('Stage', axis=1)

Cell 5 - Splitting the data for training and testing

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

Cell 6 - Implementing Pipeline

In [None]:
# Preprocessing: standardise the numeric columns and one-hot encode the
# categorical ones. Categories not seen during fit are encoded as all zeros
# (handle_unknown='ignore'); columns in neither list pass through unchanged.
preprocces = ColumnTransformer(
    transformers=[
        ('scaler', StandardScaler(), numerical_columns),
        (
            'ohe',
            OneHotEncoder(sparse_output=False, handle_unknown='ignore'),
            categorical_columns,
        ),
    ],
    remainder='passthrough',
)

# Seed the forest so the fitted model, the reported metrics and the exported
# pickle are reproducible across runs, matching the seeded train/test split.
pipeline = Pipeline(steps=[
    ('preprocess', preprocces),
    ('rf', RandomForestClassifier(random_state=0)),
])

# Fit preprocessing and classifier together on the training rows only, then
# predict the held-out rows for evaluation.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

Cell 7 - Evaluation of Model

In [None]:
# Score the held-out predictions. average='weighted' weights each class's
# score by its number of true instances.
weighted_precision = precision_score(y_test, y_pred, average='weighted')
weighted_recall = recall_score(y_test, y_pred, average='weighted')
stage_confusion = confusion_matrix(y_test, y_pred)

print('Precision', weighted_precision)
print('Recall', weighted_recall)
print('Confusion Matrix \n', stage_confusion)

Cell 8 - Exporting pipeline

In [None]:
# Serialise the fitted pipeline (preprocessing + classifier) to disk.
# The `with` block guarantees the file is flushed and closed even if
# pickling raises, instead of leaving the handle to the garbage collector.
# NOTE: only unpickle this file from a trusted source; loading a pickle can
# execute arbitrary code.
with open('pipeline.pkl', 'wb') as model_file:
    pickle.dump(pipeline, model_file)