# Liver Diseases Model

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
!pip install catboost



In [7]:
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier, CatBoostRegressor

In [8]:
train = pd.read_csv(r'C:\Users\hp\Desktop\Data-Science\06.Data-Science-3MTT\Knowledge Showcase July\train_clean.csv')
test = pd.read_csv(r'C:\Users\hp\Desktop\Data-Science\06.Data-Science-3MTT\Knowledge Showcase July\train_clean.csv')

In [9]:
train = train.drop('ID', axis=1)
test = test.drop('ID', axis=1)

In [10]:
X = train.drop('Stage', axis=1)
y = train['Stage']

In [11]:
# Split the data into 30% validation and 70% training
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=101, stratify=y)

In [12]:
model=CatBoostClassifier(iterations=800,
                              learning_rate=0.1,
                              depth=4,loss_function='MultiClass', 
                         bootstrap_type='Bernoulli',
                              subsample=0.9,
                              eval_metric='TotalF1',
                              metric_period=20,
                              allow_writing_files=False)

In [13]:
model.fit(X_train, y_train, eval_set=(X_val, y_val))

0:	learn: 0.3521026	test: 0.3521094	best: 0.3521094 (0)	total: 146ms	remaining: 1m 56s
20:	learn: 0.3507487	test: 0.3509068	best: 0.3521094 (0)	total: 334ms	remaining: 12.4s
40:	learn: 0.3507487	test: 0.3509068	best: 0.3521094 (0)	total: 578ms	remaining: 10.7s
60:	learn: 0.3507487	test: 0.3509068	best: 0.3521094 (0)	total: 761ms	remaining: 9.21s
80:	learn: 0.3516847	test: 0.3509068	best: 0.3521094 (0)	total: 975ms	remaining: 8.66s
100:	learn: 0.3548960	test: 0.3522216	best: 0.3522216 (100)	total: 1.15s	remaining: 7.97s
120:	learn: 0.3590556	test: 0.3517788	best: 0.3522216 (100)	total: 1.33s	remaining: 7.47s
140:	learn: 0.3631609	test: 0.3517814	best: 0.3522216 (100)	total: 1.49s	remaining: 6.96s
160:	learn: 0.3708179	test: 0.3539323	best: 0.3539323 (160)	total: 1.66s	remaining: 6.61s
180:	learn: 0.3784458	test: 0.3557032	best: 0.3557032 (180)	total: 1.81s	remaining: 6.21s
200:	learn: 0.3878435	test: 0.3530046	best: 0.3557032 (180)	total: 1.98s	remaining: 5.91s
220:	learn: 0.4011621	tes

<catboost.core.CatBoostClassifier at 0x2803e7c27e0>

In [14]:
y_pred=model.predict(test)

In [16]:
from sklearn.metrics import f1_score

In [17]:
print(f1_score(y_val, model.predict(X_val), average='micro'))

0.5029411764705882


In [18]:
pred = model.predict(test)

In [None]:
res = pd.DataFrame(pred) 
res.index = test.index 
res.columns = ["Stage"]

In [20]:
res['Stage'].value_counts()

Stage
4.0    5791
2.0     596
3.0     368
1.0      45
Name: count, dtype: int64

In [21]:
res.to_csv('prediction_results.csv', index=False)

In [22]:
import joblib

# Saving the model
joblib.dump(model, 'liver_disease_model.pkl')

['liver_disease_model.pkl']

In [23]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler

# Load the saved model
with open('liver_disease_model.pkl', 'rb') as file:
    model = pickle.load(file)

# Function to preprocess new data and make predictions
def predict_liver_disease(new_data):
    """
    Predict liver disease stage based on input features
    
    Args:
    new_data (DataFrame): DataFrame containing the same features used in training
    
    Returns:
    array: Predicted stages (1-4)
    """
    # Preprocess the data (same as training preprocessing)
    scaler = StandardScaler()
    X_new = scaler.fit_transform(new_data)
    
    # Make predictions
    predictions = model.predict(X_new)
    
    return predictions