In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Project-local styling helpers (star-import kept as in the original notebook).
from styles import *

In [2]:
# Load the cleaned dataset from the repository's data directory.
df = pd.read_csv(
    filepath_or_buffer='./data/animal_disease_prediction_cleaned.csv',
)

In [3]:
# Integer-encode the categorical feature columns.
# Each column gets its own fitted encoder: re-fitting a single shared encoder
# would discard the mapping of every column but the last, making it impossible
# to decode 'Animal_Type' or 'Breed' later.
feature_encoders = {}
for column in ('Animal_Type', 'Breed', 'Gender'):
    feature_encoders[column] = LabelEncoder()
    df[column] = feature_encoders[column].fit_transform(df[column])

# Backward compatibility: `le` used to end up fitted on 'Gender' (the last
# column it was applied to), so keep that binding.
le = feature_encoders['Gender']

# The target has a dedicated encoder so predictions can be mapped back to
# disease names through `disease_encoder.classes_` / `inverse_transform`.
disease_encoder = LabelEncoder()
df['Disease_Prediction'] = disease_encoder.fit_transform(df['Disease_Prediction'])

In [4]:
# Report how many distinct target classes exist and which integer code
# the target encoder assigned to each of them.
disease_names = disease_encoder.classes_
print("Número de enfermedades únicas:", len(disease_names))
print("\nEnfermedades:")
for code, name in enumerate(disease_names):
    print(f"{code}: {name}")

Número de enfermedades únicas: 31

Enfermedades:
0: Allergic Rhinitis
1: Arthritis
2: Asthma
3: Bronchitis
4: Chlamydia
5: Conjunctivitis
6: Coronavirus
7: Cough
8: Distemper
9: FIV
10: Flu
11: Fungal Infection
12: Gastroenteritis
13: Heartworm
14: Hepatitis
15: Herpes
16: Hyperthyroidism
17: IBD
18: Intestinal Parasites
19: Kidney Disease
20: Leptospirosis
21: Leukemia
22: Lyme Disease
23: Pancreatitis
24: Panleukopenia
25: Parvovirus
26: Peritonitis
27: Respiratory Disease
28: Respiratory Infection
29: Ringworm
30: Tick-Borne Disease


In [5]:
# Columns fed to the models as inputs; the target is 'Disease_Prediction'.
features = [
    'Animal_Type',
    'Breed',
    'Age',
    'Gender',
    'Weight',
    'Appetite_Loss',
    'Vomiting',
    'Diarrhea',
    'Coughing',
    'Labored_Breathing',
    'Lameness',
    'Skin_Lesions',
    'Nasal_Discharge',
    'Eye_Discharge',
    'Body_Temperature',
    'Heart_Rate',
]

y = df['Disease_Prediction']
X = df[features]

In [6]:
# Two-stage split: first hold out 20% of the rows for testing, then take 25%
# of the remaining 80% for validation (roughly 60/20/20 overall).
# NOTE(review): the splits are not stratified; consider `stratify=` if the
# class imbalance matters — confirm every class has enough rows first.
X_temp, X_test, y_temp, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.25,
    random_state=42,
)

In [7]:
# Standardise the features. The scaler statistics are learned from the
# training split only and then reused for validation and test.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [8]:
# 1. Logistic Regression
# `multi_class` is intentionally not passed: the argument is deprecated in
# recent scikit-learn releases, and with the default lbfgs solver a multiclass
# target is already fitted as a multinomial model.
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)

# Predictions on the held-out splits, consumed later by plot_results.
lr_pred_val = lr_model.predict(X_val)
lr_pred_test = lr_model.predict(X_test)



In [9]:
# 2. Naive Bayes
# Gaussian naive Bayes trained on the same standardised training split.
nb_model = GaussianNB().fit(X_train, y_train)

# Predictions on the held-out splits, consumed later by plot_results.
nb_pred_val, nb_pred_test = nb_model.predict(X_val), nb_model.predict(X_test)

In [10]:
def _show_confusion_matrix(cm, classes, split_name, model_name):
    """Display one confusion matrix as an annotated Plotly heatmap.

    Args:
        cm: Square confusion matrix whose rows/columns follow `classes`.
        classes: Class labels used for both axes.
        split_name: Name of the data split shown in the title.
        model_name: Name of the model shown in the title.
    """
    fig = px.imshow(
        cm,
        text_auto=True,
        x=classes,
        y=classes,
        color_continuous_scale="Blues",
        title=f"Matriz de Confusión ({split_name}) - {model_name}"
    )
    fig.update_layout(
        title=dict(x=0.5),
        xaxis_title="Predicho",
        yaxis_title="Verdadero"
    )
    fig.show()


def plot_results(y_val, y_test, y_pred_val, y_pred_test, model_name):
    """Print classification reports and show confusion matrices for a model.

    Both reports are printed first (validation, then test), followed by the
    validation and test confusion-matrix heatmaps.

    Args:
        y_val: True labels of the validation split.
        y_test: True labels of the test split.
        y_pred_val: Predicted labels for the validation split.
        y_pred_test: Predicted labels for the test split.
        model_name: Human-readable model name used in headings and titles.
    """
    print(f"\nResultados para {model_name}:")

    # zero_division=0 reports 0.0 for classes that were never predicted (the
    # same value as the default) without emitting a warning per metric.
    print("\nReporte de Clasificación - Validación:")
    print(classification_report(y_val, y_pred_val, zero_division=0))

    print("\nReporte de Clasificación - Test:")
    print(classification_report(y_test, y_pred_test, zero_division=0))

    # Use the union of every label seen in either split (true or predicted)
    # so both matrices share identical axes and can be compared directly.
    classes = np.unique(np.concatenate([y_val, y_test, y_pred_val, y_pred_test]))

    cm_val = confusion_matrix(y_val, y_pred_val, labels=classes)
    cm_test = confusion_matrix(y_test, y_pred_test, labels=classes)

    _show_confusion_matrix(cm_val, classes, "Validación", model_name)
    _show_confusion_matrix(cm_test, classes, "Test", model_name)

In [11]:
# Evaluate the logistic regression on the validation and test splits.
plot_results(
    y_val=y_val,
    y_test=y_test,
    y_pred_val=lr_pred_val,
    y_pred_test=lr_pred_test,
    model_name="Regresión Logística",
)


Resultados para Regresión Logística:

Reporte de Clasificación - Validación:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        67
           1       1.00      1.00      1.00        50
           2       1.00      1.00      1.00       199
           3       1.00      1.00      1.00        65
           4       0.82      0.87      0.85       119
           5       1.00      1.00      1.00        70
           6       1.00      1.00      1.00        62
           7       0.73      0.74      0.73       640
           8       0.66      0.57      0.61       702
           9       1.00      1.00      1.00        62
          10       0.62      0.56      0.59       881
          11       1.00      1.00      1.00       116
          12       0.66      0.63      0.65       570
          13       1.00      1.00      1.00       107
          14       0.54      0.57      0.55       269
          15       0.42      0.34      0.38       439
   

In [12]:
# Evaluate the naive Bayes model on the validation and test splits.
plot_results(
    y_val=y_val,
    y_test=y_test,
    y_pred_val=nb_pred_val,
    y_pred_test=nb_pred_test,
    model_name="Naive Bayes",
)


Resultados para Naive Bayes:

Reporte de Clasificación - Validación:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        67
           1       1.00      1.00      1.00        50
           2       0.53      1.00      0.69       199
           3       1.00      1.00      1.00        65
           4       0.17      1.00      0.29       119
           5       1.00      1.00      1.00        70
           6       1.00      1.00      1.00        62
           7       0.35      0.23      0.27       640
           8       0.46      0.65      0.54       702
           9       1.00      1.00      1.00        62
          10       0.00      0.00      0.00       881
          11       1.00      1.00      1.00       116
          12       0.00      0.00      0.00       570
          13       0.63      1.00      0.77       107
          14       0.12      1.00      0.21       269
          15       0.00      0.00      0.00       439
          1


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [13]:
# Mean accuracy of each model on the validation and test splits
# (classifier `.score` returns accuracy).
lr_accuracy_val, lr_accuracy_test = (
    lr_model.score(X_val, y_val),
    lr_model.score(X_test, y_test),
)
nb_accuracy_val, nb_accuracy_test = (
    nb_model.score(X_val, y_val),
    nb_model.score(X_test, y_test),
)

In [14]:
# Grouped bar chart comparing validation and test accuracy of both models.
validation_bars = go.Bar(
    name='Validación',
    x=['Regresión Logística', 'Naive Bayes'],
    y=[lr_accuracy_val, nb_accuracy_val],
    marker_color='teal',
)
test_bars = go.Bar(
    name='Test',
    x=['Regresión Logística', 'Naive Bayes'],
    y=[lr_accuracy_test, nb_accuracy_test],
    marker_color='lightseagreen',
)
fig = go.Figure([validation_bars, test_bars])

# Accuracy is a fraction, so pin the y-axis to [0, 1] for a fair comparison.
fig.update_layout(
    title="Comparación de Accuracy entre Modelos",
    yaxis_title="Accuracy",
    yaxis=dict(range=[0, 1]),
    barmode='group',
)

# `bar_style` is not defined in this notebook; presumably it comes from the
# `styles` star-import and applies the project's shared bar-chart styling.
fig = bar_style(fig)
fig.show()