In [67]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from styles import *

In [68]:
df = pd.read_csv('./data/animal_disease_prediction_cleaned.csv')

In [69]:
le = LabelEncoder()
df['Animal_Type'] = le.fit_transform(df['Animal_Type'])
df['Breed'] = le.fit_transform(df['Breed'])
df['Gender'] = le.fit_transform(df['Gender'])
disease_encoder = LabelEncoder()
df['Disease_Prediction'] = disease_encoder.fit_transform(df['Disease_Prediction'])

In [70]:
print("Número de enfermedades únicas:", len(disease_encoder.classes_))
print("\nEnfermedades:")
for i, disease in enumerate(disease_encoder.classes_):
    print(f"{i}: {disease}")

Número de enfermedades únicas: 139

Enfermedades:
0: Actinobacillus Pleuropneumonia
1: Actinobacillus Suis
2: African Swine Fever
3: Allergic Rhinitis
4: Arthritis
5: Blue Tongue
6: Blue Tongue Disease
7: Blue Tongue Virus
8: Bluetongue
9: Bluetongue Virus
10: Bordetella Infection
11: Bovine Coccidiosis
12: Bovine Influenza
13: Bovine Johne's Disease
14: Bovine Leukemia Virus
15: Bovine Mastitis
16: Bovine Parainfluenza
17: Bovine Pneumonia
18: Bovine Respiratory Disease
19: Bovine Respiratory Disease Complex
20: Bovine Respiratory Syncytial Virus
21: Bovine Tuberculosis
22: Bovine Viral Diarrhea
23: Canine Cough
24: Canine Distemper
25: Canine Flu
26: Canine Heartworm Disease
27: Canine Hepatitis
28: Canine Infectious Hepatitis
29: Canine Influenza
30: Canine Leptospirosis
31: Canine Parvovirus
32: Caprine Arthritis
33: Caprine Arthritis Encephalitis
34: Caprine Arthritis Encephalitis Virus
35: Caprine Pleuropneumonia
36: Caprine Respiratory Disease
37: Caprine Viral Arthritis
38: Cas

In [71]:
features = ['Animal_Type', 'Breed', 'Age', 'Gender', 'Weight', 
           'Appetite_Loss', 'Vomiting', 'Diarrhea', 'Coughing',
           'Labored_Breathing', 'Lameness', 'Skin_Lesions',
           'Nasal_Discharge', 'Eye_Discharge', 'Body_Temperature', 'Heart_Rate']

X = df[features]
y = df['Disease_Prediction']

In [72]:
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42)

In [73]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [74]:
# 1. Regresión Logística
lr_model = LogisticRegression(max_iter=1000, multi_class='multinomial')
lr_model.fit(X_train, y_train)

lr_pred_val = lr_model.predict(X_val)
lr_pred_test = lr_model.predict(X_test)





In [75]:
# 2. Naive Bayes
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

nb_pred_val = nb_model.predict(X_val)
nb_pred_test = nb_model.predict(X_test)

In [76]:
def plot_results(y_val, y_test, y_pred_val, y_pred_test, model_name):
    print(f"\nResultados para {model_name}:")
    
    print("\nReporte de Clasificación - Validación:")
    print(classification_report(y_val, y_pred_val))
    
    print("\nReporte de Clasificación - Test:")
    print(classification_report(y_test, y_pred_test))
    
    classes = np.unique(np.concatenate([y_val, y_test, y_pred_val, y_pred_test]))
    
    cm_val = confusion_matrix(y_val, y_pred_val, labels=classes)
    cm_test = confusion_matrix(y_test, y_pred_test, labels=classes)
    
    fig_val = px.imshow(
        cm_val,
        text_auto=True,
        x=classes,
        y=classes,
        color_continuous_scale="Blues",
        title=f"Matriz de Confusión (Validación) - {model_name}"
    )
    fig_val.update_layout(
        title=dict(x=0.5),
        xaxis_title="Predicho",
        yaxis_title="Verdadero"
    )
    fig_val.show()
    
    fig_test = px.imshow(
        cm_test,
        text_auto=True,
        x=classes,
        y=classes,
        color_continuous_scale="Blues",
        title=f"Matriz de Confusión (Test) - {model_name}"
    )
    fig_test.update_layout(
        title=dict(x=0.5),
        xaxis_title="Predicho",
        yaxis_title="Verdadero"
    )
    fig_test.show()

In [77]:
plot_results(y_val, y_test, lr_pred_val, lr_pred_test, "Regresión Logística")


Resultados para Regresión Logística:

Reporte de Clasificación - Validación:
              precision    recall  f1-score   support

           0       0.60      0.50      0.55         6
           1       1.00      1.00      1.00         1
           2       0.00      0.00      0.00         0
           6       1.00      1.00      1.00         1
           7       0.00      0.00      0.00         2
           8       0.33      0.20      0.25         5
           9       0.00      0.00      0.00         0
          10       1.00      1.00      1.00         1
          13       1.00      1.00      1.00         1
          14       1.00      1.00      1.00         1
          15       0.50      1.00      0.67         1
          16       1.00      1.00      1.00         1
          17       1.00      1.00      1.00         1
          18       0.25      0.75      0.38         4
          19       1.00      0.50      0.67         2
          20       0.50      1.00      0.67         1
   


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set 

In [78]:
plot_results(y_val, y_test, nb_pred_val, nb_pred_test, "Naive Bayes")


Resultados para Naive Bayes:

Reporte de Clasificación - Validación:
              precision    recall  f1-score   support

           0       1.00      0.17      0.29         6
           1       1.00      1.00      1.00         1
           2       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         1
           7       1.00      1.00      1.00         2
           8       0.60      0.60      0.60         5
           9       0.00      0.00      0.00         0
          10       1.00      1.00      1.00         1
          13       0.00      0.00      0.00         1
          14       0.00      0.00      0.00         1
          15       1.00      1.00      1.00         1
          16       1.00      1.00      1.00         1
          17       0.00      0.00      0.00         1
          18       0.00      0.00      0.00         4
          19       0.67      1.00      0.80         2
          20       0.33      1.00      0.50         1
          2


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set 

In [79]:
lr_accuracy_val = lr_model.score(X_val, y_val)
lr_accuracy_test = lr_model.score(X_test, y_test)
nb_accuracy_val = nb_model.score(X_val, y_val)
nb_accuracy_test = nb_model.score(X_test, y_test)

In [80]:
fig = go.Figure([
    go.Bar(
        name='Validación',
        x=['Regresión Logística', 'Naive Bayes'],
        y=[lr_accuracy_val, nb_accuracy_val],
        marker_color='teal'
    ),
    go.Bar(
        name='Test',
        x=['Regresión Logística', 'Naive Bayes'],
        y=[lr_accuracy_test, nb_accuracy_test],
        marker_color='lightseagreen'
    )
])

fig.update_layout(
    title="Comparación de Accuracy entre Modelos",
    yaxis_title="Accuracy",
    yaxis=dict(range=[0, 1]),
    barmode='group'
)
fig = bar_style(fig)
fig.show()