In [1]:
import os
import kaggle
import zipfile
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
from plotly.subplots import make_subplots
from styles import *
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, save_img


**Obtener el dataset de imágenes**


In [2]:
dest_path = './data/pet_disease_images'

kaggle.api.dataset_download_files('smadive/pet-disease-images', path=dest_path, unzip=False)
zip_file = os.path.join(dest_path, 'pet-disease-images.zip')

with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    zip_ref.extractall(dest_path)


Dataset URL: https://www.kaggle.com/datasets/smadive/pet-disease-images


**Preprocesamiento de los datos clínicos**

In [3]:
# Data augmentation
df = pd.read_csv('./data/animal_disease_prediction.csv')

def generate_variation(value, column):
    if isinstance(value, (int, float)):
        if column == 'Age':
            return value + np.random.uniform(-0.5, 0.5)
        elif column == 'Weight':
            return value * (1 + np.random.uniform(-0.05, 0.05))
        elif column == 'Heart_Rate':
            return int(value * (1 + np.random.uniform(-0.05, 0.05)))
    elif isinstance(value, str) and '°C' in value:
        temp = float(value.replace('°C', ''))
        return f"{temp + np.random.uniform(-0.3, 0.3):.1f}°C"
    return value

numeric_columns = ['Age', 'Weight', 'Body_Temperature', 'Heart_Rate']

augmented_dfs = [df]
for i in range(299):
    df_aug = df.copy()
    for column in numeric_columns:
        df_aug[column] = df_aug[column].apply(lambda x: generate_variation(x, column))
    augmented_dfs.append(df_aug)

df_augmented = pd.concat(augmented_dfs, ignore_index=True)


In [4]:
df_augmented.head()

Unnamed: 0,Animal_Type,Breed,Age,Gender,Weight,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Duration,...,Diarrhea,Coughing,Labored_Breathing,Lameness,Skin_Lesions,Nasal_Discharge,Eye_Discharge,Body_Temperature,Heart_Rate,Disease_Prediction
0,Dog,Labrador,4.0,Male,25.0,Fever,Lethargy,Appetite Loss,Vomiting,3 days,...,No,No,No,No,No,No,No,39.5°C,120,Parvovirus
1,Cat,Siamese,2.0,Female,4.5,Coughing,Sneezing,Eye Discharge,Nasal Discharge,1 week,...,No,Yes,No,No,No,Yes,Yes,38.9°C,150,Upper Respiratory Infection
2,Cow,Holstein,3.0,Female,600.0,Fever,Nasal Discharge,Labored Breathing,Coughing,5 days,...,No,Yes,Yes,No,No,Yes,No,40.1°C,90,Foot and Mouth Disease
3,Dog,Beagle,1.0,Male,10.0,Diarrhea,Vomiting,Lethargy,Appetite Loss,2 days,...,Yes,No,No,No,No,No,No,39.2°C,130,Gastroenteritis
4,Cat,Persian,5.0,Male,3.8,Lethargy,Appetite Loss,Skin Lesions,No,2 weeks,...,No,No,No,No,Yes,No,No,38.7°C,160,Fungal Infection


In [5]:
# Analisis general
df_augmented.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 129300 entries, 0 to 129299
Data columns (total 22 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   Animal_Type         129300 non-null  object 
 1   Breed               129300 non-null  object 
 2   Age                 129300 non-null  float64
 3   Gender              129300 non-null  object 
 4   Weight              129300 non-null  float64
 5   Symptom_1           129300 non-null  object 
 6   Symptom_2           129300 non-null  object 
 7   Symptom_3           129300 non-null  object 
 8   Symptom_4           129300 non-null  object 
 9   Duration            129300 non-null  object 
 10  Appetite_Loss       129300 non-null  object 
 11  Vomiting            129300 non-null  object 
 12  Diarrhea            129300 non-null  object 
 13  Coughing            129300 non-null  object 
 14  Labored_Breathing   129300 non-null  object 
 15  Lameness            129300 non-nul

In [6]:
df_augmented.describe()

Unnamed: 0,Age,Weight,Heart_Rate
count,129300.0,129300.0,129300.0
mean,5.042788,214.461944,105.095553
std,2.568255,259.509158,28.030785
min,0.500041,0.950112,66.0
25%,3.253867,7.800319,80.0
50%,4.689595,81.543391,93.0
75%,6.252004,511.720908,131.0
max,16.497766,892.486992,173.0


In [7]:
df_augmented.isnull().sum()

Animal_Type           0
Breed                 0
Age                   0
Gender                0
Weight                0
Symptom_1             0
Symptom_2             0
Symptom_3             0
Symptom_4             0
Duration              0
Appetite_Loss         0
Vomiting              0
Diarrhea              0
Coughing              0
Labored_Breathing     0
Lameness              0
Skin_Lesions          0
Nasal_Discharge       0
Eye_Discharge         0
Body_Temperature      0
Heart_Rate            0
Disease_Prediction    0
dtype: int64

In [8]:
# Correcion de formatos
df_augmented['Body_Temperature'] = df_augmented['Body_Temperature'].str.replace('°C','').astype(float)

df_augmented['Duration_days'] = df_augmented['Duration'].apply(lambda x: int(x.split()[0]) * (7 if 'week' in x else 1))
df_augmented = df_augmented.drop('Duration', axis=1)


df_augmented['Age'] = df_augmented['Age'].astype(int)
df_augmented['Weight'] = df_augmented['Weight'].round(1)

In [9]:
df_augmented = df_augmented[df_augmented['Animal_Type'].isin(['Dog', 'Cat'])]

In [10]:
df_augmented['Disease_Prediction'].unique()

array(['Parvovirus', 'Upper Respiratory Infection', 'Gastroenteritis',
       'Fungal Infection', 'Lyme Disease', 'Intestinal Parasites',
       'Canine Distemper', 'Panleukopenia', 'Kennel Cough',
       'Canine Parvovirus', 'Ringworm', 'Tick-Borne Disease', 'Arthritis',
       'Feline Herpesvirus', 'Feline Leukemia', 'Heartworm Disease',
       'Feline Infectious Peritonitis', 'Conjunctivitis',
       'Chronic Bronchitis', 'Feline Upper Respiratory Infection',
       'Pancreatitis', 'Feline Calicivirus', 'Bordetella Infection',
       'Inflammatory Bowel Disease', 'Allergic Rhinitis',
       'Feline Renal Disease', 'Feline Viral Rhinotracheitis',
       'Feline Panleukopenia', 'Canine Flu', 'Hyperthyroidism',
       'Canine Hepatitis', 'Feline Respiratory Disease Complex',
       'Feline Rhinotracheitis', 'Feline Respiratory Infection',
       'Feline Leukemia Virus', 'Leptospirosis', 'Canine Leptospirosis',
       'Feline Chlamydia', 'Canine Influenza', 'Feline Coronavirus',
       

In [11]:
disease_abbr_clean = {
    'Canine Distemper': 'Distemper',
    'Distemper': 'Distemper',
    'Canine Parvovirus': 'Parvovirus',
    'Parvovirus': 'Parvovirus',
    'Canine Flu': 'Flu',
    'Canine Influenza': 'Flu',
    'Canine Hepatitis': 'Hepatitis',
    'Canine Infectious Hepatitis': 'Hepatitis',
    'Canine Leptospirosis': 'Leptospirosis',
    'Canine Heartworm Disease': 'Heartworm',
    'Heartworm Disease': 'Heartworm',
    'Kennel Cough': 'Cough',
    'Canine Cough': 'Cough',
    'Bordetella Infection': 'Cough',

    'Feline Herpesvirus': 'Herpes',
    'Feline Calicivirus': 'Flu',
    'Feline Viral Rhinotracheitis': 'Flu',
    'Feline Rhinotracheitis': 'Flu',
    'Feline Upper Respiratory Infection': 'Respiratory Infection',
    'Feline Respiratory Infection': 'Respiratory Infection',
    'Feline Respiratory Disease Complex': 'Respiratory Disease',
    'Feline Panleukopenia': 'Parvovirus',
    'Feline Panleukopenia Virus': 'Parvovirus',
    'Feline Leukemia': 'Leukemia',
    'Feline Leukemia Virus': 'Leukemia',
    'Feline Infectious Peritonitis': 'Peritonitis',
    'Feline Renal Disease': 'Kidney Disease',
    'Feline Chlamydia': 'Chlamydia',
    'Feline Chlamydiosis': 'Chlamydia',
    'Feline Coronavirus': 'Coronavirus',
    'Feline Asthma': 'Asthma',
    'Feline Immunodeficiency Virus': 'FIV',

    'Upper Respiratory Infection': 'Respiratory Infection',
    'Gastroenteritis': 'Gastroenteritis',
    'Fungal Infection': 'Fungal Infection',
    'Lyme Disease': 'Lyme Disease',
    'Intestinal Parasites': 'Intestinal Parasites',
    'Ringworm': 'Ringworm',
    'Tick-Borne Disease': 'Tick-Borne Disease',
    'Arthritis': 'Arthritis',
    'Conjunctivitis': 'Conjunctivitis',
    'Chronic Bronchitis': 'Bronchitis',
    'Pancreatitis': 'Pancreatitis',
    'Inflammatory Bowel Disease': 'IBD',
    'Allergic Rhinitis': 'Allergic Rhinitis',
    'Hyperthyroidism': 'Hyperthyroidism',
    'Leptospirosis': 'Leptospirosis'
}

df_augmented['Disease_Prediction'] = df_augmented['Disease_Prediction'].replace(disease_abbr_clean)

In [12]:
df_augmented['Disease_Prediction'].unique()

array(['Parvovirus', 'Respiratory Infection', 'Gastroenteritis',
       'Fungal Infection', 'Lyme Disease', 'Intestinal Parasites',
       'Distemper', 'Panleukopenia', 'Cough', 'Ringworm',
       'Tick-Borne Disease', 'Arthritis', 'Herpes', 'Leukemia',
       'Heartworm', 'Peritonitis', 'Conjunctivitis', 'Bronchitis',
       'Pancreatitis', 'Flu', 'IBD', 'Allergic Rhinitis',
       'Kidney Disease', 'Hyperthyroidism', 'Hepatitis',
       'Respiratory Disease', 'Leptospirosis', 'Chlamydia', 'Coronavirus',
       'Asthma', 'FIV'], dtype=object)

In [13]:
# Transformacion de variables
yes_no_cols = ['Appetite_Loss','Vomiting','Diarrhea','Coughing','Labored_Breathing',
               'Lameness','Skin_Lesions','Nasal_Discharge','Eye_Discharge']
for col in yes_no_cols:
    df_augmented[col] = df_augmented[col].map({'Yes':1, 'No':0})



In [14]:
# Detectar outliers
num_cols = ['Age', 'Weight', 'Body_Temperature', 'Heart_Rate']
colors = px.colors.qualitative.Plotly

fig = make_subplots(rows=1, cols=4)

for i, col in enumerate(num_cols):
    fig.add_trace(
        go.Box(
            y=df_augmented[col],
            marker_color=colors[i % len(colors)],
            showlegend=True,
            name=f"<span style='font-size:13px'>{col}</span>"
        ),
        row=1, col=i+1
    )

fig = boxplot_style(fig, n_cols=4)
fig.show()

In [15]:
# Guarda el DataFrame limpio
df_augmented.to_csv('data/animal_disease_prediction_cleaned.csv', index=False)

**Preprocesamiento de las imágenes**

In [18]:
dataset_path = "./data/images"
target_size = (224, 224)

classes = sorted(os.listdir(dataset_path))
class_to_idx = {cls_name: idx for idx, cls_name in enumerate(classes)}

for cls in classes:
    print(cls)


Dental Disease in Cat
Dental Disease in Dog
Distemper in Dog
Ear Mites in Cat
Eye Infection in Cat
Eye Infection in Dog
Feline Leukemia
Feline Panleukopenia
Fungal Infection in Cat
Fungal Infection in Dog
Hot Spots in Dog
Kennel Cough in Dog
Mange in Dog
Parvovirus in Dog
Ringworm in Cat
Scabies in Cat
Skin Allergy in Cat
Skin Allergy in Dog
Tick Infestation in Dog
Urinary Tract Infection in Cat
Worm Infection in Cat
Worm Infection in Dog


In [19]:
image_extensions = {'.jpg', '.jpeg', '.png'}
image_count = sum(
    1
    for root, _, files in os.walk(dataset_path)
    for file in files
    if os.path.splitext(file.lower())[1] in image_extensions
)

print(f"Cantidad de imágenes en el dataset: {image_count}")


Cantidad de imágenes en el dataset: 1673


In [20]:
images = []
labels = []

# Formetear y normalizar imágenes
for cls in classes:
    cls_folder = os.path.join(dataset_path, cls)
    for img_file in os.listdir(cls_folder):
        img_path = os.path.join(cls_folder, img_file)
        try:
            img = load_img(img_path, target_size=target_size)
            img_array = img_to_array(img) / 255.0
            images.append(img_array)
            labels.append(class_to_idx[cls])
        except Exception as e:
            print(f"Error cargando {img_path}: {e}")



Palette images with Transparency expressed in bytes should be converted to RGBA images



In [23]:
# Data Augmentation
augmented_path = "./data/images"

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

for cls in os.listdir(dataset_path):
    cls_folder = os.path.join(dataset_path, cls)
    cls_aug_folder = os.path.join(augmented_path, cls)
    os.makedirs(cls_aug_folder, exist_ok=True)

    for img_file in os.listdir(cls_folder):
        img_path = os.path.join(cls_folder, img_file)
        img = load_img(img_path, target_size=(224,224))
        x = img_to_array(img)
        x = x.reshape((1,) + x.shape)

        i = 0
        for batch in datagen.flow(x, batch_size=1, save_to_dir=cls_aug_folder, save_prefix='aug', save_format='png'):
            i += 1
            if i >= 5:
                break

In [24]:
image_count = sum(
    1
    for root, _, files in os.walk(augmented_path)
    for file in files
    if os.path.splitext(file.lower())[1] in image_extensions
)

print(f"Cantidad de imágenes en el dataset: {image_count}")

Cantidad de imágenes en el dataset: 10189
