**Análisis exploratorio de los datos clínicos**

In [25]:
import os
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
from plotly.subplots import make_subplots
from styles import *
from PIL import Image

In [26]:
# Load the clinical records CSV into `df`; the clinical-data cells below all read from this frame.
df = pd.read_csv('./data/animal_disease_prediction_cleaned.csv')

In [27]:
# Bar chart of the eight most frequent values in the Disease_Prediction column.
# value_counts() already sorts by descending frequency, so the first eight
# rows are the top eight.
disease_count = df['Disease_Prediction'].value_counts().iloc[:8]

fig1 = go.Figure()
fig1.add_trace(
    go.Bar(
        x=disease_count.index,
        y=disease_count.values,
        marker_color='teal',
    )
)
fig1.update_layout(
    title="Top 8 Enfermedades más Frecuentes",
    xaxis_title="Enfermedad",
    yaxis_title="Frecuencia",
)

# bar_style is provided by the project's `styles` module; whatever it returns
# is the figure that gets displayed.
fig1 = bar_style(fig1)
fig1.show()

In [28]:
# Scatter of heart rate against body temperature, one semi-transparent
# marker per row of `df`.
vitals_trace = go.Scatter(
    x=df['Body_Temperature'],
    y=df['Heart_Rate'],
    mode='markers',
    marker={'color': 'teal', 'size': 8},
    opacity=0.6,
)

fig2 = go.Figure(data=[vitals_trace])
fig2.update_layout(
    title="Relación entre Temperatura Corporal y Frecuencia Cardíaca",
    xaxis_title="Temperatura Corporal",
    yaxis_title="Frecuencia Cardíaca",
)

# scatter_style comes from the project's `styles` module; its return value is
# what gets shown.
fig2 = scatter_style(fig2)
fig2.show()

In [29]:

# 2x2 grid with one bar chart per symptom column, each showing that column's
# five most frequent values.
symptom_columns = ['Symptom_1', 'Symptom_2', 'Symptom_3', 'Symptom_4']

fig3 = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=[
        'Top 5 Síntomas - Categoría 1',
        'Top 5 Síntomas - Categoría 2',
        'Top 5 Síntomas - Categoría 3',
        'Top 5 Síntomas - Categoría 4',
    ],
)

for position, symptom_col in enumerate(symptom_columns):
    # Fill the grid row by row: divmod gives the 0-based (row, col) cell.
    grid_row, grid_col = divmod(position, 2)

    # Frequency of each symptom value, keeping only the five most common.
    top_symptoms = df[symptom_col].value_counts().head(5)

    fig3.add_trace(
        go.Bar(
            x=top_symptoms.index,
            y=top_symptoms.values,
            marker_color='teal',
            showlegend=False,
        ),
        row=grid_row + 1,
        col=grid_col + 1,
    )

# All four subplots share the same axis titles and light grey grid, so the
# axes are configured figure-wide instead of cell by cell.
fig3.update_xaxes(
    title_text="Síntoma",
    showgrid=True,
    gridcolor='lightgrey',
    gridwidth=0.5,
)
fig3.update_yaxes(
    title_text="Frecuencia",
    showgrid=True,
    gridcolor='lightgrey',
    gridwidth=0.5,
)

# Fixed canvas size with transparent plot and paper backgrounds.
fig3.update_layout(
    height=900,
    width=1400,
    showlegend=False,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
)

fig3.show()

In [30]:
# Heatmap of the pairwise correlations (DataFrame.corr with its default
# settings) between two symptom columns and two vital-sign columns.
variables_correlacion = ['Appetite_Loss', 'Vomiting', 'Body_Temperature', 'Heart_Rate']
correlation_matrix = df.loc[:, variables_correlacion].corr()
axis_labels = correlation_matrix.columns

heatmap_trace = go.Heatmap(
    z=correlation_matrix,
    x=axis_labels,
    y=axis_labels,
    colorscale='Blues',
    # Pin the colour range to the full correlation interval so the scale does
    # not depend on the values that happen to be present.
    zmin=-1,
    zmax=1,
)

fig_heatmap = go.Figure(data=heatmap_trace)
fig_heatmap.update_layout(title='Correlación entre Síntomas y Signos Vitales')

# heatmap_style comes from the project's `styles` module.
fig_heatmap = heatmap_style(fig_heatmap, width=800)
fig_heatmap.show()

In [31]:
# Grouped bars: for every predicted disease, the mean of each listed symptom
# column (first group of symptoms).
sintomas = ['Appetite_Loss', 'Vomiting', 'Diarrhea', 'Coughing', 'Labored_Breathing']

per_disease = df.groupby('Disease_Prediction')[sintomas]
df_mean = per_disease.mean().reset_index()

# One bar trace per symptom; barmode='group' places them side by side.
fig_bar = go.Figure(
    data=[
        go.Bar(
            name=sintoma,
            x=df_mean['Disease_Prediction'],
            y=df_mean[sintoma],
        )
        for sintoma in sintomas
    ]
)
fig_bar.update_layout(
    title='Frecuencia de Síntomas por Enfermedad',
    barmode='group',
    xaxis_title='Enfermedad',
    yaxis_title='Frecuencia Promedio',
)

# bar_style comes from the project's `styles` module.
fig_bar = bar_style(fig_bar, width=1400)
fig_bar.show()

In [32]:
# Grouped bars: for every predicted disease, the mean of each listed symptom
# column (second group of symptoms).
sintomas = ['Lameness', 'Skin_Lesions', 'Nasal_Discharge', 'Eye_Discharge']

per_disease = df.groupby('Disease_Prediction')[sintomas]
df_mean = per_disease.mean().reset_index()

# One bar trace per symptom; barmode='group' places them side by side.
fig_bar = go.Figure(
    data=[
        go.Bar(
            name=sintoma,
            x=df_mean['Disease_Prediction'],
            y=df_mean[sintoma],
        )
        for sintoma in sintomas
    ]
)
fig_bar.update_layout(
    title='Frecuencia de Síntomas por Enfermedad',
    barmode='group',
    xaxis_title='Enfermedad',
    yaxis_title='Frecuencia Promedio',
)

# bar_style comes from the project's `styles` module.
fig_bar = bar_style(fig_bar, width=1400)
fig_bar.show()

In [33]:
# Box plot of body temperature, one box per predicted disease.
temperature_box = go.Box(
    x=df['Disease_Prediction'],
    y=df['Body_Temperature'],
    name='Temperatura',
)
fig_temp = go.Figure(data=[temperature_box])

# Centered title, transparent backgrounds and a fixed height.
fig_temp.update_layout(
    title={'text': 'Distribución de Temperatura por Enfermedad', 'x': 0.5, 'xanchor': 'center'},
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=550,
)

# Axis titles plus a thin light gray grid on both axes.
fig_temp.update_xaxes(
    title_text='Enfermedad',
    showgrid=True,
    gridcolor='lightgray',
    gridwidth=0.5,
)
fig_temp.update_yaxes(
    title_text='Temperatura',
    showgrid=True,
    gridcolor='lightgray',
    gridwidth=0.5,
)
fig_temp.show()


In [34]:
# Box plot of heart rate, one box per predicted disease, drawn in red.
heart_rate_box = go.Box(
    x=df['Disease_Prediction'],
    y=df['Heart_Rate'],
    name='Ritmo Cardíaco',
    marker_color='#E53935',
)
fig_hr = go.Figure(data=[heart_rate_box])

# Centered title, transparent backgrounds and a fixed height.
fig_hr.update_layout(
    title={'text': 'Distribución de Ritmo Cardíaco por Enfermedad', 'x': 0.5, 'xanchor': 'center'},
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=550,
)

# Axis titles plus a thin light gray grid on both axes.
fig_hr.update_xaxes(
    title_text='Enfermedad',
    showgrid=True,
    gridcolor='lightgray',
    gridwidth=0.5,
)
fig_hr.update_yaxes(
    title_text='Ritmo Cardíaco',
    showgrid=True,
    gridcolor='lightgray',
    gridwidth=0.5,
)
fig_hr.show()

In [35]:
import numpy as np
import plotly.graph_objects as go

# Scatter of body temperature against illness duration, with one trace (and
# therefore one legend entry) per predicted disease.
fig_scatter = go.Figure()

# Drop missing labels up front: a NaN label can never match the equality mask
# below (it would add an empty trace) and cannot be embedded in the hover text.
enfermedades = df['Disease_Prediction'].dropna().unique()
num_enfermedades = len(enfermedades)

# One colour per disease, interpolated linearly from green (0, 255, 0) to
# blue (0, 0, 255) across the list of diseases.
colors = [
    f'rgba(0, {int(255 - 255 * i)}, {int(255 * i)}, 1)'
    for i in np.linspace(0, 1, num_enfermedades)
]

for color, enfermedad in zip(colors, enfermedades):
    mask = df['Disease_Prediction'] == enfermedad

    fig_scatter.add_trace(go.Scatter(
        # .loc selects the rows and the column in a single step instead of
        # first copying every column of the filtered frame.
        x=df.loc[mask, 'Duration_days'],
        y=df.loc[mask, 'Body_Temperature'],
        mode='markers',
        # str() keeps the legend and hover text working for non-string labels.
        name=str(enfermedad),
        marker=dict(
            size=8,
            color=color,
            showscale=False
        ),
        # %{x} / %{y} are plotly placeholders; <extra></extra> hides the
        # secondary hover box that would repeat the trace name.
        hovertemplate=(
            'Duración: %{x} días<br>Temperatura: %{y}°C<br>'
            f'Enfermedad: {enfermedad}<extra></extra>'
        )
    ))

fig_scatter.update_layout(
    title='Relación entre Duración y Temperatura Corporal',
    xaxis_title='Duración (días)',
    yaxis_title='Temperatura Corporal (°C)',
    showlegend=True,
    legend_title='Enfermedades'
)

# scatter_style comes from the project's `styles` module.
fig_scatter = scatter_style(fig_scatter)
fig_scatter.show()

**Análisis Exploratorio de las imágenes**


In [36]:
# Root directory of the image dataset; the cells below treat each sub-directory as one class.
data = './data/pet_disease_images_augmented'

In [37]:
# Count the files available for each class, where a class is a sub-directory
# of `data`. Classes are visited in sorted order so the dict order (and with
# it the highlighted pie slice and any "first N classes" selection made from
# this dict) is the same on every machine; os.listdir alone gives no ordering
# guarantee.
class_counts = {}
for disease_class in sorted(os.listdir(data)):
    class_path = os.path.join(data, disease_class)
    if not os.path.isdir(class_path):
        continue
    with os.scandir(class_path) as entries:
        # Only regular, non-hidden files count as images; nested directories
        # and dotfiles such as .DS_Store would otherwise inflate the totals.
        class_counts[disease_class] = sum(
            1
            for entry in entries
            if entry.is_file() and not entry.name.startswith('.')
        )

# Donut chart of the share of images per class.
fig1 = go.Figure([go.Pie(
    labels=list(class_counts.keys()),
    values=list(class_counts.values()),
    marker=dict(colors=px.colors.qualitative.Set3),
    textinfo='label+percent',
    hole=0.3,
    # Pull only the first slice out of the donut to highlight it.
    pull=[0.1 if i == 0 else 0 for i in range(len(class_counts))]
)])
fig1.update_layout(
    title="Distribución de Imágenes por Clase",
    # Total image count rendered in the centre of the donut hole.
    annotations=[dict(text=f'Total: {sum(class_counts.values())}', x=0.5, y=0.5, showarrow=False)]
)

# pie_style comes from the project's `styles` module.
fig1 = pie_style(fig1, height=800)
fig1.show()

In [38]:
def plot_sample_images(class_names=None, root=None):
    """Show one sample image for each of the first four classes in a 2x2 grid.

    Args:
        class_names: Iterable of class directory names. When omitted, the keys
            of the notebook-level ``class_counts`` dict are used.
        root: Directory that contains one sub-directory per class. When
            omitted, the notebook-level ``data`` path is used.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    if class_names is None:
        class_names = class_counts.keys()
    if root is None:
        root = data

    shown_classes = list(class_names)[:4]
    fig = make_subplots(rows=2, cols=2, subplot_titles=shown_classes)

    for idx, disease_class in enumerate(shown_classes):
        class_path = os.path.join(root, disease_class)

        # Sort so the same sample is picked on every run, and ignore hidden
        # files and sub-directories, which cannot be opened as images.
        candidates = sorted(
            name
            for name in os.listdir(class_path)
            if not name.startswith('.')
            and os.path.isfile(os.path.join(class_path, name))
        )
        if not candidates:
            # Nothing to show for this class; leave its subplot empty.
            continue

        # The context manager releases the file handle as soon as the pixels
        # are copied. Converting to RGB gives go.Image a (height, width, 3)
        # array even for grayscale or palette images.
        with Image.open(os.path.join(class_path, candidates[0])) as sample:
            img = np.array(sample.convert('RGB'))

        # Fill the grid row by row: divmod gives the 0-based (row, col) cell.
        row, col = divmod(idx, 2)
        fig.add_trace(
            go.Image(z=img),
            row=row + 1, col=col + 1
        )

    fig.update_layout(
        title={"text": "Ejemplos de Imágenes por Clase", "x": 0.5, "xanchor": "center"},
        height=800,
        width=1000
    )
    fig.show()


plot_sample_images()

In [15]:
# Collapse the per-class image counts into per-species totals, deciding the
# species from the class name. A name containing 'Cat' counts as cat; 'Dog'
# is only checked when 'Cat' is absent; names with neither are ignored.
animal_types = {
    'Cat': sum(
        n_images
        for class_name, n_images in class_counts.items()
        if 'Cat' in class_name
    ),
    'Dog': sum(
        n_images
        for class_name, n_images in class_counts.items()
        if 'Cat' not in class_name and 'Dog' in class_name
    ),
}

# Donut chart of the cat / dog split.
species_pie = go.Pie(
    labels=list(animal_types),
    values=list(animal_types.values()),
    hole=0.3,
    marker={'colors': ['#ffffb3', '#8dd3c7']},
)
fig4 = go.Figure(data=[species_pie])
fig4.update_layout(title="Distribución de Imágenes por Tipo de Animal")

# pie_style comes from the project's `styles` module.
fig4 = pie_style(fig4)
fig4.show()