In [None]:
# Cell 1: Imports and configuration
import sys
sys.path.append('../src')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from data_preprocessing import MovieDataPreprocessor

# Apply matplotlib's bundled 'seaborn-v0_8' style sheet to every figure below.
plt.style.use('seaborn-v0_8')
# IPython magic (not plain Python): render figures inline in the notebook.
%matplotlib inline

# Cell 2: Load and explore the data
# Root folder handed to the preprocessor; relative to the notebook location.
DATA_PATH = "../data/raw"
preprocessor = MovieDataPreprocessor(DATA_PATH, img_size=(224, 224))

print("🎬 EXPLORACIÓN DEL DATASET DE PELÍCULAS")
print("=" * 50)

# load_data() is expected to return the images and their matching labels
# (the cells below treat the labels as movie genres).
X, y_labels = preprocessor.load_data()
print(f"Total de imágenes cargadas: {len(X)}")
# Only report the per-image shape when there is at least one image:
# indexing X[0] on an empty dataset would raise IndexError.
if len(X) > 0:
    print(f"Forma de las imágenes: {X[0].shape}")
else:
    print("Forma de las imágenes: n/a (dataset vacío)")

# Cell 3: Class distribution analysis
from collections import Counter

# Number of samples per genre; Counter keeps labels in first-seen order.
class_counts = Counter(y_labels)
print("\n📊 Distribución de géneros:")
for genre, count in class_counts.items():
    print(f"  {genre}: {count} películas ({count / len(y_labels) * 100:.1f}%)")

# Visualization: absolute counts (bar chart) next to relative shares (pie).
plt.figure(figsize=(12, 6))
genres = list(class_counts)
counts = list(class_counts.values())

# Left panel: how many samples each genre has.
plt.subplot(1, 2, 1)
plt.bar(genres, counts, color='skyblue', edgecolor='navy', alpha=0.7)
plt.title('Distribución de Géneros de Películas')
plt.xlabel('Género')
plt.ylabel('Número de Películas')
plt.xticks(rotation=45)

# Right panel: the same counts as percentages of the whole dataset.
plt.subplot(1, 2, 2)
plt.pie(counts, labels=genres, autopct='%1.1f%%', startangle=90)
plt.title('Proporción de Géneros')

plt.tight_layout()
plt.show()

# Cell 4: Sample visualization
def plot_movie_samples(X, y_labels, samples_per_class=3):
    """Show a grid of randomly chosen sample images, one row per class.

    Args:
        X: Indexable collection of images; each ``X[idx]`` must be something
            ``Axes.imshow`` accepts.
        y_labels: Sequence of class labels aligned with ``X``.
        samples_per_class: Number of grid columns, i.e. the maximum number of
            images drawn per class. A class with fewer members shows all of
            them and leaves the remaining cells of its row blank.
    """
    # Coerce to an array so the element-wise comparison below also works when
    # the labels arrive as a plain Python list.
    labels = np.asarray(y_labels)
    unique_labels = np.unique(labels)
    if unique_labels.size == 0:
        # No classes means no rows; plt.subplots would reject a 0-row grid.
        return

    # squeeze=False keeps `axes` two-dimensional even for a single row or a
    # single column, so axes[i, j] is always a valid lookup.
    _, axes = plt.subplots(len(unique_labels), samples_per_class,
                           figsize=(15, 4 * len(unique_labels)),
                           squeeze=False)

    for i, label in enumerate(unique_labels):
        # Positions of every sample that belongs to this class.
        indices = np.where(labels == label)[0]
        # Sampling without replacement cannot draw more items than exist.
        n_draw = min(samples_per_class, len(indices))
        selected_indices = np.random.choice(indices, n_draw, replace=False)

        for j, ax in enumerate(axes[i]):
            # Hide ticks/frame on every cell, including the unused ones.
            ax.axis('off')
            if j < n_draw:
                ax.imshow(X[selected_indices[j]])
                ax.set_title(f'{label}')

    plt.tight_layout()
    plt.show()

# Draw the sample grid using the default number of images per class.
plot_movie_samples(X, y_labels)

# Cell 5: Statistical analysis of the images
def analyze_image_statistics(X):
    """Print global intensity statistics and plot one histogram per channel.

    Args:
        X: Image batch that can be indexed as ``X[:, :, :, c]`` for
            ``c`` in 0..2 (presumably shaped ``(n, height, width, 3)`` in
            RGB order — confirm against the loader that produced it).
    """
    # The channel slicing below needs a real ndarray; this makes list input
    # work and is a no-op for arrays.
    X = np.asarray(X)

    print("📈 ESTADÍSTICAS DE LAS IMÁGENES")
    print("-" * 35)

    # Summary statistics over every pixel of every channel.
    mean_intensity = np.mean(X)
    std_intensity = np.std(X)
    min_intensity = np.min(X)
    max_intensity = np.max(X)

    print(f"Intensidad promedio: {mean_intensity:.3f}")
    print(f"Desviación estándar: {std_intensity:.3f}")
    print(f"Valor mínimo: {min_intensity:.3f}")
    print(f"Valor máximo: {max_intensity:.3f}")

    # (display name, bar colour) for channel indices 0, 1 and 2.
    channels = (
        ('Canal Rojo', 'red'),
        ('Canal Verde', 'green'),
        ('Canal Azul', 'blue'),
    )

    # One histogram panel per channel, laid out side by side.
    plt.figure(figsize=(15, 5))
    for channel, (name, color) in enumerate(channels):
        plt.subplot(1, 3, channel + 1)
        plt.hist(X[:, :, :, channel].ravel(), bins=50, alpha=0.7,
                 color=color, label=name)
        plt.title(f'Distribución {name}')
        plt.xlabel('Intensidad')
        plt.ylabel('Frecuencia')

    plt.tight_layout()
    plt.show()

# Print the intensity summary and plot the per-channel histograms.
analyze_image_statistics(X)