In [3]:
# ============================================
# INTERFAZ GRADIO - VERSIÓN NUMPY 2.x
# Notebook: 08_gradio_inference.ipynb
# ============================================

print("üì¶ INSTALANDO DEPENDENCIAS...")
print("=" * 60)

# ESTRATEGIA: Usar NumPy 2.x (compatible con los .pkl generados)
# Ignorar warnings de MediaPipe (funcionar√° de todas formas)

# Desinstalar conflictos
!pip uninstall -y numpy mediapipe opencv-python opencv-python-headless -q

# Instalar NumPy 2.x (compatible con los .pkl)
!pip install numpy>=2.0 -q

# Instalar MediaPipe (advertir√° pero funcionar√°)
!pip install mediapipe==0.10.21 --no-deps -q
!pip install opencv-python==4.8.1.78 -q
!pip install attrs flatbuffers absl-py protobuf>=3.20 -q

# Otras dependencias
!pip install matplotlib pandas tqdm -q
!pip install gradio -q

print("‚úÖ Dependencias instaladas")
print("‚ö†Ô∏è  Ignorar warnings de compatibilidad - el c√≥digo funcionar√°\n")

# ============================================
# IMPORTS
# ============================================

print("üìö IMPORTANDO LIBRER√çAS...")

import gradio as gr
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import os
import joblib
import warnings
warnings.filterwarnings('ignore')

# MediaPipe import especial
import sys
sys.path.insert(0, '/usr/local/lib/python3.12/dist-packages')

try:
    import mediapipe as mp
    print(f"‚úÖ MediaPipe: {mp.__version__}")
except ImportError as e:
    print(f"‚ö†Ô∏è  Reinstalando MediaPipe...")
    !pip install mediapipe==0.10.21 --force-reinstall -q
    import mediapipe as mp

print(f"‚úÖ NumPy: {np.__version__}")
print(f"‚úÖ OpenCV: {cv2.__version__}\n")

# ============================================
# CARGAR MODELO Y TRANSFORMERS
# ============================================

print("ü§ñ CARGANDO MODELO Y TRANSFORMERS...")
print("=" * 60)

# Artifacts required for inference: display name -> path relative to the
# current working directory.
required_files = {
    'Modelo': 'best_model_mlp.pkl',
    'Scaler': 'scaler.pkl',
    'PCA': 'pca.pkl',
    'Encoder': 'label_encoder.pkl'
}

# Stop at the first missing artifact; otherwise report each file's size.
print(f"\nüìÇ VERIFICANDO ARCHIVOS:")
for name, filepath in required_files.items():
    if not os.path.exists(filepath):
        print(f"   ‚ùå {name}: {filepath} NO ENCONTRADO")
        raise FileNotFoundError(f"{filepath} no encontrado")
    size_kb = os.path.getsize(filepath) / 1024
    print(f"   ‚úÖ {name}: {filepath} ({size_kb:.2f} KB)")

# Deserialize the fitted objects.
# NOTE(review): joblib.load unpickles arbitrary objects; only load artifacts
# from a trusted source.
print(f"\nüîÑ CARGANDO COMPONENTES...")

model = joblib.load(required_files['Modelo'])
print(f"   ‚úÖ Modelo MLP cargado")

scaler = joblib.load(required_files['Scaler'])
print(f"   ‚úÖ Scaler cargado")

pca = joblib.load(required_files['PCA'])
print(f"   ‚úÖ PCA cargado ({pca.n_components_} componentes)")

label_encoder = joblib.load(required_files['Encoder'])
print(f"   ‚úÖ Label Encoder cargado ({len(label_encoder.classes_)} clases)")

# List the class labels known to the encoder, prettified for display.
print(f"\nüè∑Ô∏è  CLASES DETECTABLES:")
for i, activity in enumerate(label_encoder.classes_):
    print(f"   {i}. {activity.replace('_', ' ').title()}")

print(f"\n‚úÖ MODELO LISTO\n")

# ============================================
# CONFIGURAR MEDIAPIPE
# ============================================

print("üé• CONFIGURANDO MEDIAPIPE...")

# Short aliases for the MediaPipe solution modules used below.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Keyword options for the single, shared Pose estimator instance.
_pose_options = dict(
    static_image_mode=False,
    model_complexity=1,
    smooth_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
pose = mp_pose.Pose(**_pose_options)

print("‚úÖ MediaPipe configurado\n")

# ============================================
# PROCESSING FUNCTIONS
# ============================================

print("üîß DEFINIENDO FUNCIONES...")

def extract_landmarks(frame):
    """Run the shared pose estimator on a frame and flatten its landmarks.

    Args:
        frame: Image array; it is converted with cv2.COLOR_BGR2RGB before
            being handed to the pose estimator.

    Returns:
        Tuple ``(landmarks, annotated_frame)``. ``annotated_frame`` is a copy
        of ``frame``, with the pose skeleton drawn on it when a pose was
        found. ``landmarks`` is a 1-D float32 array holding
        ``x, y, z, visibility`` for every detected landmark in order, or
        ``None`` when no pose landmarks were returned.
    """
    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    annotated_frame = frame.copy()

    # Nothing detected: hand back the untouched copy.
    if not results.pose_landmarks:
        return None, annotated_frame

    mp_drawing.draw_landmarks(
        annotated_frame,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
    )

    # Flatten to [x0, y0, z0, v0, x1, y1, z1, v1, ...].
    flat_values = [
        value
        for point in results.pose_landmarks.landmark
        for value in (point.x, point.y, point.z, point.visibility)
    ]
    return np.array(flat_values, dtype=np.float32), annotated_frame


def compute_geometric_features(landmarks):
    """Build the feature vector for one frame from flattened pose landmarks.

    Args:
        landmarks: 1-D array of 132 values, reshaped internally to (33, 4);
            the first three columns of each row are used as coordinates.

    Returns:
        1-D float32 array with the first 83 entries of the concatenation
        ``[landmarks, distances, angles, ratios]``.

    NOTE(review): ``landmarks`` alone contributes 132 values to the
    concatenation, so the ``[:83]`` slice keeps only raw landmark values and
    drops every distance, angle and ratio computed here. Confirm this matches
    the feature layout the scaler/PCA were fitted on.
    """
    def calculate_angle(a, b, c):
        # Angle at vertex b, in degrees, between rays b->a and b->c.
        # The 1e-6 term guards against a zero-length ray.
        ray_a = a - b
        ray_c = c - b
        denom = np.linalg.norm(ray_a) * np.linalg.norm(ray_c) + 1e-6
        cosine = np.dot(ray_a, ray_c) / denom
        return np.degrees(np.arccos(np.clip(cosine, -1.0, 1.0)))

    def span(i, j):
        # Euclidean distance between landmarks i and j.
        return np.linalg.norm(coords[i] - coords[j])

    coords = landmarks.reshape(33, 4)[:, :3]

    # Distances between selected landmark pairs
    key_pairs = [
        (11, 12), (11, 13), (13, 15), (12, 14), (14, 16),
        (11, 23), (12, 24), (23, 24), (23, 25), (25, 27),
        (24, 26), (26, 28), (27, 29), (29, 31), (28, 30),
        (30, 32), (15, 17), (16, 18), (0, 1)
    ]
    distances = [span(p1, p2) for p1, p2 in key_pairs]

    # Angles, each given as (a, vertex, c) landmark indices
    angle_triplets = [
        (11, 13, 15), (12, 14, 16), (13, 11, 23), (14, 12, 24),
        (11, 23, 25), (12, 24, 26), (23, 25, 27), (24, 26, 28),
        (25, 27, 29), (26, 28, 30), (11, 12, 24), (23, 24, 26),
        (27, 23, 11), (28, 24, 12), (15, 11, 12)
    ]
    angles = [
        calculate_angle(coords[a], coords[b], coords[c])
        for a, b, c in angle_triplets
    ]

    # Ratios and positional summaries
    torso_height = span(11, 23)
    leg_length_left = span(23, 27)
    leg_length_right = span(24, 28)
    arm_length_left = span(11, 15)
    arm_length_right = span(12, 16)

    xs = coords[:, 0]
    ys = coords[:, 1]
    ratios = [
        leg_length_left / (torso_height + 1e-6),
        leg_length_right / (torso_height + 1e-6),
        arm_length_left / (torso_height + 1e-6),
        arm_length_right / (torso_height + 1e-6),
        ys[23] - ys[11],
        ys[27] - ys[23],
        xs[15] - xs[11],
        xs[16] - xs[12],
        np.abs(xs[23] - xs[24]),
        np.abs(xs[11] - xs[12]),
        (ys[15] + ys[16]) / 2,
        (ys[27] + ys[28]) / 2,
        np.mean(ys),
        np.std(xs),
        np.std(ys)
    ]

    combined = np.concatenate([landmarks, distances, angles, ratios])
    return combined[:83].astype(np.float32)


def preprocess_and_predict(features):
    """Scale, project and classify a single feature vector.

    Uses the module-level ``scaler``, ``pca``, ``model`` and
    ``label_encoder`` objects.

    Args:
        features: Array reshaped internally to a single row ``(1, -1)``.

    Returns:
        Tuple ``(predicted_class, probabilities, confidence)``:
        the decoded label of the predicted class, a dict mapping
        ``label_encoder.classes_[i]`` to the i-th predicted probability, and
        the maximum predicted probability.
    """
    single_row = features.reshape(1, -1)
    features_pca = pca.transform(scaler.transform(single_row))

    prediction = model.predict(features_pca)[0]
    probabilities_array = model.predict_proba(features_pca)[0]
    predicted_class = label_encoder.inverse_transform([prediction])[0]

    # presumably the model's class order matches label_encoder.classes_ --
    # verify against the training code.
    probabilities = {
        label_encoder.classes_[idx]: probabilities_array[idx]
        for idx in range(len(probabilities_array))
    }

    return predicted_class, probabilities, probabilities_array.max()


def _summarize_predictions(predictions_history):
    """Build the text summary and confidence chart from per-frame predictions.

    Args:
        predictions_history: Non-empty list of dicts with the keys
            ``'frame'``, ``'activity'`` and ``'confidence'``.

    Returns:
        Tuple ``(summary_text, fig)`` with the multi-line summary string and
        a matplotlib figure showing, for every class in
        ``label_encoder.classes_``, the mean confidence of the frames that
        were predicted as that class (0 when no frame was).
    """
    df = pd.DataFrame(predictions_history)
    most_common = df['activity'].mode()[0]
    most_common_pct = (df['activity'] == most_common).sum() / len(df) * 100
    avg_confidence = df['confidence'].mean() * 100
    activity_counts = df['activity'].value_counts()

    summary_text = f"""
üìä RESUMEN

üéØ Frames: {len(df)}
üèÜ Actividad: {most_common.replace('_', ' ').title()} ({most_common_pct:.1f}%)
üìà Confianza: {avg_confidence:.1f}%

üìã Distribuci√≥n:
"""
    for activity, count in activity_counts.items():
        pct = count / len(df) * 100
        summary_text += f"   ‚Ä¢ {activity.replace('_', ' ').title()}: {count} ({pct:.1f}%)\n"

    # Chart: mean confidence per class, over frames predicted as that class.
    avg_probs = {}
    for activity in label_encoder.classes_:
        activity_data = df[df['activity'] == activity]
        avg_probs[activity] = activity_data['confidence'].mean() if len(activity_data) > 0 else 0

    # NOTE(review): the figure is created through pyplot and is not closed
    # here; confirm whether the caller releases it.
    fig, ax = plt.subplots(figsize=(10, 6))
    activities = [a.replace('_', ' ').title() for a in avg_probs.keys()]
    confidences = list(avg_probs.values())

    bars = ax.barh(activities, confidences, color='skyblue')
    # Highlight the class with the highest mean confidence.
    bars[confidences.index(max(confidences))].set_color('green')

    ax.set_xlabel('Confianza Promedio')
    ax.set_title('Confianza por Actividad', fontweight='bold')
    ax.set_xlim(0, 1)

    for i, v in enumerate(confidences):
        ax.text(v + 0.02, i, f'{v*100:.1f}%', va='center')

    plt.tight_layout()

    return summary_text, fig


def process_video(video_path, max_frames=300):
    """Classify the activity in a video frame by frame and write an annotated copy.

    Each frame goes through ``extract_landmarks``,
    ``compute_geometric_features`` and ``preprocess_and_predict``; the
    prediction (or a "no pose" notice) is drawn on the frame, which is then
    appended to an ``output_<timestamp>.mp4`` file in the working directory.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.
        max_frames: Upper bound on the number of frames to process.

    Returns:
        Tuple ``(output_path, message, fig)``. ``output_path`` is ``None``
        when the input cannot be opened or the output writer cannot be
        created. ``fig`` is ``None`` in those cases and when no pose was
        detected in any frame.
    """
    print(f"\nüé• PROCESANDO: {video_path}")

    fallback_fps = 30.0  # used when the container does not report a usable FPS

    cap = cv2.VideoCapture(video_path)
    out = None
    predictions_history = []
    frame_count = 0

    # try/finally guarantees the capture and writer are released even when a
    # frame raises midway through processing.
    try:
        if not cap.isOpened():
            return None, "‚ùå Error abriendo video", None

        # Keep the reported rate as a float for the writer so fractional
        # rates (e.g. 29.97) are not truncated; fall back when it is
        # missing, zero or not finite.
        source_fps = cap.get(cv2.CAP_PROP_FPS)
        if not np.isfinite(source_fps) or source_fps <= 0:
            source_fps = fallback_fps
        fps = int(source_fps)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        print(f"   üìä {width}x{height} @ {fps}fps, {total_frames} frames")

        # A non-positive frame count means the count is unknown: read until
        # the stream ends or max_frames is reached.
        if total_frames > 0:
            frames_to_process = min(total_frames, max_frames)
        else:
            frames_to_process = max_frames

        output_path = f'output_{datetime.now().strftime("%Y%m%d_%H%M%S")}.mp4'
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, source_fps, (width, height))
        if not out.isOpened():
            return None, "Error creando video de salida", None

        while frame_count < frames_to_process:
            ret, frame = cap.read()
            if not ret:
                break

            landmarks, annotated_frame = extract_landmarks(frame)

            if landmarks is not None:
                features = compute_geometric_features(landmarks)
                predicted_class, probabilities, confidence = preprocess_and_predict(features)

                predictions_history.append({
                    'frame': frame_count,
                    'activity': predicted_class,
                    'confidence': confidence
                })

                activity_text = predicted_class.replace('_', ' ').title()
                confidence_text = f"{confidence*100:.1f}%"

                # Overlay color by confidence band (BGR order).
                if confidence > 0.9:
                    color = (0, 255, 0)
                elif confidence > 0.7:
                    color = (0, 255, 255)
                else:
                    color = (0, 0, 255)

                # Filled black banner with a colored border, then the text.
                cv2.rectangle(annotated_frame, (10, 10), (width-10, 100), (0, 0, 0), -1)
                cv2.rectangle(annotated_frame, (10, 10), (width-10, 100), color, 3)
                cv2.putText(annotated_frame, f"Actividad: {activity_text}",
                           (20, 45), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
                cv2.putText(annotated_frame, f"Confianza: {confidence_text}",
                           (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
            else:
                cv2.putText(annotated_frame, "No se detecta pose",
                           (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            out.write(annotated_frame)
            frame_count += 1

            if frame_count % 30 == 0:
                print(f"   ‚è≥ {frame_count}/{frames_to_process}...")
    finally:
        cap.release()
        if out is not None:
            out.release()

    print(f"   ‚úÖ Procesado: {frame_count} frames\n")

    if not predictions_history:
        return output_path, "‚ö†Ô∏è Sin poses detectadas", None

    summary_text, fig = _summarize_predictions(predictions_history)
    return output_path, summary_text, fig


def gradio_interface(video):
    """Gradio callback: process the uploaded video, or prompt for one.

    Args:
        video: Value supplied by the video input; ``None`` when nothing was
            uploaded.

    Returns:
        The ``(video, text, plot)`` triple produced by ``process_video`` with
        ``max_frames=300``, or ``(None, <prompt message>, None)`` when
        ``video`` is ``None``.
    """
    if video is not None:
        return process_video(video, max_frames=300)
    return None, "‚ö†Ô∏è Sube un video", None

print("‚úÖ Funciones listas\n")

# ============================================
# LAUNCH INTERFACE
# ============================================

print("üé® CREANDO INTERFAZ...")

# Components are created in the same order as before: input first, then the
# three outputs matching the (video, text, plot) triple of gradio_interface.
video_input = gr.Video(label="üìπ Sube un video")
result_outputs = [
    gr.Video(label="üé• Video Procesado"),
    gr.Textbox(label="üìä Resumen", lines=12),
    gr.Plot(label="üìà Gr√°fico")
]

app_description = """
    **MediaPipe + MLP (99% accuracy)**

    Actividades: Caminar Hacia, Caminar Regreso, Girar, Ponerse de Pie, Sentarse

    Sube un video corto (m√°x 10s) y espera el procesamiento.
    """

# NOTE(review): gradio is installed unpinned; confirm that `allow_flagging`
# is still accepted by the installed version.
interface = gr.Interface(
    fn=gradio_interface,
    inputs=video_input,
    outputs=result_outputs,
    title="üèÉ Clasificador de Actividades Humanas",
    description=app_description,
    article="Desarrollado por Tom√°s Quintero - Universidad ICESI - Nov 2025",
    cache_examples=False,
    allow_flagging="never"
)

separator = "=" * 60
print(separator)
print("üöÄ LANZANDO APLICACI√ìN...")
print(separator)

# share=True requests a public share link; debug=True keeps the cell attached
# to the running server.
interface.launch(share=True, debug=True)


üì¶ INSTALANDO DEPENDENCIAS...
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-decision-forests 1.12.0 requires tensorflow==2.19.0, which is not installed.
albucore 0.0.24 requires opencv-python-headless>=4.9.0.80, which is not installed.
dopamine-rl 4.1.2 requires opencv-python>=3.4.8.29, which is not installed.
dopamine-rl 4.1.2 requires tensorflow>=2.2.0, which is not installed.
albumentations 2.0.8 requires opencv-python-headless>=4.9.0.80, which is not installed.
yfinance 0.2.66 requires websockets>=13.0, but you have websockets 12.0 which is incompatible.
ydf 0.13.0 requires protobuf<7.0.0,>=5.29.1, but you have protobuf 4.25.8 which is incompatible.
numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.3.5 which is incompatible.[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the pac

ValueError: <class 'numpy.random._mt19937.MT19937'> is not a known BitGenerator module.