# 03 - Modelado y Análisis Predictivo
## Proyecto Final: Análisis de Datos
### Objetivo: Desarrollar modelos predictivos para renovación de contratos y retrasos en proyectos

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

# Reaching this line means every import above completed without raising
print('Librerías cargadas correctamente')

In [None]:
# Read the curated client and project tables from the relative data folder
clients = pd.read_csv(filepath_or_buffer='../data/clients_curated.csv')
projects = pd.read_csv(filepath_or_buffer='../data/projects_curated.csv')

# Summarise what was loaded: each shape is a (rows, columns) tuple
print(
    '=== DATOS CARGADOS ===',
    f'Clientes: {clients.shape}',
    f'Proyectos: {projects.shape}',
    sep='\n',
)

### 1. Modelo: Predicción de Renovación de Contratos

In [None]:
# Select the six client attributes used as model inputs and the target column
X_contract = clients[[
    'industry',
    'size',
    'region',
    'tickets_opened_last_year',
    'avg_response_time_hours',
    'satisfaction_score',
]]
y_contract = clients['renewed_contract']

# Report the feature-matrix shape and how many rows fall under each target value
print(
    '=== MODELO 1: RENOVACIÓN DE CONTRATOS ===',
    f'Características: {X_contract.shape}',
    f'Target distribution: {y_contract.value_counts().to_dict()}',
    sep='\n',
)

In [None]:
# Preprocessing: standardise the numeric columns and one-hot encode the
# categorical ones
numeric_features = ['tickets_opened_last_year', 'avg_response_time_hours', 'satisfaction_score']
categorical_features = ['industry', 'size', 'region']

# NOTE: drop='first' is deliberately NOT combined with handle_unknown='ignore'.
# With both set, a category unseen during fit is encoded as all zeros, which is
# exactly the encoding of the dropped first level, so the model cannot tell the
# two apart (scikit-learn warns about this, and older releases reject the
# combination with a ValueError). A tree ensemble does not need a dropped
# reference level, so every category keeps its own indicator column.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Chain the preprocessing step and the random-forest classifier into a single
# estimator, so both are fitted and applied together
model_contract = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        (
            'classifier',
            RandomForestClassifier(n_estimators=100, random_state=42),
        ),
    ]
)

# Hold out 20% of the rows for testing. Stratifying on the target keeps the
# class proportions similar in both splits; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_contract,
    y_contract,
    test_size=0.2,
    stratify=y_contract,
    random_state=42,
)

print('Datos divididos para entrenamiento y prueba')