# Proyecto de teoría de riesgos

In [12]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold, cross_val_score 
import statsmodels.formula.api as smf
import pyreadr

In [14]:
# Read the RDS file and pull out the DataFrame it contains. The result of
# read_r is indexed by object name; the main object usually sits under the
# None key. The frame is left as the last expression so the cell displays it.
data_proyecto = pyreadr.read_r('data_encriptada.rds')[None]

data_proyecto

Unnamed: 0,IDENTIFICADOR,y_rango,y_dicotomica,x4,x5,x6,x7,x8,x9,x10,...,x103,x104,x105,x106,x107,x108,x109,x1,x2,x3
0,100913.0,y_0,0,2021-08-31,1,S,C,932.0,True,4.940656e-324,...,,,,187313.297258,83616.305251,0.284405,0.357849,6151.726167,19.0,119255.573915
1,99176.0,y_30,0,2021-05-31,1,S,C,828.0,True,0.000000e+00,...,,,,164352.312433,127734.957850,0.246262,0.309129,9591.393465,1.0,37411.199713
2,107305.0,y_30,0,2022-05-31,0,S,C,868.0,True,4.940656e-324,...,0,21,21,112131.529063,80433.365550,0.423221,0.467257,11251.425064,1.0,33641.425657
3,107539.0,y_30,0,2022-05-31,1,N,C,891.0,True,4.940656e-324,...,0,60,60,374626.594517,187313.297258,0.340834,0.360938,10138.006103,0.0,69805.958238
4,97503.0,y_30,0,2021-03-31,1,M,C,927.0,False,4.940656e-324,...,,,,482180.681329,399735.279859,0.468886,0.589293,28548.623991,0.0,70557.339480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2318,96263.0,y_30,0,2021-02-28,0,M,C,978.0,False,4.940656e-324,...,,,,,,,,,4.0,181157.899713
2319,96136.0,y_30,0,2021-02-28,1,M,C,924.0,False,4.940656e-324,...,,,,,,,,,2.0,107827.161102
2320,96805.0,y_30,0,2021-02-28,1,M,C,828.0,False,0.000000e+00,...,,,,114804.924126,89226.624968,0.459019,0.623195,18101.531467,1.0,46306.244967
2321,105061.0,y_30,0,2022-03-31,0,S,C,794.0,True,0.000000e+00,...,,,,241694.577107,187845.526249,0.323695,0.340907,12605.183419,0.0,41210.746430


In [15]:
def columnas_por_tipo(dataframe: pd.DataFrame) -> dict:
    """Group the column labels of ``dataframe`` by the string name of their dtype.

    Args:
        dataframe: Frame whose ``dtypes`` are inspected.

    Returns:
        Dict mapping ``str(dtype)`` to the list of column labels that have
        that dtype name. Keys appear in order of first occurrence and the
        labels inside each list keep their column order. An empty frame
        yields an empty dict.
    """
    tipos = dataframe.dtypes
    resultado = {}
    # Accumulate per string key: distinct dtype objects can share one name
    # (e.g. two categorical columns with different categories both print as
    # 'category'), so their columns must be merged instead of overwritten.
    for columna, tipo in zip(tipos.index.tolist(), tipos):
        resultado.setdefault(str(tipo), []).append(columna)
    return resultado


In [16]:
# Build the dtype-name -> column-labels mapping for the loaded frame.
tipos_columnas = columnas_por_tipo(data_proyecto)

# Report each dtype with its columns, followed by a 30-dash separator line.
for tipo, columnas in tipos_columnas.items():
    print(f"Tipo: {tipo}", f"Columnas: {columnas}", "-" * 30, sep="\n")

Tipo: float64
Columnas: ['IDENTIFICADOR', 'x8', 'x10', 'x11', 'x12', 'x13', 'x14', 'x15', 'x31', 'x32', 'x33', 'x34', 'x35', 'x36', 'x38', 'x40', 'x42', 'x44', 'x47', 'x48', 'x49', 'x50', 'x51', 'x52', 'x53', 'x54', 'x55', 'x56', 'x57', 'x58', 'x59', 'x60', 'x61', 'x65', 'x67', 'x68', 'x69', 'x70', 'x71', 'x73', 'x74', 'x75', 'x76', 'x77', 'x78', 'x79', 'x80', 'x81', 'x89', 'x90', 'x91', 'x92', 'x93', 'x94', 'x95', 'x96', 'x97', 'x98', 'x99', 'x100', 'x106', 'x107', 'x108', 'x109', 'x1', 'x2', 'x3']
------------------------------
Tipo: object
Columnas: ['y_rango', 'y_dicotomica', 'x4', 'x9', 'x16', 'x17', 'x18', 'x19', 'x20', 'x21', 'x22', 'x24', 'x25', 'x26', 'x27', 'x28', 'x29', 'x30', 'x37', 'x41', 'x43', 'x45', 'x62', 'x63', 'x64', 'x66', 'x72', 'x82', 'x83', 'x84', 'x85', 'x86', 'x87', 'x88', 'x101', 'x102', 'x103', 'x104', 'x105']
------------------------------
Tipo: category
Columnas: ['x46']
------------------------------


## Selección de datos

Se selecciona la columna 'y_dicotomica' como variable dependiente y se renombra como 'mal_pagador'.

In [None]:
# The dependent variable is stored as 'y_dicotomica' in the loaded frame.
# rename() ignores keys that match no column by default, so the spelling
# must match exactly or the frame comes back unchanged.
data_proyecto = data_proyecto.rename(columns={'y_dicotomica': 'mal_pagador'})

# Fail loudly if the target column is missing; this check stays valid when
# the cell is re-run after the rename has already been applied.
assert 'mal_pagador' in data_proyecto.columns, "columna 'mal_pagador' no encontrada"

### Pruebas

In [None]:
# Exploratory split: 25% of the rows are held out as the test set, and the
# fixed random_state keeps the partition the same across runs.
datos_entrenamiento_pruebas_1, datos_test_pruebas_1 = train_test_split(
    data_proyecto,
    test_size=0.25,
    random_state=42,
)

### Selección final

Se seleccionan las variables

In [None]:
# Copy the selected columns into an independent frame.
# NOTE(review): the two '' entries appear to be unfilled placeholders. No
# column named '' shows up in the dtype listing printed earlier, so this
# lookup is expected to raise KeyError until real column names are
# supplied — TODO confirm the final variable list.
data_seleccionada = data_proyecto[['', '']].copy()
