# Librerias Utilizadas

In [31]:
import pandas as pd
from apyori import apriori
from sklearn.compose import make_column_selector, ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, KBinsDiscretizer

# Lectura del .csv

In [33]:
df = pd.read_csv("movies_clean_scaled.csv", encoding="ISO-8859-1")

# Reglas de asociación

In [45]:
df.fillna({"productionCompanyCountry": "Desconocido", "productionCountry": "Desconocido", "originalLanguage": "Desconocido"}, inplace=True)

selector_num = make_column_selector(dtype_include=['int64', 'float64'])
selector_cat = make_column_selector(dtype_include=['object'])

variables_num = selector_num(df)
variables_cat = selector_cat(df)

preprocesador = ColumnTransformer(
    transformers=[
        ("num", KBinsDiscretizer(n_bins=5, encode="ordinal", strategy="uniform"), variables_num),
        ("cat", OneHotEncoder(sparse_output=False, drop="first"), variables_cat)
    ]
)

datos_transformados = preprocesador.fit_transform(df)

etiquetas_num_binarias = [f"{variables_num[i]}_bin{j}" for i in range(len(variables_num)) for j in range(5)]
etiquetas_cat_codificadas = preprocesador.named_transformers_["cat"].get_feature_names_out(variables_cat)

todas_etiquetas = etiquetas_num_binarias[:datos_transformados.shape[1] - len(etiquetas_cat_codificadas)] + list(etiquetas_cat_codificadas)

df_transformado = pd.DataFrame(datos_transformados, columns=todas_etiquetas)

transacciones = df_transformado.apply(lambda row: row[row == 1].index.tolist(), axis=1).tolist()

min_soporte = 0.05  
min_confianza = 0.6  
reglas = apriori(transacciones, min_support=min_soporte, min_confidence=min_confianza, min_lift=1.2, min_length=2)

lista_reglas = list(reglas)
reglas_asociacion = []

for regla in lista_reglas:
    for estadistica_ordenada in regla.ordered_statistics:
        reglas_asociacion.append({
            "Regla": f"{list(estadistica_ordenada.items_base)} → {list(estadistica_ordenada.items_add)}",
            "Soporte": regla.support,
            "Confianza": estadistica_ordenada.confidence,
            "Lift": estadistica_ordenada.lift 
        })

df_reglas = pd.DataFrame(reglas_asociacion)
print(df_reglas.head())

df_reglas.to_csv("reglas_asociacion.csv", index=False)

                                               Regla   Soporte  Confianza  \
0  ['productionCompanyCountry_US'] → ['originalLa...  0.101629   0.991579   
1  ['productionCompanyCountry_US|US'] → ['origina...  0.078433   1.000000   
2  ['originalLanguage_en'] → ['productionCountry_...  0.495846   0.638511   
3  ['productionCountry_United States of America']...  0.495846   0.998046   
4  ['originalLanguage_ja'] → ['productionCountry_...  0.061064   0.944908   

        Lift  
0   1.276875  
1   1.287719  
2   1.285202  
3   1.285202  
4  15.338623  
