In [None]:
!pip install mlxtend




In [None]:
# Cargar librerías
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

**Reglas de asociación**


Ver estadísticas más altas a través de tipos y relación de estadísticas con tipo

In [None]:
# 1. Thresholds that define the concept "high": a stat is tagged as high when
#    its value is greater than or equal to the number listed here.
stat_thresholds = {
    'HP': 95,
    'Attack': 110,
    'Defense': 100,
    'Sp._Atk': 110,
    'Sp._Def': 100,
    'Speed': 105
}

def build_attributes(row, thresholds=None):
    """Build the list of item tags (one "transaction") for a single row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'Type_1', 'Type_2' and every stat named in ``thresholds``.
    thresholds : dict, optional
        Maps a stat name to the minimum value tagged as "high". When omitted,
        the module-level ``stat_thresholds`` is used, so existing one-argument
        calls (e.g. ``df.apply(build_attributes, axis=1)``) behave as before.

    Returns
    -------
    list of str
        ``'Type=<value>'`` for each non-missing type column (Type_1 first),
        followed by ``'High_<stat>'`` for each stat whose value reaches its
        threshold, in the iteration order of ``thresholds``.

    Raises
    ------
    KeyError
        If ``row`` lacks one of the type columns or a stat named in
        ``thresholds``.
    """
    if thresholds is None:
        thresholds = stat_thresholds

    # Types: a missing value (None / NaN) produces no tag.
    tags = [
        f'Type={row[type_col]}'
        for type_col in ('Type_1', 'Type_2')
        if pd.notna(row[type_col])
    ]

    # High stats: a NaN stat compares False against the threshold, so it is skipped.
    tags.extend(
        f'High_{stat}'
        for stat, threshold in thresholds.items()
        if row[stat] >= threshold
    )
    return tags

# One transaction (list of tags) per row of df.
transactions = df.apply(build_attributes, axis=1).tolist()

# Encode the transactions as a table with one column per distinct tag.
te = TransactionEncoder()
te_array = te.fit_transform(transactions)
df_encoded = pd.DataFrame(te_array, columns=te.columns_)

# Mine frequent itemsets, then derive rules that reach the confidence threshold.
frequent_itemsets = apriori(df_encoded, min_support=0.01, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.3)


def _mentions(itemset, marker):
    """Return True when at least one item of `itemset` contains `marker`."""
    return any(marker in str(item) for item in itemset)


# Keep rules of the form "... a type ... -> ... a high stat ...".
has_type_antecedent = rules['antecedents'].apply(lambda items: _mentions(items, 'Type='))
has_high_consequent = rules['consequents'].apply(lambda items: _mentions(items, 'High_'))
filtered_rules = rules[has_type_antecedent & has_high_consequent]

# Report the strongest associations first (highest lift on top).
report_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
resultado = filtered_rules[report_columns].sort_values(by='lift', ascending=False)
print(resultado.head(25))


                     antecedents     consequents   support  confidence      lift
40      (High_HP, Type=Fighting)   (High_Attack)  0.013283    0.933333  5.346377
62    (High_Sp._Def, Type=Steel)  (High_Defense)  0.015180    0.888889  4.804558
61  (Type=Psychic, High_Defense)  (High_Sp._Def)  0.014231    0.714286  4.764919
67        (High_HP, Type=Dragon)  (High_Sp._Atk)  0.012334    0.565217  4.445814
36          (High_HP, Type=Dark)   (High_Attack)  0.012334    0.764706  4.380435
43    (High_Attack, Type=Ground)       (High_HP)  0.012334    0.684211  4.168543
30     (High_Attack, Type=Steel)  (High_Defense)  0.015180    0.761905  4.118193
38        (High_HP, Type=Dragon)   (High_Attack)  0.013283    0.608696  3.486767
69   (Type=Dragon, High_Sp._Atk)       (High_HP)  0.012334    0.565217  3.443579
39    (High_Attack, Type=Dragon)       (High_HP)  0.013283    0.560000  3.411792
42        (High_HP, Type=Ground)   (High_Attack)  0.012334    0.590909  3.384881
14                  (Type=St

Relación entre estadísticas, generación y si es legendario

In [None]:
# Work on a copy so the helper columns added below are not written to df.
df_assoc = df.copy()

# Discretize Attack and Speed into three quantile-based bins.
tercile_labels = ['Low', 'Medium', 'High']
df_assoc['Attack_level'] = pd.qcut(df_assoc['Attack'], q=3, labels=tercile_labels)
df_assoc['Speed_level'] = pd.qcut(df_assoc['Speed'], q=3, labels=tercile_labels)

# String versions of Legendary and Generation, used to build the item tags.
df_assoc['Legendary_str'] = df_assoc['Legendary'].astype(str)
df_assoc['Generation_str'] = 'Gen_' + df_assoc['Generation'].astype(str)


def _row_to_items(row):
    """Return the four item tags (attack, speed, legendary, generation) for one row."""
    return [
        'Attack_{}'.format(row['Attack_level']),
        'Speed_{}'.format(row['Speed_level']),
        'Legendary_{}'.format(row['Legendary_str']),
        '{}'.format(row['Generation_str']),
    ]


# One list of item tags per row.
df_assoc['Attributes'] = df_assoc.apply(_row_to_items, axis=1)

# Convert to transactions: one list of item tags per row.
transactions = df_assoc['Attributes'].tolist()

# Encode the transactions as a table with one column per distinct tag.
te = TransactionEncoder()
te_array = te.fit_transform(transactions)
df_encoded = pd.DataFrame(te_array, columns=te.columns_)

# apriori / association_rules come from the notebook's import cell; they are
# not re-imported here.
frequent_itemsets = apriori(df_encoded, min_support=0.05, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)

# Keep only the rules that involve a "Legendary_" or "Attack_" item on either
# side. The earlier pattern also listed "Speed_" and "Gen_"; every item built
# for these transactions carries one of those four prefixes, so that pattern
# matched every rule and filtered nothing.
TARGET_PREFIXES = ('Legendary_', 'Attack_')


def _involves_target(itemset):
    """Return True when any item of `itemset` starts with one of TARGET_PREFIXES."""
    return any(str(item).startswith(TARGET_PREFIXES) for item in itemset)


filtered_rules = rules[
    rules['antecedents'].apply(_involves_target) |
    rules['consequents'].apply(_involves_target)
]

# Sort by lift, strongest associations first.
filtered_rules = filtered_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].sort_values(by='lift', ascending=False)

filtered_rules.head(20)


Unnamed: 0,antecedents,consequents,support,confidence,lift
21,"(Legendary_1, Attack_High)",(Speed_High),0.055977,0.75641,2.344872
16,(Legendary_1),(Speed_High),0.078748,0.734513,2.276991
20,"(Speed_High, Legendary_1)",(Attack_High),0.055977,0.710843,2.263531
1,(Legendary_1),(Attack_High),0.074004,0.690265,2.198006
32,"(Speed_Medium, Gen_5)",(Legendary_0),0.056926,1.0,1.120085
25,"(Speed_Medium, Attack_Low)",(Legendary_0),0.117647,1.0,1.120085
22,"(Gen_1, Attack_Low)",(Legendary_0),0.053131,1.0,1.120085
31,"(Speed_Medium, Gen_3)",(Legendary_0),0.050285,1.0,1.120085
30,"(Gen_1, Speed_Medium)",(Legendary_0),0.052182,1.0,1.120085
2,(Attack_Low),(Legendary_0),0.330171,0.988636,1.107357


RandomForestClassifier:
modelo para predecir si es legendario

In [None]:
# 1. One-hot encode both type columns (prefixes T1 / T2 keep them apart).
type1_dummies = pd.get_dummies(df['Type_1'], prefix='T1')
type2_dummies = pd.get_dummies(df['Type_2'], prefix='T2')

# Normalize the dummy column names with str.capitalize: first character
# upper-case, every following character lower-case.
type1_dummies.columns = [col.capitalize() for col in type1_dummies.columns]
type2_dummies.columns = [col.capitalize() for col in type2_dummies.columns]

# 2. Concatenate the dummies with df.
df_full = pd.concat([df, type1_dummies, type2_dummies], axis=1)

# 3. Define the predictor variables and the target.
# NOTE(review): stats_cols is not defined in this cell; it is presumably a list
# of stat column names created in an earlier cell — confirm before running on a
# fresh kernel.
features = stats_cols + list(type1_dummies.columns) + list(type2_dummies.columns)
X = df_full[features]
y = df_full['Legendary']

# 4. Train/test split.
# stratify=y keeps the proportion of legendary rows the same in both splits;
# the target is imbalanced, so an unstratified split can leave the training
# set with a different share of positives than the test set.
RANDOM_STATE = 1545
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE, stratify=y
)

# 5. Train the model.
clf = RandomForestClassifier(n_estimators=1000, random_state=RANDOM_STATE)
clf.fit(X_train, y_train)

# 6. Predict on the held-out split and evaluate.
y_pred = clf.predict(X_test)

print("=== Accuracy ===")
print(accuracy_score(y_test, y_pred))

print("\n=== Classification Report ===")
print(classification_report(y_test, y_pred))

print("\n=== Matriz de Confusión ===")
print(confusion_matrix(y_test, y_pred))

=== Accuracy ===
0.9022082018927445

=== Classification Report ===
              precision    recall  f1-score   support

           0       0.90      1.00      0.95       276
           1       1.00      0.24      0.39        41

    accuracy                           0.90       317
   macro avg       0.95      0.62      0.67       317
weighted avg       0.91      0.90      0.88       317


=== Matriz de Confusión ===
[[276   0]
 [ 31  10]]
