In [None]:
from google.colab import userdata

# Bind the secret to a name instead of leaving the call as the cell's last
# expression: a trailing bare expression is rendered as the cell output, which
# would store the API key in the saved notebook.
# NOTE(review): the key is not referenced by any of the visible cells; confirm
# it is needed before keeping this cell.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY_1')

In [None]:
# ==========================================
# Random Forest
# ==========================================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import export_text, plot_tree

In [None]:
# Load the dataset from the runtime's /content directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='/content/Varejo.csv')

# Preview the first five rows (the default for head) as the cell output.
df.head(n=5)

In [None]:
# ==========================================
# 2. Train/test split
# ==========================================
# Target column first; every remaining column becomes the feature matrix.
y = df["CompEmbRec"]
X = df.drop(columns="CompEmbRec")

# Hold out 30% of the rows for testing with a fixed seed; stratify=y asks
# scikit-learn to split in a stratified fashion using the target as labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.3,
    random_state=42,
)


In [None]:
# ==========================================
# 3. Random Forest model
# ==========================================
# Hyperparameters gathered in one mapping so they can be read at a glance.
rf_params = {
    "n_estimators": 100,       # number of trees
    "max_depth": 6,            # maximum depth of each tree
    "min_samples_split": 5,
    "random_state": 42,        # fixed seed
}
modelo_rf = RandomForestClassifier(**rf_params)

# Kept as the cell's last expression so the fitted estimator is displayed.
modelo_rf.fit(X_train, y_train)

In [None]:
# ==========================================
# 4. Evaluation
# ==========================================
# Predict on the held-out test partition.
y_pred = modelo_rf.predict(X_test)

# Confusion matrix held in one variable and shared by the printout and the plot.
matriz_confusao = confusion_matrix(y_test, y_pred)

print("\nAcurácia:", accuracy_score(y_test, y_pred))
print("\nMatriz de Confusão:\n", matriz_confusao)
print("\nRelatório de Classificação:\n", classification_report(y_test, y_pred))

# Confusion-matrix heatmap drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(matriz_confusao, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set(
    xlabel="Previsto",
    ylabel="Real",
    title="Matriz de Confusão - Random Forest",
)
plt.show()

In [None]:

# ==========================================
# 5. Feature importance
# ==========================================
# Pair each importance score from the fitted forest with its column name.
importancias = pd.Series(modelo_rf.feature_importances_, index=X.columns)
# Ascending order for the chart: horizontal bars are drawn bottom-up, so the
# largest importance ends up at the top of the plot.
importancias = importancias.sort_values(ascending=True)

plt.figure(figsize=(8,5))
importancias.plot(kind="barh", color="darkblue")
plt.title("Importância das Variáveis no Random Forest")
plt.xlabel("Importância")
plt.show()

# Print the ranking once, most important feature first.
print("\nImportância das Variáveis:")
print(importancias.sort_values(ascending=False))
