# 📘 ETL_Modelo_Streamlit_B
Notebook otimizado para treinar o modelo e exportar os arquivos necessários para o deploy no Streamlit Cloud: o artefato `modelo.joblib`, além de `train_and_rank.py`, `app.py` e `requirements.txt`.

## 1) Imports e Configuração

In [None]:

import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
import joblib

# Column names used by the training cell below.
ID_VAGA_COL = "id_vaga"
ID_CAND_COL = "id_candidato"
TARGET_COL = "target"

# Input CSV, serialized model location and export folder for the generated files.
TRAIN_PATH = "/content/aprovados_reprovados.csv"
MODEL_PATH = "/content/modelo.joblib"
EXPORT_DIR = "/content/artifacts_b"

# exist_ok=True keeps re-running this cell from failing once the folder exists.
os.makedirs(name=EXPORT_DIR, exist_ok=True)


## 2) Leitura dos Dados de Treino

In [None]:

# Load the labelled training CSV, report its (rows, columns) shape and
# leave a preview of the first rows as the cell output.
df_train = pd.read_csv(filepath_or_buffer=TRAIN_PATH)
n_rows, n_cols = df_train.shape
print("Treino:", (n_rows, n_cols))
df_train.head()


## 3) Pré-processamento e Treino

In [None]:

# Split the labelled frame into the feature matrix and the label vector.
X, y = df_train.drop(columns=[TARGET_COL]), df_train[TARGET_COL]

# Identifier columns must never reach the classifier. Merely leaving them out
# of the one-hot list is not enough: with remainder="passthrough" they would
# still be forwarded to the forest (leaking row identity when numeric, or
# breaking fit() when they are strings). They are therefore dropped explicitly.
id_cols = [c for c in (ID_VAGA_COL, ID_CAND_COL) if c in X.columns]

# Every other text column is one-hot encoded; handle_unknown="ignore" lets the
# encoder accept categories at prediction time that were not seen during fit.
cat_cols = [
    c for c in X.select_dtypes(include=["object"]).columns if c not in id_cols
]

preprocessor = ColumnTransformer(
    [
        ("ids", "drop", id_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
    ],
    # Columns not listed above are passed to the classifier unchanged.
    remainder="passthrough",
)

# Preprocessing and classifier live in one pipeline so the saved artifact can
# score raw DataFrames directly.
model = Pipeline(
    [
        ("preprocess", preprocessor),
        ("clf", RandomForestClassifier(n_estimators=300, random_state=42)),
    ]
)

# Stratified 80/20 hold-out split with a fixed seed for reproducibility.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
model.fit(X_train, y_train)

# Column 1 of predict_proba is the probability of the second class in
# model.classes_; it is used as the score for ROC AUC.
y_pred, y_proba = model.predict(X_val), model.predict_proba(X_val)[:, 1]
print(classification_report(y_val, y_pred))
print("ROC AUC:", roc_auc_score(y_val, y_proba))


## 4) Salvar Modelo Treinado (`modelo.joblib`)

In [None]:

# Serialize the fitted pipeline (preprocessing + classifier) to MODEL_PATH.
# NOTE(review): MODEL_PATH is outside EXPORT_DIR, while the exported app.py
# loads "modelo.joblib" by relative path — confirm the file is copied next to
# app.py before deploying.
joblib.dump(value=model, filename=MODEL_PATH)
print(f"Modelo salvo em: {MODEL_PATH}")


## 5) Exportar `train_and_rank.py`

In [None]:

# Source of the helper module shipped with the app: it loads the saved model
# and builds a per-job top-k ranking from the predicted probabilities.
train_code = """
import pandas as pd
import joblib

ID_VAGA_COL, ID_CAND_COL = "id_vaga", "id_candidato"

def load_model(model_path="modelo.joblib"):
    return joblib.load(model_path)

def rank_candidates(model, df_pending, top_k=10):
    df_pred = df_pending.copy()
    df_pred["score"] = model.predict_proba(df_pred)[:, 1]
    df_pred["rank"] = df_pred.groupby(ID_VAGA_COL)["score"].rank(ascending=False, method="first")
    ranking = (
        df_pred[df_pred["rank"] <= top_k]
        .sort_values([ID_VAGA_COL, "rank"])
        .reset_index(drop=True)
    )
    return ranking
"""

# Write the module into the export folder as UTF-8.
train_script_path = os.path.join(EXPORT_DIR, "train_and_rank.py")
with open(train_script_path, mode="w", encoding="utf-8") as script_file:
    script_file.write(train_code)
print("Arquivo train_and_rank.py exportado!")


## 6) Exportar `app.py`

In [None]:

# Source of the Streamlit front end. Compared with a naive script it:
#   * caches the deserialized model with st.cache_resource, because Streamlit
#     re-executes the whole script on every widget interaction;
#   * validates the uploaded CSV (required id columns, at least one row) and
#     stops with a readable message instead of surfacing a raw traceback.
app_code = """
import pandas as pd
import streamlit as st
from train_and_rank import load_model, rank_candidates

st.set_page_config(page_title="Netflix das Vagas", layout="wide")
st.title("🎬 Netflix das Vagas — Top 10 Candidatos por Vaga")


@st.cache_resource
def get_model(model_path="modelo.joblib"):
    # Streamlit re-executes this script on every widget interaction; caching
    # keeps the deserialized model in memory instead of reloading it each time.
    return load_model(model_path)


pending_file = st.file_uploader("📂 CSV de pendentes (não classificados)", type=["csv"])

if pending_file:
    df_pending = pd.read_csv(pending_file)

    # Fail with a readable message instead of a KeyError traceback further down.
    missing = [c for c in ("id_vaga", "id_candidato") if c not in df_pending.columns]
    if missing:
        st.error(f"❌ Colunas obrigatórias ausentes no CSV: {', '.join(missing)}")
        st.stop()
    if df_pending.empty:
        st.warning("⚠️ O CSV enviado não contém linhas para classificar.")
        st.stop()

    model = get_model("modelo.joblib")
    ranking = rank_candidates(model, df_pending, top_k=10)
    st.success("✅ Ranking gerado!")

    vagas = sorted(ranking["id_vaga"].unique().tolist())
    vaga_sel = st.sidebar.selectbox("Selecione a vaga", vagas)

    top = ranking[ranking["id_vaga"] == vaga_sel].sort_values("rank")
    st.subheader(f"Top {len(top)} candidatos para a vaga {vaga_sel}")

    # Candidate cards are laid out five per row.
    cols = st.columns(5)
    for i, (_, row) in enumerate(top.iterrows()):
        col = cols[i % 5]
        with col:
            st.markdown(f"### 👤 Candidato {row['id_candidato']}")
            st.metric("Score", f"{row['score']:.3f}")
            st.caption(f"Rank: {int(row['rank'])}")

    with st.expander("📊 Tabela completa da vaga"):
        st.dataframe(top)
else:
    st.info("⏳ Faça upload do CSV de pendentes para rodar o ranking.")
"""

# Write the app into the export folder as UTF-8 (the source contains emoji).
with open(os.path.join(EXPORT_DIR, "app.py"), "w", encoding="utf-8") as f:
    f.write(app_code)
print("Arquivo app.py exportado!")


## 7) Exportar `requirements.txt`

In [None]:

# Pinned dependencies for the deployed app, one requirement per line.
# NOTE(review): the scikit-learn pin should presumably match the version that
# trained modelo.joblib in this notebook — confirm before deploying.
pinned_requirements = (
    "pandas==2.2.2",
    "numpy==1.26.4",
    "scikit-learn==1.4.2",
    "streamlit==1.32.0",
    "joblib==1.4.2",
)
reqs = "\n".join(pinned_requirements) + "\n"

# Write the pins into the export folder.
requirements_path = os.path.join(EXPORT_DIR, "requirements.txt")
with open(requirements_path, mode="w", encoding="utf-8") as requirements_file:
    requirements_file.write(reqs)
print("Arquivo requirements.txt exportado!")
