### Instalar bibliotecas

In [1]:
# Instalar bibliotecas ( necessário)
!pip install mlflow evidently scikit-learn pandas numpy matplotlib seaborn papermill

# Importar pacotes principais
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from evidently.report import Report
from evidently.metrics import DataDriftTable
import pickle



[notice] A new release of pip available: 22.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting mlflow
  Downloading mlflow-2.20.4-py3-none-any.whl (28.4 MB)
     --------------------------------------- 28.4/28.4 MB 10.4 MB/s eta 0:00:00
Collecting evidently
  Downloading evidently-0.6.6-py3-none-any.whl (3.5 MB)
     ---------------------------------------- 3.5/3.5 MB 10.8 MB/s eta 0:00:00
Collecting papermill
  Downloading papermill-2.6.0-py3-none-any.whl (38 kB)
Collecting mlflow-skinny==2.20.4
  Downloading mlflow_skinny-2.20.4-py3-none-any.whl (6.0 MB)
     ---------------------------------------- 6.0/6.0 MB 12.0 MB/s eta 0:00:00
Collecting Flask<4
  Downloading flask-3.1.0-py3-none-any.whl (102 kB)
     ---------------------------------------- 103.0/103.0 kB ? eta 0:00:00
Collecting alembic!=1.10.0,<2
  Downloading alembic-1.15.1-py3-none-any.whl (231 kB)
     ------------------------------------- 231.8/231.8 kB 14.8 MB/s eta 0:00:00
Collecting docker<8,>=4.0.0
  Downloading docker-7.1.0-py3-none-any.whl (147 kB)
     -------------------------------------- 147.8/

 Garantir que o ambiente está pronto para rodar os modelos.

### Configurar MLflow para rastreamento de experimentos

In [5]:
# Importando o MLflow
import mlflow
import mlflow.sklearn
import os

# Definindo um novo diretório para armazenar os logs dos experimentos
caminho_logs = "E:/Mestrado UFCG/Semestre 2024.2/Experimentos_MLflow"

# Criando o diretório se ele não existir
os.makedirs(caminho_logs, exist_ok=True)

# Configurando o MLflow para armazenar logs nesse diretório
mlflow.set_tracking_uri(f"file:///{caminho_logs}")

# Criando um novo experimento (se já existir, ele usa o existente)
mlflow.set_experiment("Evasao_UFCG")

# Exibir a configuração para garantir que está correto
mlflow.get_tracking_uri()


2025/03/13 17:31:32 INFO mlflow.tracking.fluent: Experiment with name 'Evasao_UFCG' does not exist. Creating a new experiment.


'file:///E:/Mestrado UFCG/Semestre 2024.2/Experimentos_MLflow'

### Carregar os dados corretamente e armazenar os modelos treinados

In [6]:
import pandas as pd
import mlflow
import os

# Definir caminho base dos dados
caminho_dados = "E:/Mestrado UFCG/Semestre 2024.2/Dados/Tabelas_0/"
caminho_logs = "E:/Mestrado UFCG/Semestre 2024.2/Experimentos_MLflow/"

# Configurar MLflow
mlflow.set_tracking_uri(f"file:///{caminho_logs}")
mlflow.set_experiment("Evasao_UFCG")

# Lista de tabelas para carregar
tables = ["alunos", "tabela_motivo_evasao", "tabela_dados_ingresso", "tabela_dados_pessoais", "matriculas"]

dfs = {}  # Dicionário para armazenar os DataFrames

# Iniciar execução no MLflow
with mlflow.start_run():
    mlflow.log_param("status", "iniciando_carregamento")
    
    for table in tables:
        file_path = os.path.join(caminho_dados, f"{table}.csv")
        
        try:
            # Carregar dados
            dfs[table] = pd.read_csv(file_path)
            mlflow.log_param(f"carregamento_{table}", "sucesso")
            print(f" {table} carregada com sucesso!")
        except Exception as e:
            mlflow.log_param(f"carregamento_{table}", "falha")
            print(f" Erro ao carregar {table}: {e}")
    
    mlflow.log_param("status", "carregamento_finalizado")

print(" Todas as tabelas foram processadas.")


 alunos carregada com sucesso!
 tabela_motivo_evasao carregada com sucesso!
 tabela_dados_ingresso carregada com sucesso!
 tabela_dados_pessoais carregada com sucesso!
 Erro ao carregar matriculas: Error tokenizing data. C error: Expected 1 fields in line 83624, saw 2



MlflowException: Changing param values is not allowed. Param with key='status' was already logged with value='iniciando_carregamento' for run ID='c8ee01c9263241ca8d22de5efd65e9e6'. Attempted logging new value 'carregamento_finalizado'.

The cause of this error is typically due to repeated calls
to an individual run_id event logging.

Incorrect Example:
---------------------------------------
with mlflow.start_run():
    mlflow.log_param("depth", 3)
    mlflow.log_param("depth", 5)
---------------------------------------

Which will throw an MlflowException for overwriting a
logged parameter.

Correct Example:
---------------------------------------
with mlflow.start_run():
    with mlflow.start_run(nested=True):
        mlflow.log_param("depth", 3)
    with mlflow.start_run(nested=True):
        mlflow.log_param("depth", 5)
---------------------------------------

Which will create a new nested run for each individual
model and prevent parameter key collisions within the
tracking store.