In [1]:
import wget
url = 'https://raw.githubusercontent.com/wandb/edu/main/mlops-001/lesson1/requirements.txt'
wget.download(url, 'requirements.txt')

import gdown
url = 'https://drive.google.com/file/d/1Ns80_G8fAA0xs4e-7yvCVToYD-D5bNmb/view?usp=sharing'
output = 'params.py'
gdown.download(url=url, output=output, quiet=False, fuzzy=True)

100% [....................................................................................] 82 / 82

Downloading...
From (original): https://drive.google.com/uc?id=1Ns80_G8fAA0xs4e-7yvCVToYD-D5bNmb
From (redirected): https://drive.google.com/uc?id=1Ns80_G8fAA0xs4e-7yvCVToYD-D5bNmb&confirm=t&uuid=d03694cf-2cc2-4eb1-95d0-2b145f0174e0
To: C:\Users\abia0\OneDrive\Área de Trabalho\Projetos\Faculdade\topicos_especiais\Effective-MLops\params.py

00%|█████████████████████████████████████████████████████████████████████████████████| 278/278 [00:00<00:00, 76.4kB/s]

'params.py'

In [2]:
import wandb
import pandas as pd
from fastai.vision.all import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler

import params

train_config = SimpleNamespace(
    project=params.WANDB_PROJECT,
    entity=params.ENTITY,
    bdd_classes=params.BDD_CLASSES,
    raw_data_at=params.RAW_DATA_AT,
    processed_data_at=params.PROCESSED_DATA_AT,
    job_type="train",
    framework="sklearn",
    seed=42,
)


In [3]:
def download_data():
  processed_data_at = wandb.use_artifact(f'{train_config.processed_data_at}:latest')
  processed_dataset_dir = Path(processed_data_at.download())
  df = pd.read_csv(processed_dataset_dir / 'data_split.csv')
  return df

In [4]:
def preprocess_data():
  df = download_data()
  df_copy = df.copy()
  label_encoder = LabelEncoder()
  df_copy['classe'] = label_encoder.fit_transform(df['classe'])

  return df_copy

In [5]:
def get_df(is_test=False):
  df = preprocess_data()

  if is_test:
      return df[df['Stage'] == 'test']
  else:
      df_train = df[df['Stage'] == 'train']
      df_valid = df[df['Stage'] == 'valid']
      return df_train, df_valid

In [6]:
def split(is_test=False):

  if not is_test:
      df_train, df_valid = get_df(is_test=False)
      X_train = df_train.drop(columns=['classe', '0', 'Stage'])
      y_train = df_train['classe']
      X_val = df_valid.drop(columns=['classe', '0', 'Stage'])
      y_val = df_valid['classe']
      return X_train, y_train, X_val, y_val
  else:
      df_test = get_df(is_test=True)
      X_test = df_test.drop(columns=['classe', '0', 'Stage'])
      y_test = df_test['classe']
      return X_test, y_test

In [7]:
def log_predictions(val_accuracy, val_report, test_accuracy, test_report, y_test, y_test_pred):

  print("==== Resultados - Validação ====")
  print(f"Acurácia: {val_accuracy:.4f}")
  print(val_report)

  print("\n==== Resultados - Teste ====")
  print(f"Acurácia: {test_accuracy:.4f}")
  print(test_report)

  df_test = get_df(is_test=True)
  predictions_df = pd.DataFrame({
      'id': df_test['0'],
      'Ground Truth': y_test,
      'Predictions': y_test_pred
  })

  # Criar uma tabela W&B
  table = wandb.Table(dataframe=predictions_df)

  # Logar a tabela no W&B
  wandb.log({"prediction_table": table})

def log_final_metrics(val_accuracy, test_accuracy):
    # Salvar as métricas finais
    final_metrics = {
        'val_accuracy': val_accuracy,
        'test_accuracy': test_accuracy
    }

    # Logar as métricas finais no W&B
    wandb.summary.update(final_metrics)
    print("Final metrics logged.")

In [8]:
def train_model(train_config):
  run = wandb.init(project=train_config.project, entity=train_config.entity, job_type=train_config.job_type, config=train_config)

  X_train, y_train, X_val, y_val = split()
  X_test, y_test = split(is_test=True)

  scaler = StandardScaler()
  X_train = scaler.fit_transform(X_train)
  X_val = scaler.transform(X_val)

  model = LogisticRegression(random_state=train_config.seed, max_iter=1000, class_weight='balanced')

  model.fit(X_train, y_train)

  y_val_pred = model.predict(X_val)
  y_test_pred = model.predict(X_test)

  val_accuracy = accuracy_score(y_val, y_val_pred)
  val_report = classification_report(y_val, y_val_pred)

  test_accuracy = accuracy_score(y_test, y_test_pred)
  test_report = classification_report(y_test, y_test_pred)

  log_predictions(val_accuracy, val_report, test_accuracy, test_report, y_test, y_test_pred)
  log_final_metrics(val_accuracy, test_accuracy)

  wandb.finish()

In [9]:
train_model(train_config)

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
wandb: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\abia0\_netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

wandb:   3 of 3 files downloaded.  
wandb:   3 of 3 files downloaded.  


==== Resultados - Validação ====
Acurácia: 0.8167
              precision    recall  f1-score   support

           0       0.91      0.70      0.79        30
           1       0.76      0.93      0.84        30

    accuracy                           0.82        60
   macro avg       0.83      0.82      0.81        60
weighted avg       0.83      0.82      0.81        60


==== Resultados - Teste ====
Acurácia: 0.5167
              precision    recall  f1-score   support

           0       1.00      0.03      0.06        30
           1       0.51      1.00      0.67        30

    accuracy                           0.52        60
   macro avg       0.75      0.52      0.37        60
weighted avg       0.75      0.52      0.37        60



wandb:   3 of 3 files downloaded.  


Final metrics logged.


0,1
test_accuracy,0.51667
val_accuracy,0.81667
