In [5]:
# Cell 1: Imports 
import sys
import time
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pathlib import Path
from xgboost import XGBClassifier

from sklearn.preprocessing              import LabelEncoder
from sklearn.model_selection            import train_test_split, StratifiedKFold
from sklearn.ensemble                   import RandomForestClassifier
from sklearn.naive_bayes                import GaussianNB
from sklearn.metrics                    import accuracy_score, precision_score, recall_score, f1_score
from sklearn.feature_extraction.text    import TfidfVectorizer

from tensorflow.keras.models            import Sequential
from tensorflow.keras.layers            import Dense, Dropout, LSTM, Embedding

from scipy.stats                        import friedmanchisquare, ttest_rel
from gensim.models                      import Word2Vec

import tensorflow as tf
import transformers.modeling_tf_utils as mfu
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def ler_arquivo_txt(path):
    return Path(path).read_text(encoding="utf-8").splitlines()

given = ler_arquivo_txt("../data/GivenSteps.txt")
when = ler_arquivo_txt("../data/WhenSteps.txt")
then = ler_arquivo_txt("../data/ThenSteps.txt")

steps  = given + when + then
labels = ["Precondição"]*len(given) + ["Ação"]*len(when) + ["Resultado"]*len(then)

df = pd.DataFrame({"step": steps, "label": labels})
df.head()

Unnamed: 0,step,label
0,the user is on the login page,Precondição
1,the user has entered a valid username,Precondição
2,the user has entered a valid password,Precondição
3,the user is logged in,Precondição
4,the user is on the homepage,Precondição


In [8]:
le = LabelEncoder()
df["y"] = le.fit_transform(df["label"])
tokenized = [s.split() for s in df["step"]]

w2v = Word2Vec(sentences=tokenized, vector_size=100, window=5, min_count=1, workers=4)
tfidf = TfidfVectorizer()
X_tfidf = tfidf.fit_transform(df["step"]).toarray()

def get_features(X_tfidf, tok, w2v):
    feats = np.zeros((len(tok), X_tfidf.shape[1] + w2v.vector_size))
    for i, tks in enumerate(tok):
        vec = np.mean([w2v.wv[w] for w in tks if w in w2v.wv], axis=0)
        feats[i] = np.hstack((X_tfidf[i], vec))
    return feats

X = get_features(X_tfidf, tokenized, w2v)
y = df["y"].values

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.5, random_state=42)

In [9]:
# Multi Layer Perceptron (MLP)
def create_mlp_model(input_dim, num_classes):
    neurons = 143
    dropout_rate = 0.77
    activation = 'tanh'

    model = Sequential()
    model.add(Dense(neurons, input_dim=input_dim, activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(neurons // 2, activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return model

# Long Short‑Term Memory (LSTM)
def create_lstm_model(input_dim, num_classes):
    model = Sequential()
    model.add(Embedding(input_dim=input_dim, output_dim=64, input_length=input_dim))
    model.add(LSTM(64))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return model

# Random Forest
def create_rf_model(n_estimators=50, random_state=42):
    return RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)

# Gaussian Naive Bayes
def create_nb_model():
    return GaussianNB()

# XGBoost
def create_xgb_model():
    return XGBClassifier(
        eval_metric='mlogloss',
        random_state=42,
        n_jobs=4
    )

# BERT

# 6.1 Prepara o tokenizer e o modelo
le_tf      = LabelEncoder().fit(df["label"])
labels_tf  = le_tf.transform(df["label"])
tokenizer_ = AutoTokenizer.from_pretrained("bert-base-uncased")
encodings  = tokenizer_(
    df["step"].tolist(),
    truncation=True, padding=True,
    max_length=64, return_tensors="tf"
)

# 6.2 Monta tf.data.Dataset
dataset_bert = tf.data.Dataset.from_tensor_slices((
    dict(encodings),
    labels_tf
)).shuffle(len(df)).batch(16)

# 6.3 Carrega e compila o modelo TF
model_bert = TFAutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(le_tf.classes_)
)
model_bert.compile(
    optimizer=tf.keras.optimizers.Adam(2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# 1) MLP
mlp_model = create_mlp_model(X.shape[1], len(np.unique(y)))
mlp_model.fit(X, y, epochs=5, batch_size=32, verbose=0)

# 2) LSTM
lstm_model = create_lstm_model(X.shape[1], len(np.unique(y)))
lstm_model.fit(X, y, epochs=5, batch_size=32, verbose=0)

# 3) Random Forest
rf_model = create_rf_model()
rf_model.fit(X, y)

# 4) GaussianNB
gnb_model = create_nb_model()
gnb_model.fit(X, y)

# 5) XGBoost
xgb_model = create_xgb_model()
xgb_model.fit(X, y)


# Redireciona o keras dentro do transformers para o tf.keras
mfu.keras = tf.keras
mfu.keras.utils = tf.keras.utils

# 6.4 Fine‑tuning
model_bert.fit(dataset_bert, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x253ce017550>

In [14]:
# Guarde tudo num dicionário
models = {
    'MLP':  mlp_model,
    'LSTM': lstm_model,
    'RF':   rf_model,
    'GNB':  gnb_model,
    'XGB':  xgb_model,
    'BERT': model_bert
}

In [15]:
def classify_step_all_models(step, models, tfidf, w2v, le, tokenizer, le_tf):
    # Extrai features TF-IDF + Word2Vec
    vec_tfidf = tfidf.transform([step]).toarray()[0]
    vec_w2v   = np.mean(
        [w2v.wv[t] for t in step.split() if t in w2v.wv],
        axis=0
    ) if any(t in w2v.wv for t in step.split()) else np.zeros(w2v.vector_size)
    X_vec = np.hstack([vec_tfidf, vec_w2v]).reshape(1, -1)

    results = {}
    for name, model in models.items():
        if name == 'BERT':
            inputs = tokenizer_(step, return_tensors="tf", truncation=True, padding=True)
            logits = model(inputs).logits
            pred   = tf.argmax(logits, axis=1).numpy()[0]
            results[name] = le_tf.inverse_transform([pred])[0]
        else:
            pred = model.predict(X_vec)
            if isinstance(pred, np.ndarray) and pred.ndim == 2:
                pred = pred.argmax(axis=1)[0]
            results[name] = le.inverse_transform([int(pred)])[0]
    return results

In [18]:
step = "the expected value is 3"

predicoes = classify_step_all_models(
    step,        # string a classificar
    models,      # dict com 'MLP','LSTM','RF','GNB','XGB','BERT'
    tfidf,       # seu TfidfVectorizer treinado
    w2v,         # seu Word2Vec treinado
    le,          # LabelEncoder dos modelos tradicionais
    tokenizer_,  # AutoTokenizer do BERT
    le_tf        # LabelEncoder usado só para o BERT
)

for nome, label in predicoes.items():
    print(f"{nome} -> {label}")

MLP -> Resultado
LSTM -> Ação
RF -> Resultado
GNB -> Resultado
XGB -> Precondição
BERT -> Resultado


In [31]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

df_test = pd.read_csv("../data/real_steps.csv")  
print(df_test.head())
y_true = df_test['label'].values

for name, model in models.items():
    y_pred = [ classify_step_all_models(step, models, tfidf, w2v, le, tokenizer_, le_tf)[name]
               for step in df_test['step'] ]
    acc   = accuracy_score(y_true, y_pred)
    prec  = precision_score(y_true, y_pred, average='macro', zero_division=0)
    rec   = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1    = f1_score(y_true, y_pred, average='macro', zero_division=0)
    print(f"{name}: acc={acc:.3f}, prec={prec:.3f}, rec={rec:.3f}, f1={f1:.3f}")
    print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))

                                     step        label
0                 Given the cart is empty  Precondição
1  When the user adds an item to the cart         Ação
2   Then the cart should contain the item    Resultado
3            Given the user is logged out  Precondição
4   When the user clicks the login button         Ação
MLP: acc=0.833, prec=0.889, rec=0.833, f1=0.822
Confusion matrix:
 [[2 0 0]
 [0 1 1]
 [0 0 2]]
LSTM: acc=0.333, prec=0.111, rec=0.333, f1=0.167
Confusion matrix:
 [[2 0 0]
 [2 0 0]
 [2 0 0]]
RF: acc=0.500, prec=0.467, rec=0.500, f1=0.413
Confusion matrix:
 [[2 0 0]
 [2 0 0]
 [1 0 1]]
GNB: acc=0.667, prec=0.500, rec=0.667, f1=0.556
Confusion matrix:
 [[2 0 0]
 [0 0 2]
 [0 0 2]]
XGB: acc=0.833, prec=0.889, rec=0.833, f1=0.822
Confusion matrix:
 [[2 0 0]
 [0 2 0]
 [0 1 1]]
BERT: acc=0.833, prec=0.889, rec=0.833, f1=0.822
Confusion matrix:
 [[2 0 0]
 [0 1 1]
 [0 0 2]]


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

df_test = pd.read_csv("../data/real_steps.csv")  
print(df_test.head())
y_true = df_test['label'].values

# 1) Selecione apenas XGB e BERT:
selected_models = {k: v for k, v in models.items() if k in ['XGB', 'BERT']}

for name, model in selected_models.items():
    print(f"{name} → {model}")
    y_pred = [ classify_step_all_models(step, models, tfidf, w2v, le, tokenizer_, le_tf)[name]
               for step in df_test['step'] ]
    acc   = accuracy_score(y_true, y_pred)
    prec  = precision_score(y_true, y_pred, average='macro', zero_division=0)
    rec   = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1    = f1_score(y_true, y_pred, average='macro', zero_division=0)
    print(f"{name}: acc={acc:.3f}, prec={prec:.3f}, rec={rec:.3f}, f1={f1:.3f}")
