In [None]:
""" 
Load trained models, test new requirements, and build inference pipelines
"""

import sys
from pathlib import Path
import pickle

import joblib
import pandas as pd

# Locate the project root. A plain script/module defines __file__, so the root
# is taken as the directory one level above the file's own directory; inside a
# notebook kernel __file__ is undefined, so fall back to the parent of the
# current working directory.
try:
    __file__
except NameError:
    ROOT = Path.cwd().parent
else:
    ROOT = Path(__file__).resolve().parents[1]

# Make project-level modules importable, without adding a duplicate entry when
# the cell is re-run.
if sys.path.count(str(ROOT)) == 0:
    sys.path.append(str(ROOT))

from config import MODELS_DIR

In [2]:
# Restore the persisted artifacts from MODELS_DIR: the classifier is read with
# joblib, the TF-IDF vectorizer with pickle.
# NOTE: unpickling can execute arbitrary code -- only load artifacts that come
# from a trusted source.
classifier_path = MODELS_DIR / "log_reg_tfidf.pkl"
vectorizer_path = MODELS_DIR / "tfidf_vectorizer.pkl"

clf = joblib.load(classifier_path)

with open(vectorizer_path, "rb") as vectorizer_file:
    tfidf_vectorizer = pickle.load(vectorizer_file)

In [None]:
# Inference function
def predict_requirement(req_text: str):
    """Predict the label of a single requirement statement.

    The text is wrapped in a one-element list, vectorized with the
    module-level ``tfidf_vectorizer`` and passed to the module-level ``clf``.

    Args:
        req_text: The requirement sentence to classify.

    Returns:
        The first (and only) element of ``clf.predict``'s result for this
        text, i.e. the predicted label.
    """
    # Both steps operate on batches, so send a batch of one and unwrap it.
    return clf.predict(tfidf_vectorizer.transform([req_text]))[0]

In [None]:
# Smoke-test the inference function on a couple of hand-written requirements
# and print each one next to its predicted label.
sample_reqs = [
    "The system shall provide a login authentication.",
    "Response time should not exceed 2 seconds."
]

for text in sample_reqs:
    label = predict_requirement(text)
    print(text, "->", label)

The system shall provide a login authentication. -> FR
Response time should not exceed 2 seconds. -> NFR


In [None]:
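# Batch inference on a new dataset (template, intentionally left disabled).
# NOTE(review): DATA_PROCESSED is not imported in the cells above (only
# MODELS_DIR is) -- presumably it lives in config; import it before enabling.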
# new_reqs = pd.read_csv("../data/raw/new_requirements.csv")
# new_reqs["predicted_label"] = new_reqs["requirement"].apply(predict_requirement)
# new_reqs.to_csv(DATA_PROCESSED / "predicted_requirements.csv", index=False)