In [None]:
import sys
import os
# Put the parent of the current working directory on sys.path so the `src`
# package imported below can be resolved. This depends on the working
# directory at run time (presumably a sub-folder of the project such as a
# notebooks directory -- confirm), not on this file's location.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
import importlib
import src.evaluate
# Re-execute src.evaluate so that the `evaluate` name imported on the next
# line comes from the current on-disk source rather than a copy cached in
# sys.modules by an earlier import in the same session. Must stay before the
# `from src.evaluate import evaluate` line to have any effect on that name.
importlib.reload(src.evaluate)
from src.evaluate import evaluate
from src.feature_extraction import extract_features
import pandas as pd
import joblib
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Run evaluation
# evaluate() is the project's own entry point from src.evaluate; its return
# value (if any) is not used here.
evaluate()

# ROC Curve
# Project root = parent of the working directory. This is the same directory
# the import setup adds to sys.path and the one the data/report locations are
# relative to. (Previously the models path climbed two levels up from the
# working directory, which pointed outside the project.)
root_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(root_dir, 'data', 'processed', 'processed_data.csv')
models_dir = os.path.join(root_dir, 'models')
figures_dir = os.path.join(root_dir, 'reports', 'figures')

data = pd.read_csv(data_path)
X = data['text']
y = data['label']

# Rebuild a 20% hold-out with a fixed seed; only the test portion is needed.
# NOTE(review): presumably this mirrors the split used when the model was
# trained, so the test rows were not seen in training -- confirm against the
# training script.
_, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Load the persisted model and vectorizer from <root>/models.
# NOTE: joblib.load unpickles arbitrary objects; only load trusted artifacts.
model = joblib.load(os.path.join(models_dir, 'saved_model.pkl'))
vectorizer = joblib.load(os.path.join(models_dir, 'vectorizer.pkl'))

# fit=False: reuse the already-fitted vectorizer instead of refitting on the
# test texts.
X_test_vec = extract_features(X_test, vectorizer, fit=False)

# Column 1 of predict_proba is used as the positive-class score.
# NOTE(review): assumes a binary problem whose positive label sorts second in
# model.classes_ -- confirm.
y_scores = model.predict_proba(X_test_vec)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.2f})')
# Diagonal reference line (TPR == FPR).
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
# Make sure the output directory exists; savefig does not create it.
os.makedirs(figures_dir, exist_ok=True)
# Save before show(): showing first can leave an empty figure to be saved.
plt.savefig(os.path.join(figures_dir, 'roc_curve.png'))
plt.show()