In [1]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import joblib

In [2]:
# Trained classifiers, one per feature representation (paths are relative to this notebook).
model_tfidf_path = "../../models/sgd_type_model_tfidf.pkl"
model_w2v_path = "../../models/sgd_type_model_w2v.pkl"

# Preprocessed test sets matching each classifier's feature representation.
test_tfidf_data_path = "../../data/preprocessed/type/tfidf_test.csv"
test_w2v_data_path = "../../data/preprocessed/type/w2v_test.csv"

In [3]:
# Column holding the class label in the preprocessed test CSVs.
target_col = "Type"

# Load the classifier trained on TF-IDF features and its matching test set.
model = joblib.load(model_tfidf_path)
test_tfidf_data = pd.read_csv(test_tfidf_data_path)

# Split the frame into raw labels and the remaining feature columns.
# `drop` returns a new frame, so `test_tfidf_data` keeps the label column.
y_test = test_tfidf_data[target_col]
X_test = test_tfidf_data.drop(columns=target_col)

In [4]:
# Predict on the TF-IDF test features. The predictions are compared below
# against `le.transform(...)` output, so they are presumably already in the
# encoder's label space -- confirm against the training notebook.
y_pred = model.predict(X_test)

# Load the persisted label encoder (exposes `transform` and `classes_`;
# presumably a fitted sklearn LabelEncoder -- verify against the training code).
le = joblib.load("../../models/label_encoder_type.pkl")

# Encode from the raw label column of the loaded frame instead of from `y_test`
# itself. The previous `y_test = le.transform(y_test.values)` overwrote its own
# input, so a second run of this cell operated on already-transformed labels
# and failed. Reading from the frame makes the cell idempotent.
y_test = le.transform(test_tfidf_data[target_col].values)

In [5]:
# Per-class precision/recall/F1 for the TF-IDF model, labelled with the
# encoder's class names and rounded to three decimals.
print("Classification Report for TF-IDF Model:")
class_report = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    target_names=le.classes_,
    digits=3,
)
print(class_report)

# Raw counts of true (rows) versus predicted (columns) classes.
print("Confusion Matrix for TF-IDF Model:")
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(conf_matrix)

# Persist only the textual classification report; the confusion matrix is not saved.
with open("../../report/classification_report_sgd_tfidf.txt", "w") as report_file:
    report_file.write(class_report)

Classification Report for TF-IDF Model:
              precision    recall  f1-score   support

  Attractive      0.954     0.958     0.956     13985
       Hotel      0.962     0.891     0.925     10282
  Restaurant      0.931     0.969     0.949     17344

    accuracy                          0.946     41611
   macro avg      0.949     0.939     0.943     41611
weighted avg      0.946     0.946     0.945     41611

Confusion Matrix for TF-IDF Model:
[[13394   149   442]
 [  315  9160   807]
 [  338   208 16798]]


In [6]:
# Now repeat the evaluation with the w2v features.
# NOTE: `model` is rebound here, replacing the TF-IDF classifier loaded earlier.
model = joblib.load(model_w2v_path)
test_w2v_data = pd.read_csv(test_w2v_data_path)

# Raw labels and the remaining feature columns for the w2v test set.
# `drop` returns a new frame, so `test_w2v_data` keeps the label column.
y_test_w2v = test_w2v_data[target_col]
X_test_w2v = test_w2v_data.drop(columns=target_col)


In [7]:
# Predict on the w2v test features with the model currently bound to `model`.
# This rebinds `y_pred`, replacing the TF-IDF predictions computed earlier, so
# the TF-IDF report cell should not be re-run after this point without
# re-running its prediction cell first.
y_pred = model.predict(X_test_w2v)

In [8]:
# Encode the w2v test labels with the shared label encoder `le`.
# Read from the raw label column of the loaded frame rather than from
# `y_test_w2v` itself: the previous self-referential rebinding
# (`y_test_w2v = le.transform(y_test_w2v.values)`) overwrote its own input, so
# re-running this cell operated on already-transformed labels and failed.
y_test_w2v = le.transform(test_w2v_data[target_col].values)

In [9]:
# Per-class precision/recall/F1 for the w2v model, labelled with the encoder's
# class names and rounded to three decimals.
# NOTE: `class_report` and `conf_matrix` are rebound here, replacing the
# TF-IDF results held under the same names.
print("Classification Report for W2V Model:")
class_report = classification_report(y_test_w2v, y_pred, digits=3, target_names=le.classes_)
print(class_report)

# Fixed header: this matrix belongs to the W2V model; the original printed the
# copy-pasted "TF-IDF" label, mislabelling the output.
print("Confusion Matrix for W2V Model:")
conf_matrix = confusion_matrix(y_test_w2v, y_pred)
print(conf_matrix)

# Persist only the textual classification report; the confusion matrix is not saved.
with open("../../report/classification_report_sgd_w2v.txt", "w") as f:
    f.write(class_report)

Classification Report for W2V Model:
              precision    recall  f1-score   support

  Attractive      0.953     0.947     0.950     13985
       Hotel      0.931     0.895     0.913     10282
  Restaurant      0.934     0.960     0.947     17344

    accuracy                          0.940     41611
   macro avg      0.939     0.934     0.937     41611
weighted avg      0.940     0.940     0.940     41611

Confusion Matrix for TF-IDF Model:
[[13247   324   414]
 [  317  9200   765]
 [  336   353 16655]]
