In [1]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import joblib

In [2]:
# Input locations, grouped by feature representation. All paths are relative
# to the notebook's working directory.

# TF-IDF: test CSV and the model artifact evaluated against it.
test_tfidf_data_path, model_tfidf_path = (
    "../../data/preprocessed/type/tfidf_test.csv",
    "../../models/lr_type_model_tfidf.pkl",
)

# W2V: test CSV and the model artifact evaluated against it.
test_w2v_data_path, model_w2v_path = (
    "../../data/preprocessed/type/w2v_test.csv",
    "../../models/lr_type_model_w2v.pkl",
)

In [5]:
# Name of the label column in the test CSV; every other column is passed to
# the model as a feature.
target_col = "Type"

# Load the TF-IDF model artifact, then the matching test set.
# joblib.load unpickles the file, so it should only be pointed at trusted artifacts.
model = joblib.load(model_tfidf_path)
test_tfidf_data = pd.read_csv(test_tfidf_data_path)

# Split the frame into raw labels and the feature matrix.
y_test = test_tfidf_data[target_col]
X_test = test_tfidf_data.drop(columns=target_col)

In [11]:
# Predict on the TF-IDF test features.
# NOTE: `model` is rebound to the W2V model further down the notebook; if this
# cell is re-run after that point, re-run the TF-IDF load cell first.
y_pred = model.predict(X_test)

# Label encoder artifact used to map the raw "Type" strings to the integer
# codes that the predictions are compared against.
le = joblib.load("../../models/label_encoder_type.pkl")

# Encode from the raw frame column instead of from `y_test` itself. The
# previous form (`y_test = le.transform(y_test.values)`) overwrote its own
# input, so running the cell a second time passed already-encoded integers to
# `transform`, which raises ValueError for labels the encoder has not seen.
y_test = le.transform(test_tfidf_data[target_col].values)

In [15]:
# Per-class precision / recall / F1 for the TF-IDF model, with rows named
# after the label encoder's classes.
class_report = classification_report(
    y_test,
    y_pred,
    target_names=le.classes_,
    digits=3,
)
print("Classification Report for TF-IDF Model:", class_report, sep="\n")

# Rows are true classes, columns are predicted classes (scikit-learn convention).
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix for TF-IDF Model:", conf_matrix, sep="\n")

# Only the text report is persisted; the confusion matrix is just printed.
with open("../../report/classification_report_lr_tfidf.txt", "w") as report_file:
    report_file.write(class_report)

Classification Report for TF-IDF Model:
              precision    recall  f1-score   support

  Attractive      0.957     0.959     0.958     13985
       Hotel      0.953     0.916     0.934     10282
  Restaurant      0.945     0.965     0.955     17344

    accuracy                          0.951     41611
   macro avg      0.952     0.947     0.949     41611
weighted avg      0.951     0.951     0.951     41611

Confusion Matrix for TF-IDF Model:
[[13417   184   384]
 [  279  9414   589]
 [  329   282 16733]]


In [16]:
# Now the same evaluation with the W2V representation.
# NOTE: this rebinds `model`, so the TF-IDF model is no longer reachable under
# that name after this cell runs.
model = joblib.load(model_w2v_path)
test_w2v_data = pd.read_csv(test_w2v_data_path)

# Split the frame into raw labels and the feature matrix, reusing the
# `target_col` label-column name defined earlier in the notebook.
y_test_w2v = test_w2v_data[target_col]
X_test_w2v = test_w2v_data.drop(columns=target_col)


In [None]:
# Predict on the W2V test features with whatever `model` is currently bound to
# (the W2V model, provided the W2V load cell ran last). This rebinds `y_pred`,
# replacing the TF-IDF predictions.
y_pred = model.predict(X_test_w2v)

In [21]:
# Encode the raw "Type" labels of the W2V test set with the shared label encoder.
# Read from the frame column instead of from `y_test_w2v` itself so the cell
# is idempotent: the previous self-overwriting form failed on a second run
# because `transform` received already-encoded integers.
y_test_w2v = le.transform(test_w2v_data[target_col].values)

In [22]:
# Per-class precision / recall / F1 for the W2V model, with rows named after
# the label encoder's classes.
print("Classification Report for W2V Model:")
class_report = classification_report(y_test_w2v, y_pred, digits=3, target_names=le.classes_)
print(class_report)

# Heading fixed: it previously read "TF-IDF Model" (copy-paste from the
# TF-IDF cell), which mislabelled the W2V confusion matrix in the output.
# Rows are true classes, columns are predicted classes (scikit-learn convention).
print("Confusion Matrix for W2V Model:")
conf_matrix = confusion_matrix(y_test_w2v, y_pred)
print(conf_matrix)

# Only the text report is persisted; the confusion matrix is just printed.
with open("../../report/classification_report_lr_w2v.txt", "w") as f:
    f.write(class_report)

Classification Report for W2V Model:
              precision    recall  f1-score   support

  Attractive      0.951     0.950     0.951     13985
       Hotel      0.924     0.904     0.914     10282
  Restaurant      0.941     0.953     0.947     17344

    accuracy                          0.940     41611
   macro avg      0.939     0.936     0.937     41611
weighted avg      0.940     0.940     0.940     41611

Confusion Matrix for TF-IDF Model:
[[13292   311   382]
 [  323  9297   662]
 [  357   453 16534]]
