In [10]:
import pandas as pd
import numpy as np
import psycopg2
import pandas as pd
from dotenv import load_dotenv
import os
import sys
from sqlalchemy import create_engine
from importlib import reload
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns



In [3]:
load_dotenv()

True

# Analysis TESTS: 1, 2, 3, 4

In [4]:
USER = os.getenv("POSTGRES_USER")
PASSWORD = os.getenv("POSTGRES_PASSWORD")
DB = os.getenv("POSTGRES_DB")
PORT = os.getenv("POSTGRES_PORT")
HOST = os.getenv("POSTGRES_HOST", "localhost")

# Criar engine SQLAlchemy
engine = create_engine(f"postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{DB}")


## Test 1

In [None]:
df_test1 = pd.read_sql("SELECT * FROM test1_results", engine)

In [9]:
df_test1['response'].value_counts()

response
real    1048
fake     952
Name: count, dtype: int64

In [14]:
y_true = df_test1['true_class']
y_pred = df_test1['response']

# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=y_true.unique())
cm_df = pd.DataFrame(cm, index=y_true.unique(), columns=y_true.unique())

plt.figure(figsize=(8,6))
sns.heatmap(cm_df, annot=True, fmt="d", cmap="Blues")
plt.ylabel('True Class')
plt.xlabel('LLM Output')
plt.title('Test 1 - Confusion Matrix')
plt.savefig("./test1/confusion_matrix.png")  # Salva a figura como PNG
plt.close()  # Fecha a figura para não mostrar

# Classification report
report = classification_report(y_true, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()
report_df.to_csv("./test1/classification_report.csv")

print("✅ Confusion matrix e classification report salvos em arquivos CSV e PNG")

✅ Confusion matrix e classification report salvos em arquivos CSV e PNG


## Test 2

In [30]:
df_test2 = pd.read_sql("SELECT * FROM test2_results", engine)

In [31]:
df_test2['response'].value_counts()

response
real                        1168
fake                         672
real with misinformation     160
Name: count, dtype: int64

In [25]:
## real with misinformation =  fake
df_test2_1 = df_test2.copy()
df_test2_1['response'].replace({'real with misinformation': 'fake'}, inplace=True)


## real with misinformation =  real
df_test2_2 = df_test2.copy()
df_test2_2['response'].replace({'real with misinformation': 'real'}, inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_test2_1['response'].replace({'real with misinformation': 'fake'}, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_test2_2['response'].replace({'real with misinformation': 'real'}, inplace=True)


In [39]:
df_test2_1['response'].value_counts()

response
real    1168
fake     832
Name: count, dtype: int64

In [40]:
df_test2_2['response'].value_counts()

response
real    1328
fake     672
Name: count, dtype: int64

In [41]:
y_true = df_test2_1['true_class']
y_pred = df_test2_1['response']

# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=y_true.unique())
cm_df = pd.DataFrame(cm, index=y_true.unique(), columns=y_true.unique())

plt.figure(figsize=(8,6))
sns.heatmap(cm_df, annot=True, fmt="d", cmap="Blues")
plt.ylabel('True Class')
plt.xlabel('LLM Output')
plt.title('Test 2 - Confusion Matrix (real with misinformation =  fake)')
plt.savefig("./test2/confusion_matrix_(real with misinformation =  fake).png")  # Salva a figura como PNG
plt.close()  # Fecha a figura para não mostrar

# Classification report
report = classification_report(y_true, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()
report_df.to_csv("./test2/classification_report_(real with misinformation =  fake).csv")

print("✅ Confusion matrix e classification report salvos em arquivos CSV e PNG")

✅ Confusion matrix e classification report salvos em arquivos CSV e PNG


In [42]:
y_true = df_test2_2['true_class']
y_pred = df_test2_2['response']

# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=y_true.unique())
cm_df = pd.DataFrame(cm, index=y_true.unique(), columns=y_true.unique())

plt.figure(figsize=(8,6))
sns.heatmap(cm_df, annot=True, fmt="d", cmap="Blues")
plt.ylabel('True Class')
plt.xlabel('LLM Output')
plt.title('Test 2 - Confusion Matrix (real with misinformation =  real)')
plt.savefig("./test2/confusion_matrix_(real with misinformation =  real).png")  # Salva a figura como PNG
plt.close()  # Fecha a figura para não mostrar

# Classification report
report = classification_report(y_true, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()
report_df.to_csv("./test2/classification_report_(real with misinformation =  real).csv")

print("✅ Confusion matrix e classification report salvos em arquivos CSV e PNG")

✅ Confusion matrix e classification report salvos em arquivos CSV e PNG


## Test 3

In [36]:
df_test3 = pd.read_sql("SELECT * FROM test3_results", engine)

In [37]:
df_test3['response'].value_counts()

response
fake    1080
real     920
Name: count, dtype: int64

In [43]:
y_true = df_test3['true_class']
y_pred = df_test3['response']

# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=y_true.unique())
cm_df = pd.DataFrame(cm, index=y_true.unique(), columns=y_true.unique())

plt.figure(figsize=(8,6))
sns.heatmap(cm_df, annot=True, fmt="d", cmap="Blues")
plt.ylabel('True Class')
plt.xlabel('LLM Output')
plt.title('Test 3 - Confusion Matrix')
plt.savefig("./test3/confusion_matrix.png")  # Salva a figura como PNG
plt.close()  # Fecha a figura para não mostrar

# Classification report
report = classification_report(y_true, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()
report_df.to_csv("./test3/classification_report.csv")

print("✅ Confusion matrix e classification report salvos em arquivos CSV e PNG")

✅ Confusion matrix e classification report salvos em arquivos CSV e PNG


## Test 4

In [44]:
df_test4 = pd.read_sql("SELECT * FROM test4_results", engine)

In [45]:
df_test4['response'].value_counts()

response
real    1143
fake     857
Name: count, dtype: int64

In [46]:
y_true = df_test4['true_class']
y_pred = df_test4['response']

# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=y_true.unique())
cm_df = pd.DataFrame(cm, index=y_true.unique(), columns=y_true.unique())

plt.figure(figsize=(8,6))
sns.heatmap(cm_df, annot=True, fmt="d", cmap="Blues")
plt.ylabel('True Class')
plt.xlabel('LLM Output')
plt.title('Test 4 - Confusion Matrix')
plt.savefig("./test4/confusion_matrix.png")  # Salva a figura como PNG
plt.close()  # Fecha a figura para não mostrar

# Classification report
report = classification_report(y_true, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()
report_df.to_csv("./test4/classification_report.csv")

print("✅ Confusion matrix e classification report salvos em arquivos CSV e PNG")

✅ Confusion matrix e classification report salvos em arquivos CSV e PNG
