<a href="https://colab.research.google.com/github/EricRibeiroAlves/DeteccaoFakeNews/blob/main/Modelos/%5BAP_T%5D_TI_Modelo_Gen%C3%A9rico.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import itertools
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, confusion_matrix
from google.colab import drive

In [None]:
# Load the ISOT Fake News Dataset from Google Drive.

# Mount Drive so the CSV files below are reachable from the Colab runtime.
drive.mount('/content/drive')

dat_fake = "/content/drive/MyDrive/Eng. Controle e Automação/8º Semestre/AP/dataset_FakeNews/Fake.csv"
dat_real = "/content/drive/MyDrive/Eng. Controle e Automação/8º Semestre/AP/dataset_FakeNews/True.csv"

# Read each file and tag every row with its class label.
dt_fake = pd.read_csv(dat_fake)
dt_fake['label'] = 'fake'
dt_real = pd.read_csv(dat_real)
dt_real['label'] = 'real'

# Stack both sets into one frame (fresh 0..n-1 index), then keep only the
# rows whose subject is 'News' or 'worldnews'.
dt = pd.concat([dt_fake, dt_real], ignore_index=True)
dt = dt[dt['subject'].isin(['News', 'worldnews'])]

dt.shape

Mounted at /content/drive


(19195, 5)

In [None]:
# Preview the first rows of the combined, filtered dataset.
dt.head()

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",fake
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",fake
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",fake
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",fake
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",fake


In [None]:
# Number of articles per class; value_counts() lists the most frequent class first.
i = dt['label'].value_counts()

In [None]:
# Bar chart of the number of articles per class.
# The x labels come from the index of `i` so every bar is paired with its own
# class: value_counts() orders by frequency, so a hard-coded ['real', 'fake']
# list is only correct while 'real' happens to be the larger class.
# `text` is supplied so that textposition='auto' actually draws the counts.
fig = go.Figure(
    data=[
        go.Bar(
            x=i.index.tolist(),
            y=i.tolist(),
            text=i.tolist(),
            textposition='auto',
        )
    ]
)

fig.show()

In [None]:
# Hold out 20% of the articles for evaluation; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    dt['text'],
    dt['label'],
    test_size=0.2,
    random_state=7,
)

In [None]:
# Display the held-out labels (pandas truncates the long Series).
y_test

Unnamed: 0,label
43533,real
35950,real
6558,fake
44365,real
38492,real
...,...
43160,real
6859,fake
8644,fake
42728,real


In [None]:
# TF-IDF vectorizer: drops English stop words and ignores terms that appear in
# more than 70% of the documents.
tfidf_vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

In [None]:
# Learn the vocabulary and IDF weights from the training texts only, then
# project the test texts onto that same vocabulary.
# NOTE(review): `dfidf_train` looks like a typo for `tfidf_train`; the name is
# kept because the training cell reads it.
dfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

In [None]:
# Train a Passive-Aggressive linear classifier on the TF-IDF training matrix.
# random_state pins the per-epoch shuffling of the training samples so that a
# Restart & Run All reproduces the same model and metrics; 7 is the same seed
# already used for the train/test split.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(dfidf_train, y_train)

In [None]:
# Predict a label for every held-out article.
y_pred = pac.predict(tfidf_test)

In [None]:
# Fraction of test articles classified correctly, reported as a percentage
# rounded to two decimals.
score = accuracy_score(y_test, y_pred)
accuracy_pct = round(score * 100, 2)
print(f'Accuracy:{accuracy_pct}%')

Accuracy:99.79%


In [None]:
# Confusion matrix.
# The class order is pinned explicitly so the matrix rows/columns and the
# heatmap tick labels are guaranteed to agree. confusion_matrix() otherwise
# sorts the labels, which puts 'fake' before 'real' -- labelling the axes
# ['Real', 'Fake'] therefore swapped the two classes in the plot.
class_labels = ['fake', 'real']
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

# Print the raw matrix (rows = true class, columns = predicted class).
print("Matriz de Confusão:")
print(conf_matrix)

# Plot the matrix with Plotly; tick labels follow the order of class_labels.
categories = ['Fake', 'Real']
fig = go.Figure(
    data=go.Heatmap(
        z=conf_matrix,
        x=categories,
        y=categories,
        colorscale="Viridis",
        text=conf_matrix,
        texttemplate="%{text}"
    )
)
fig.update_layout(
    title="Matriz de Confusão",
    xaxis_title="Previsto",
    yaxis_title="Verdadeiro",
    # Put row 0 at the top so the heatmap reads like the printed matrix.
    yaxis_autorange="reversed",
)
fig.show()


Matriz de Confusão:
[[1798    1]
 [   7 2033]]


In [None]:
# Per-class precision, recall and F1 on the held-out set.
report = classification_report(y_test, y_pred)
print("Relatório de Classificação:", report, sep="\n")


Relatório de Classificação:
              precision    recall  f1-score   support

        fake       1.00      1.00      1.00      1799
        real       1.00      1.00      1.00      2040

    accuracy                           1.00      3839
   macro avg       1.00      1.00      1.00      3839
weighted avg       1.00      1.00      1.00      3839



In [None]:
# Hand-written headlines for a manual sanity check of the trained model.
novos_exemplos = [
    "Study confirms that drinking warm water with lemon cures cancer in a week",
    "NASA confirms discovery of organic molecules on Mars by the Curiosity rover.",
]

# Vectorize the headlines with the fitted TF-IDF vectorizer and predict their labels.
novos_exemplos_tfidf = tfidf_vectorizer.transform(novos_exemplos)
predicoes = pac.predict(novos_exemplos_tfidf)

# Print each headline followed by its predicted label.
for exemplo, predicao in zip(novos_exemplos, predicoes):
    print("Texto: {}\nPredição: {}\n".format(exemplo, predicao))


Texto: Study confirms that drinking warm water with lemon cures cancer in a week
Predição: fake

Texto: NASA confirms discovery of organic molecules on Mars by the Curiosity rover.
Predição: real

