# Classification Fake News Climat – Notebook ML

In [None]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, learning_curve, train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [17]:
# Load the labelled climate-news dataset (columns used downstream: 'Text', 'Label').
# Forward slashes keep this relative path valid on Linux/macOS as well as on
# Windows; the previous backslash-separated literal only resolved on Windows.
df = pd.read_csv("../data/fakenews-climate-fr.csv")
df.head()

Unnamed: 0,Text,Label
0,\r\nUne réflexion collective qui est aussi quo...,fake
1,\r\nDès l’élaboration de la Convention-cadre d...,true
2,"\r\n""On peut parler d'invasion, d'invasion gén...",true
3,"\r\nLa Rochelle, le 14 mars 2008. Un navire en...",biased
4,\r\nJ’ai eu le plaisir de visiter le Japon il ...,fake


## Préparation des données

In [18]:
# Feature/target selection: the raw article text is the model input and the
# 'Label' column is the class to predict.
X, y = df['Text'], df['Label']

In [None]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

## 1) Naive Bayes baseline

In [4]:
# Learn the TF-IDF vocabulary and weights on the training texts only, then
# densify the result because GaussianNB does not accept sparse matrices.
vectorizer = TfidfVectorizer()
# The raw training texts are re-selected from X through y_train's index rather
# than read from X_train: this cell rebinds X_train to a numeric array, so
# reading X_train here would make a second execution of the cell fail (the
# vectorizer would receive numbers instead of strings). Selecting by index
# keeps the cell idempotent while downstream cells still find X_train.
X_train = vectorizer.fit_transform(X.loc[y_train.index]).toarray()
# Guards against a non-unique index silently duplicating rows in the selection.
assert X_train.shape[0] == len(y_train), "feature/label row count mismatch"

In [5]:
# Baseline classifier: Gaussian Naive Bayes with default hyper-parameters,
# trained on the dense TF-IDF matrix. fit() is left as the final expression
# so the fitted estimator is displayed as the cell output.
# NOTE(review): MultinomialNB / ComplementNB are the usual Naive Bayes variants
# for TF-IDF features and work on sparse input — worth comparing.
model = GaussianNB()
model.fit(X=X_train, y=y_train)

0,1,2
,priors,
,var_smoothing,1e-09


In [6]:
# Encode the held-out texts with the vocabulary/IDF weights already learned on
# the training set (transform only, no refit), then predict their labels.
# The raw texts are re-selected from X through y_test's index rather than read
# from X_test: this cell rebinds X_test to a numeric array, so reading X_test
# here would make a second execution of the cell fail. Selecting by index
# keeps the cell idempotent.
X_test = vectorizer.transform(X.loc[y_test.index]).toarray()
assert X_test.shape[0] == len(y_test), "feature/label row count mismatch"
y_pred = model.predict(X_test)

In [7]:
# Per-class precision / recall / F1 on the held-out set. The report is a
# pre-formatted string, so it is printed to preserve its column alignment.
report_1 = classification_report(y_true=y_test, y_pred=y_pred)
print(report_1)

              precision    recall  f1-score   support

      biased       0.29      0.06      0.10        63
        fake       0.52      0.69      0.59       127
        true       0.77      0.78      0.78       297

    accuracy                           0.67       487
   macro avg       0.52      0.51      0.49       487
weighted avg       0.64      0.67      0.64       487



In [8]:
# Confusion matrix on the held-out set (rows = actual class, columns =
# predicted class). The label order is pinned to the classes the model was
# fitted on and passed to the heatmap, so the axes show class names instead of
# anonymous 0/1/2 positions.
class_names = model.classes_.tolist()
matrix_1 = confusion_matrix(y_test, y_pred, labels=class_names)
px.imshow(
    matrix_1,
    x=class_names,
    y=class_names,
    labels={'x': 'Predicted', 'y': 'Actual'},
    text_auto=True,
    title='Confusion matrix – Naive Bayes baseline',
).show()

In [9]:
# Learning curve: 5-fold cross-validated score of the baseline model for ten
# training-set sizes ranging from 10% to 100% of the training data.
# NOTE(review): if X_train is the TF-IDF matrix fitted on the whole training
# set, every validation fold has already contributed to the vocabulary/IDF
# weights; wrapping vectorizer + classifier in a pipeline and passing raw text
# would avoid that leakage.
# NOTE(review): for single-label multiclass targets 'f1_micro' equals accuracy;
# 'f1_macro' would reflect the weak minority-class performance better.
N, train_score, val_score = learning_curve(
    model,
    X_train,
    y_train,
    cv=5,
    scoring='f1_micro',
    train_sizes=np.linspace(0.1, 1.0, 10),
)

# Scores have one column per CV fold; plot the mean across folds.
fig = go.Figure()
fig.add_trace(go.Scatter(x=N, y=train_score.mean(axis=1), mode='lines', name='train score'))
fig.add_trace(go.Scatter(x=N, y=val_score.mean(axis=1), mode='lines', name='validation score'))
# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    title='Learning curve – Naive Bayes baseline',
    xaxis_title='Number of training samples',
    yaxis_title='F1 micro (mean over 5 folds)',
)
fig.show()