Load all required libraries for NLP + ML

In [3]:
# -----------------------------------
# FIX IMPORT PATHS
# -----------------------------------
import sys
sys.path.append(r"c:\FakeNews")   # <-- makes 'src' visible to Python
print("sys.path ok")

# -----------------------------------
# Standard imports
# -----------------------------------
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB

from sklearn.metrics import accuracy_score, classification_report

import joblib

# -----------------------------------
# Import preprocessing function
# -----------------------------------
from src.preprocess import clean_text


sys.path ok


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DikelediMaholo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\DikelediMaholo\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
import os
print("Current working directory:", os.getcwd())
print("Contents of project root:", os.listdir(".."))
print("Contents of this folder:", os.listdir("."))


Current working directory: c:\FakeNews\Notebooks
Contents of project root: ['app', 'Data', 'models', 'Notebooks', 'src', 'venv']
Contents of this folder: ['01_data_exploration.ipynb', '02_model_training.ipynb']


Load Your Data

In [4]:
# -----------------------------------
# Load Dataset (True + Fake)
# -----------------------------------
true = pd.read_csv("../Data/True.csv")
fake = pd.read_csv("../Data/Fake.csv")

# Add labels
true['label'] = 1   # Real news
fake['label'] = 0   # Fake news

# Combine and shuffle
df = pd.concat([true, fake], axis=0).sample(frac=1, random_state=42)

df = df[['text', 'label']]
df.head()



Unnamed: 0,text,label
799,"Donald Trump s White House is in chaos, and th...",0
6500,Now that Donald Trump is the presumptive GOP n...,0
3590,Mike Pence is a huge homophobe. He supports ex...,0
1377,SAN FRANCISCO (Reuters) - California Attorney ...,1
11059,Twisted reasoning is all that comes from Pelos...,0


Cleaning the text

In [8]:
# -----------------------------------
# Clean the text using preprocess.py
# -----------------------------------
df['clean'] = df['text'].apply(clean_text)

df[['text', 'clean']].head()


Unnamed: 0,text,clean
799,"Donald Trump s White House is in chaos, and th...",donald trump white house chaos trying cover ru...
6500,Now that Donald Trump is the presumptive GOP n...,donald trump presumptive gop nominee time reme...
3590,Mike Pence is a huge homophobe. He supports ex...,mike penny huge homophobe support ex gay conve...
1377,SAN FRANCISCO (Reuters) - California Attorney ...,san francisco reuters california attorney gene...
11059,Twisted reasoning is all that comes from Pelos...,twisted reasoning come pelosi day especially p...


Training and Testing Split

In [9]:
# -----------------------------------
# Train-test split
# -----------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    df['clean'], df['label'], test_size=0.2, random_state=42
)


TF-IDF Vectorization

In [10]:
# -----------------------------------
# TF-IDF Vectorizer
# -----------------------------------
tfidf = TfidfVectorizer(
    max_features=50000,        # large vocabulary helps
    ngram_range=(1, 2)         # unigrams + bigrams (strong boost)
)

X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

X_train_tfidf.shape, X_test_tfidf.shape


((35918, 50000), (8980, 50000))

Train All Classical ML Models

6.1 Train Logistic Regression

In [11]:
# -----------------------------------
# Logistic Regression
# -----------------------------------
lr = LogisticRegression(max_iter=500)
lr.fit(X_train_tfidf, y_train)


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,500


6.2 Train Linear SVM

In [12]:
# -----------------------------------
# Linear SVM
# -----------------------------------
svm = LinearSVC()
svm.fit(X_train_tfidf, y_train)


0,1,2
,penalty,'l2'
,loss,'squared_hinge'
,dual,'auto'
,tol,0.0001
,C,1.0
,multi_class,'ovr'
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,verbose,0


6.3 Train Multinomial Naive Bayes

In [13]:
# -----------------------------------
# Multinomial Naive Bayes
# -----------------------------------
nb = MultinomialNB()
nb.fit(X_train_tfidf, y_train)


0,1,2
,alpha,1.0
,force_alpha,True
,fit_prior,True
,class_prior,


Evaluate All Models

In [14]:
# -----------------------------------
# Evaluate all models
# -----------------------------------
models = {
    "Logistic Regression": lr,
    "Linear SVM": svm,
    "Multinomial NB": nb,
}

for name, model in models.items():
    preds = model.predict(X_test_tfidf)
    print("\nMODEL:", name)
    print("Accuracy:", accuracy_score(y_test, preds))
    print(classification_report(y_test, preds))



MODEL: Logistic Regression
Accuracy: 0.9889755011135858
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4669
           1       0.99      0.99      0.99      4311

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980


MODEL: Linear SVM
Accuracy: 0.9966592427616926
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      4669
           1       1.00      1.00      1.00      4311

    accuracy                           1.00      8980
   macro avg       1.00      1.00      1.00      8980
weighted avg       1.00      1.00      1.00      8980


MODEL: Multinomial NB
Accuracy: 0.9530066815144767
              precision    recall  f1-score   support

           0       0.95      0.96      0.95      4669
           1       0.95      0.95      0.95      4311

    accuracy             

Save all the models

In [15]:
# -----------------------------------
# Save all classical models + vectorizer
# -----------------------------------
joblib.dump(tfidf, "../models/tfidf_vectorizer.pkl")
joblib.dump(lr, "../models/logistic_model.pkl")
joblib.dump(svm, "../models/svm_model.pkl")
joblib.dump(nb, "../models/nb_model.pkl")

print("Models saved!")


Models saved!


In [16]:
results = []

for name, model in models.items():
    preds = model.predict(X_test_tfidf)
    results.append({"Model": name, "Accuracy": accuracy_score(y_test, preds)})

pd.DataFrame(results).sort_values(by="Accuracy", ascending=False)


Unnamed: 0,Model,Accuracy
1,Linear SVM,0.996659
0,Logistic Regression,0.988976
2,Multinomial NB,0.953007


Deep Learning Models:TRAIN A BiLSTM MODEL

In [17]:
# -----------------------------------
# Deep Learning Imports
# -----------------------------------
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout


Tokenize and Pad Sequences

LSTMs cannot read text — we convert text → numbers.

In [18]:
# -----------------------------------
# Tokenizer settings
# -----------------------------------
MAX_VOCAB = 50000     # number of unique words to keep
MAX_LEN = 300         # max tokens per news article

tokenizer = Tokenizer(num_words=MAX_VOCAB, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Convert text to sequences of integers
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Pad sequences to same length
X_train_pad = pad_sequences(X_train_seq, maxlen=MAX_LEN, padding='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=MAX_LEN, padding='post')

X_train_pad.shape, X_test_pad.shape


((35918, 300), (8980, 300))

Build the BiLSTM Model

In [20]:
# -----------------------------------
# Build BiLSTM Model
# -----------------------------------
model_lstm = Sequential([
    Embedding(MAX_VOCAB, 128, input_length=MAX_LEN),
    Bidirectional(LSTM(128, return_sequences=False)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model_lstm.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)
model_lstm.build(input_shape=(None, MAX_LEN))
model_lstm.summary()


model_lstm.summary()


TRAIN THE LSTM

In [21]:
history = model_lstm.fit(
    X_train_pad,
    y_train,
    validation_split=0.2,
    epochs=3,         # you can increase this later
    batch_size=128,
    verbose=1
)


Epoch 1/3
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m286s[0m 1s/step - accuracy: 0.9562 - loss: 0.1053 - val_accuracy: 0.9919 - val_loss: 0.0270
Epoch 2/3
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m289s[0m 1s/step - accuracy: 0.9978 - loss: 0.0077 - val_accuracy: 0.9946 - val_loss: 0.0208
Epoch 3/3
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m312s[0m 1s/step - accuracy: 0.9993 - loss: 0.0025 - val_accuracy: 0.9937 - val_loss: 0.0239


In [22]:
loss, acc = model_lstm.evaluate(X_test_pad, y_test)
print("BiLSTM Accuracy:", acc)


[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 132ms/step - accuracy: 0.9939 - loss: 0.0197
BiLSTM Accuracy: 0.99387526512146


DistilBERT Training Setup:Install Hugging Face Transformers

In [5]:
from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
import tensorflow as tf
from sklearn.model_selection import train_test_split


In [6]:
# -----------------------------------
# Load DistilBERT tokenizer
# -----------------------------------
tokenizer_bert = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')


Train/Test Split for BERT

In [3]:
import pandas as pd

In [5]:
# -----------------------------------
# Train/test split for BERT (use original text)
# -----------------------------------
X_train_bert, X_test_bert, y_train_bert, y_test_bert = train_test_split(
    df['text'], df['label'], test_size=0.2, random_state=42
)


okenize the dataset for BERT

In [8]:
# -----------------------------------
# Tokenize text for BERT
# -----------------------------------
train_encodings = tokenizer_bert(
    X_train_bert.tolist(),
    truncation=True,
    padding=True,
    max_length=256
)

test_encodings = tokenizer_bert(
    X_test_bert.tolist(),
    truncation=True,
    padding=True,
    max_length=256
)


Convert to TensorFlow Datasets

In [6]:
# -----------------------------------
# Convert tokenized data into TensorFlow datasets
# -----------------------------------
train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(train_encodings),
    y_train_bert.values
)).shuffle(1000).batch(16)

test_dataset = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings),
    y_test_bert.values
)).batch(16)


NameError: name 'tf' is not defined

In [1]:
from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
tokenizer_bert = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')


  from .autonotebook import tqdm as notebook_tqdm





Load the DistilBERT Model

In [None]:
model_bert = TFDistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=2,
    cache_dir="../models/hf_cache"
)



In [1]:
from transformers import ElectraTokenizerFast, TFElectraForSequenceClassification
import tensorflow as tf


  from .autonotebook import tqdm as notebook_tqdm





In [2]:
tokenizer_bert = ElectraTokenizerFast.from_pretrained(
    "google/electra-small-discriminator"
)

model_bert = TFElectraForSequenceClassification.from_pretrained(
    "google/electra-small-discriminator",
    num_labels=2
)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development





TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initializ

In [12]:
train_encodings = tokenizer_bert(
    X_train_bert.tolist(),
    truncation=True,
    padding=True,
    max_length=256
)

test_encodings = tokenizer_bert(
    X_test_bert.tolist(),
    truncation=True,
    padding=True,
    max_length=256
)


In [13]:
train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(train_encodings),
    y_train_bert.values
)).shuffle(1000).batch(16)

test_dataset = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings),
    y_test_bert.values
)).batch(16)


In [14]:
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)

model_bert.compile(
    optimizer=optimizer,
    loss=model_bert.compute_loss,
    metrics=['accuracy']
)


In [15]:
history_bert = model_bert.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=2
)


Epoch 1/2


AttributeError: in user code:

    File "c:\FakeNews\venv\Lib\site-packages\tf_keras\src\engine\training.py", line 1398, in train_function  *
        return step_function(self, iterator)
    File "c:\FakeNews\venv\Lib\site-packages\transformers\modeling_tf_utils.py", line 1572, in compute_loss  *
        return super().compute_loss(*args, **kwargs)
    File "c:\FakeNews\venv\Lib\site-packages\tf_keras\src\engine\training.py", line 1206, in compute_loss  **
        return self.compiled_loss(
    File "c:\FakeNews\venv\Lib\site-packages\tf_keras\src\engine\compile_utils.py", line 275, in __call__
        y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
    File "c:\FakeNews\venv\Lib\site-packages\tf_keras\src\engine\compile_utils.py", line 854, in match_dtype_and_rank
        if (y_t.dtype.is_floating and y_p.dtype.is_floating) or (

    AttributeError: 'NoneType' object has no attribute 'dtype'


In [16]:
import tensorflow as tf

y_train_tensor = tf.convert_to_tensor(y_train_bert.values, dtype=tf.int32)
y_test_tensor = tf.convert_to_tensor(y_test_bert.values, dtype=tf.int32)

train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(train_encodings),
    y_train_tensor
)).shuffle(1000).batch(16)

test_dataset = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings),
    y_test_tensor
)).batch(16)


In [17]:
history_bert = model_bert.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=2
)


Epoch 1/2


AttributeError: in user code:

    File "c:\FakeNews\venv\Lib\site-packages\tf_keras\src\engine\training.py", line 1398, in train_function  *
        return step_function(self, iterator)
    File "c:\FakeNews\venv\Lib\site-packages\transformers\modeling_tf_utils.py", line 1572, in compute_loss  *
        return super().compute_loss(*args, **kwargs)
    File "c:\FakeNews\venv\Lib\site-packages\tf_keras\src\engine\training.py", line 1206, in compute_loss  **
        return self.compiled_loss(
    File "c:\FakeNews\venv\Lib\site-packages\tf_keras\src\engine\compile_utils.py", line 275, in __call__
        y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
    File "c:\FakeNews\venv\Lib\site-packages\tf_keras\src\engine\compile_utils.py", line 854, in match_dtype_and_rank
        if (y_t.dtype.is_floating and y_p.dtype.is_floating) or (

    AttributeError: 'NoneType' object has no attribute 'dtype'


In [1]:
# 1. Imports
from transformers import ElectraTokenizerFast, ElectraForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset
import numpy as np


  from .autonotebook import tqdm as notebook_tqdm





In [7]:
# 2. Load tokenizer
tokenizer_bert = ElectraTokenizerFast.from_pretrained("google/electra-small-discriminator")

# 3. Tokenize dataset
train_encodings = tokenizer_bert(
    X_train_bert.tolist(),
    truncation=True,
    padding=True,
    max_length=256
)

In [8]:
test_encodings = tokenizer_bert(
    X_test_bert.tolist(),
    truncation=True,
    padding=True,
    max_length=256
)

In [9]:
# 4. PyTorch Dataset Class
class NewsDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(int(self.labels[idx]))  # int32 label
        return item

    def __len__(self):
        return len(self.labels)

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import classification_report
import joblib
import os
import sys

# --- FIX 1: Ensure imports and paths work ---
# Assuming 'src/' is available in the Python path
try:
    from src.preprocess import clean_text
except ImportError:
    # If the import fails, manually add the project root to the path
    ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath('02_model_training.ipynb')))
    sys.path.append(ROOT_DIR)
    from src.preprocess import clean_text

# --- FIX 2: Re-create the missing variables (X_train_tfidf and y_train) ---

# A. Load Data and Labels (As done in Cell 4)
true = pd.read_csv("../Data/True.csv")
fake = pd.read_csv("../Data/Fake.csv")
true['label'] = 1
fake['label'] = 0
df = pd.concat([true, fake], axis=0).sample(frac=1, random_state=42)

# B. Clean and Split Data (As done in Cells 8 & 9)
df['clean'] = df['text'].apply(clean_text)
X_train, X_test, y_train, y_test = train_test_split(
    df['clean'], df['label'], test_size=0.2, random_state=42
)

# C. Load Vectorizer and Transform Training Data (As done in Cell 10)
try:
    tfidf = joblib.load("../models/tfidf_vectorizer.pkl")
    X_train_tfidf = tfidf.transform(X_train)
    X_test_tfidf = tfidf.transform(X_test)
except FileNotFoundError:
    print("ERROR: tfidf_vectorizer.pkl not found. Please run all preceding cells in the original notebook section.")
    sys.exit() # Stop execution if files aren't found


# --- FIX 3: Run the Calibration Code ---

print("Starting Calibrated SVM Training...")

# A. Re-initialize the base LinearSVC
svc_base = LinearSVC(
    max_iter=1000, 
    loss='squared_hinge', 
    dual='auto',
    random_state=42 
)

# B. Wrap the SVM in a CalibratedClassifierCV 
calibrated_svc = CalibratedClassifierCV(svc_base, cv=5, method='sigmoid')

# C. Fit the new model on your training data (This is the line that failed before)
calibrated_svc.fit(X_train_tfidf, y_train)

print("Calibrated SVM Training Complete. Evaluating...")

# ----------------------------------------------------
# 3. Evaluate and Save the New Model (Original Code)
# ----------------------------------------------------

calibrated_preds = calibrated_svc.predict(X_test_tfidf)

print("\nCalibrated SVM Test Performance:")
print(classification_report(y_test, calibrated_preds))

# Save the new model
JOB_MODEL_FILE = "../models/calibrated_svm.pkl"
joblib.dump(calibrated_svc, JOB_MODEL_FILE)

print(f"\nSuccessfully saved Calibrated SVM model to {JOB_MODEL_FILE}")

# ----------------------------------------------------
# 4. Save Features for Explainability (Original Code)
# ----------------------------------------------------

# We load the original SVM to get the feature weights for the web app's explainability feature
try:
    svm_base_for_weights = joblib.load("../models/svm_model.pkl")

    feature_data = {
        'feature_names': tfidf.get_feature_names_out().tolist(),
        'coefficients': svm_base_for_weights.coef_[0].tolist()
    }
    joblib.dump(feature_data, "../models/svm_features.pkl")
    print("\nSaved SVM feature weights for explainability.")
except FileNotFoundError:
    print("\nWarning: Could not find svm_model.pkl to extract feature weights. Explainability features will not work.")

[nltk_data] Error loading stopwords: [WinError 10054] An existing
[nltk_data]     connection was forcibly closed by the remote host
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\DikelediMaholo\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Starting Calibrated SVM Training...
Calibrated SVM Training Complete. Evaluating...

Calibrated SVM Test Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      4669
           1       1.00      1.00      1.00      4311

    accuracy                           1.00      8980
   macro avg       1.00      1.00      1.00      8980
weighted avg       1.00      1.00      1.00      8980


Successfully saved Calibrated SVM model to ../models/calibrated_svm.pkl

Saved SVM feature weights for explainability.


In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report # For final check

# --- 1. Prepare Data for Prediction ---
# This ensures the test text is correctly vectorized.
X_test_tfidf = tfidf.transform(X_test)

# --- 2. Run Predictions and Probabilities ---
# Get the hard class prediction (0 or 1)
y_pred_class = calibrated_svc.predict(X_test_tfidf)

# Get the probabilities: returns [[P(Fake), P(Real)], ...]
y_pred_proba = calibrated_svc.predict_proba(X_test_tfidf)

# Extract the confidence score (the max probability for the predicted class)
confidence_scores = np.max(y_pred_proba, axis=1)

# --- 3. Create the Final DataFrame ---
results_df = pd.DataFrame({
    # The cleaned input text
    'Original_Text': X_test.tolist(),  
    # The actual label
    'True_Label': y_test.tolist(),
    # The model's prediction
    'Predicted_Label': y_pred_class.tolist(),
    # The model's confidence level
    'Confidence_Score': confidence_scores.round(4).tolist(), 
})

# --- 4. Map Numerical Labels to Names for Readability ---
label_map = {0: 'Fake', 1: 'Real'}
results_df['True_Label_Name'] = results_df['True_Label'].map(label_map)
results_df['Predicted_Label_Name'] = results_df['Predicted_Label'].map(label_map)

# Select and reorder the final columns for the output file
final_output_df = results_df[['Original_Text', 'True_Label_Name', 'Predicted_Label_Name', 'Confidence_Score']]

# --- 5. SAVE THE RESULTS TO A FILE ---
OUTPUT_CSV_PATH = "../predicted_results.csv" # Saves one directory up (in the FAKENEWS root)
final_output_df.to_csv(OUTPUT_CSV_PATH, index=False)

print("\n--- RESULTS SUMMARY ---")
print(f"✅ Prediction results successfully saved to: {OUTPUT_CSV_PATH}")
print("File columns: Original_Text, True_Label_Name, Predicted_Label_Name, Confidence_Score")

# Display a performance check and the head of the file
print("\nModel Performance on Test Set:")
print(classification_report(y_test, y_pred_class))

print("\nFirst 5 Saved Results:")
print(final_output_df.head())


--- RESULTS SUMMARY ---
✅ Prediction results successfully saved to: ../predicted_results.csv
File columns: Original_Text, True_Label_Name, Predicted_Label_Name, Confidence_Score

Model Performance on Test Set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      4669
           1       1.00      1.00      1.00      4311

    accuracy                           1.00      8980
   macro avg       1.00      1.00      1.00      8980
weighted avg       1.00      1.00      1.00      8980


First 5 Saved Results:
                                       Original_Text True_Label_Name  \
0  washington reuters u president donald trump di...            Real   
1  welcome trump america k trumplandia people say...            Fake   
2  election result tonight hard swallow many u do...            Fake   
3  washington reuters republican national committ...            Real   
4  plan republican convention suffered embarrassi...            Fake   

  Pre

In [None]:
import os
import pathlib

# Define the path for the hidden Streamlit configuration folder
CONFIG_DIR = pathlib.Path(".streamlit")

# Define the content for the custom theme file
CONFIG_CONTENT = """
[theme]
primaryColor="#DC143C"  
backgroundColor="#0E1117"
secondaryBackgroundColor="#262730"
textColor="#FAFAFA"
font="sans serif"
"""

# 1. Create the .streamlit folder if it does not exist
if not CONFIG_DIR.exists():
    CONFIG_DIR.mkdir()
    print(f"✅ Created directory: {CONFIG_DIR}")
else:
    print(f"Directory already exists: {CONFIG_DIR}")

# 2. Write the theme content to config.toml inside the new folder
CONFIG_FILE = CONFIG_DIR / "config.toml"
try:
    with open(CONFIG_FILE, "w") as f:
        f.write(CONFIG_CONTENT.strip())
    print(f"✅ Successfully wrote custom theme to: {CONFIG_FILE}")
except Exception as e:
    print(f"Error writing file: {e}")

✅ Created directory: .streamlit
✅ Successfully wrote custom theme to: .streamlit\config.toml


: 