In [1]:
import sys
print(sys.executable)


c:\Users\moham\AA\python.exe


In [3]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"


In [1]:
import os
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from PIL import Image
from transformers import CLIPProcessor, CLIPModel, BertTokenizer, BertModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import gc



In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to("cpu")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

bert_model = BertModel.from_pretrained("bert-base-uncased").to("cpu")
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")



for param in bert_model.parameters():
    param.requires_grad = True

bert_model.train()

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [3]:
df = pd.read_csv("Multimodal_FakeNews_Sample_1999_Clean.csv")  


df['label'] = df['label'].map({'fake': 0, 'real': 1})  
df.dropna(subset=["text", "image_filename"], inplace=True)


In [12]:
def get_multimodal_features(row):
    try:
        image_path = f"fake_news_images_1999/{row['image_filename']}"
        image = Image.open(image_path).convert("RGB")
    except Exception as e:
        print(f"[ERROR] Cannot open image for {row['image_filename']}: {e}")
        return None

    text = row['text'][:512]

    try:
        clip_inputs = clip_processor(images=image, return_tensors="pt")
        image_features = clip_model.get_image_features(**clip_inputs)

        encoded = bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        bert_outputs = bert_model(**encoded)
        text_features = bert_outputs.pooler_output
        text_features*=2

        return torch.cat((text_features, image_features), dim=1).squeeze(0)
    except Exception as e:
        print(f"[ERROR] Feature extraction failed for {row['image_filename']}: {e}")
        return None


In [13]:
import os
import torch
from tqdm import tqdm

os.makedirs("features", exist_ok=True)
features_dir = "features"
labels = []

for _, row in tqdm(df.iterrows(), total=len(df)):
    feat = get_multimodal_features(row)
    if feat is not None:
        feature_path = os.path.join(features_dir, row['image_filename'].replace('.jpg', '.pt'))
        torch.save(feat, feature_path)

        labels.append(row['label'])


100%|██████████| 1999/1999 [09:35<00:00,  3.47it/s]


In [14]:
import glob

feature_paths = sorted(glob.glob("features/*.pt"))
X = torch.stack([torch.load(p) for p in feature_paths])
y = torch.tensor(labels, dtype=torch.long)


In [None]:
if isinstance(X, torch.Tensor):
    X = X.detach().cpu().numpy()

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

torch_X_train = torch.tensor(X_train, dtype=torch.float32)
torch_y_train = torch.tensor(y_train, dtype=torch.long)
torch_X_val = torch.tensor(X_val, dtype=torch.float32)
torch_y_val = torch.tensor(y_val, dtype=torch.long)
torch_X_test = torch.tensor(X_test, dtype=torch.float32)
torch_y_test = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(torch_X_train, torch_y_train)
val_dataset = TensorDataset(torch_X_val, torch_y_val)
test_dataset = TensorDataset(torch_X_test, torch_y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

class FakeNewsNN(nn.Module):
    def __init__(self, input_size):
        super(FakeNewsNN, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 2)
        )

    def forward(self, x):
        return self.classifier(x)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FakeNewsNN(X.shape[1]).to(device)

# Apply class weights to handle imbalance
from sklearn.utils.class_weight import compute_class_weight
weights = compute_class_weight("balanced", classes=np.unique(y_train.numpy()), y=y_train.numpy())
weights_tensor = torch.tensor(weights, dtype=torch.float32).to(device)
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)  # L2 regularization
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2)

train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
best_val_acc = 0
patience = 3
patience_counter = 0

for epoch in range(30):
    model.train()
    total_loss = 0
    train_preds, train_trues = [], []

    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        train_preds.extend(preds)
        train_trues.extend(batch_y.detach().cpu().numpy())

    train_acc = np.mean(np.array(train_preds) == np.array(train_trues))
    train_losses.append(total_loss / len(train_loader))
    train_accuracies.append(train_acc)

    model.eval()
    val_loss = 0
    val_preds, val_trues = [], []
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            val_loss += loss.item()
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            val_preds.extend(preds)
            val_trues.extend(batch_y.cpu().numpy())

    val_acc = np.mean(np.array(val_preds) == np.array(val_trues))
    val_losses.append(val_loss / len(val_loader))
    val_accuracies.append(val_acc)
    scheduler.step(val_acc)

    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        patience_counter = 0
        torch.save(model.state_dict(), "best_model.pt")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered.")
            break


  torch_y_train = torch.tensor(y_train, dtype=torch.long)
  torch_y_val = torch.tensor(y_val, dtype=torch.long)
  torch_y_test = torch.tensor(y_test, dtype=torch.long)


Epoch 1, Loss: 0.7262, Train Acc: 0.5371, Val Acc: 0.4906
Epoch 2, Loss: 0.7052, Train Acc: 0.5379, Val Acc: 0.5281
Epoch 3, Loss: 0.6762, Train Acc: 0.5911, Val Acc: 0.5125
Epoch 4, Loss: 0.6632, Train Acc: 0.6075, Val Acc: 0.4875
Epoch 5, Loss: 0.6296, Train Acc: 0.6333, Val Acc: 0.4781
Early stopping triggered.


In [18]:
model.eval()
y_preds, y_trues = [], []
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        batch_x = batch_x.to(device)
        outputs = model(batch_x)
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        y_preds.extend(preds)
        y_trues.extend(batch_y.numpy())

print("\nClassification Report:")
print(classification_report(y_trues, y_preds))



Classification Report:
              precision    recall  f1-score   support

           0       0.52      0.69      0.59       207
           1       0.48      0.31      0.38       193

    accuracy                           0.51       400
   macro avg       0.50      0.50      0.49       400
weighted avg       0.50      0.51      0.49       400



In [20]:
!pip uninstall matplotlib -y
!pip install matplotlib --upgrade


Found existing installation: matplotlib 3.10.1


error: uninstall-no-record-file

× Cannot uninstall matplotlib 3.10.1
╰─> The package's contents are unknown: no RECORD file was found for matplotlib.

hint: You might be able to recover from this via: pip install --force-reinstall --no-deps matplotlib==3.10.1


Collecting matplotlib
  Using cached matplotlib-3.10.3-cp312-cp312-win_amd64.whl.metadata (11 kB)
Using cached matplotlib-3.10.3-cp312-cp312-win_amd64.whl (8.1 MB)
Installing collected packages: matplotlib
  Attempting uninstall: matplotlib
    Found existing installation: matplotlib 3.10.1


error: uninstall-no-record-file

× Cannot uninstall matplotlib 3.10.1
╰─> The package's contents are unknown: no RECORD file was found for matplotlib.

hint: You might be able to recover from this via: pip install --force-reinstall --no-deps matplotlib==3.10.1


In [21]:
import matplotlib.pyplot as plt
plt.plot([1, 2, 3], [4, 5, 6])
plt.title("Test Plot")
plt.show()


ModuleNotFoundError: No module named 'matplotlib.backends.registry'

In [22]:
import matplotlib
print(matplotlib.__file__)


ModuleNotFoundError: No module named 'matplotlib.backends.registry'

In [23]:
import site
site.getsitepackages()


['c:\\Users\\moham\\AA', 'c:\\Users\\moham\\AA\\Lib\\site-packages']

In [24]:
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss over Epochs')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(train_accuracies, label='Train Accuracy')
plt.plot(val_accuracies, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy over Epochs')
plt.legend()

plt.tight_layout()
plt.show()

ModuleNotFoundError: No module named 'matplotlib.pyplot'