In [24]:
import pandas as pd
import numpy as np
import os
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Concatenate
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from preprocess import clean_text
import pickle


In [12]:
fake_df = pd.read_csv(r'C:\Users\Admin\Desktop\Image gen\fakenews(text+image)\Articles\Fake.csv')
real_df = pd.read_csv(r'C:\Users\Admin\Desktop\Image gen\fakenews(text+image)\Articles\Real.csv')

# Add labels
fake_df['label'] = 0
real_df['label'] = 1

# Optional: create dummy image paths if not present
# fake_df['image_path'] = 'dummy_fake.jpg'
# real_df['image_path'] = 'dummy_real.jpg'

# Combine into one DataFrame
df = pd.concat([fake_df, real_df], ignore_index=True)

# Shuffle the rows
df = df.sample(frac=1).reset_index(drop=True)

print(df.head())
print(df.tail())

                                               title  \
0  CREEPY BERNIE Calls For Trump To Step Down…Wha...   
1  NOT SO FAST: CA LIBS TRY TO ‘Drought Shame’ Co...   
2   Trump Is Bragging About His Approval Rating, ...   
3  Frankfurt defuses massive WWII bomb after evac...   
4  Maltese prime minister promises reward to unco...   

                                                text    subject  \
0  Yesterday, Senator Bernie Sanders, I-Vt., went...   politics   
1  When will the libs start Delta Smelt shaming a...   politics   
2  The current occupant of the White House used t...       News   
3  FRANKFURT (Reuters) - German explosives expert...  worldnews   
4  VALLETTA (Reuters) - Prime Minister Joseph Mus...  worldnews   

                 date  label  
0         Dec 8, 2017      0  
1         Jul 9, 2015      0  
2        May 25, 2017      0  
3  September 3, 2017       1  
4   October 18, 2017       1  
                                                   title  \
44893  #YouveB

In [13]:
df['clean_text'] = df['text'].apply(clean_text)


In [14]:
print(df.tail())

                                                   title  \
44893  #YouveBeenTrumped…Journalist Who Tried To Nail...   
44894  Tillerson accuses Iran of 'alarming provocatio...   
44895  Lockheed nears $27 billion helicopter deal wit...   
44896  TRUMP’S GREAT ANSWER On Terror Attack: “It’s a...   
44897  RADICAL “OCCUPY” MOM WHO LEFT 4 KIDS AND HUSBA...   

                                                    text       subject  \
44893  This is the dumbest and biggest crime in the h...      politics   
44894  WASHINGTON (Reuters) - U.S. Secretary of State...  politicsNews   
44895  (Reuters) - Lockheed Martin Corp is expected t...  politicsNews   
44896                                                         politics   
44897  Occupy Wall Street 2011:The fun is over for a ...     left-news   

                  date  label  \
44893     Jul 11, 2017      0   
44894  April 19, 2017       1   
44895  March 28, 2017       1   
44896     Dec 22, 2016      0   
44897      Sep 9, 2015      0

In [15]:
tfidf = TfidfVectorizer(max_features=500)
X_text = tfidf.fit_transform(df['clean_text']).toarray()
y = df['label'].values

# Load image data
img_dir = "Images"  # Should have 'Real' and 'Fake' folders
img_data = []
img_labels = []
label_map = {"Real": 1, "Fake": 0}

for label_folder in os.listdir(img_dir):
    path = os.path.join(img_dir, label_folder)
    if not os.path.isdir(path):
        continue
    for img_name in os.listdir(path):
        try:
            img_path = os.path.join(path, img_name)
            img = load_img(img_path, target_size=(224, 224))
            img = img_to_array(img) / 255.0
            img_data.append(img)
            img_labels.append(label_map[label_folder])
        except:
            continue

In [25]:
X_img = np.array(img_data)
y_img = np.array(img_labels)

# Align text and image data
min_len = min(len(X_text), len(X_img), len(y_img))
X_text = X_text[:min_len]
X_img = X_img[:min_len]
y = y_img[:min_len]

if len(y.shape) == 1:
    y = to_categorical(y, 2)

# Train-test split
X_text_train, X_text_test, X_img_train, X_img_test, y_train, y_test = train_test_split(
    X_text, X_img, y, test_size=0.2, random_state=42
)

# Model
text_input = Input(shape=(X_text.shape[1],))
text_branch = Dense(128, activation='relu')(text_input)

# --- Updated image branch with MobileNetV2 ---
base_model = MobileNetV2(include_top=False, input_shape=(224, 224, 3), pooling='avg', weights='imagenet')
base_model.trainable = False  # Freeze pretrained layers

img_input = Input(shape=(224, 224, 3))
x = base_model(img_input)
img_branch = Dense(128, activation='relu')(x)

# Merge text and image
merged = Concatenate()([text_branch, img_branch])
merged = Dense(64, activation='relu')(merged)
output = Dense(2, activation='softmax')(merged)

model = Model(inputs=[text_input, img_input], outputs=output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train
model.fit([X_text_train, X_img_train], y_train, epochs=5, validation_data=([X_text_test, X_img_test], y_test))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1us/step
Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 50s/step - accuracy: 0.5000 - loss: 0.8184 - val_accuracy: 0.5000 - val_loss: 0.7698
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.5000 - loss: 0.6759 - val_accuracy: 0.5000 - val_loss: 0.5727
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.8125 - loss: 0.4253 - val_accuracy: 1.0000 - val_loss: 0.3783
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 1.0000 - loss: 0.3041 - val_accuracy: 0.7500 - val_loss: 0.3311
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 1.0000 - loss: 0.2734 - val_accuracy

<keras.src.callbacks.history.History at 0x1a8d24dd4b0>

In [None]:
#Save

model.save("models/text_image_model.keras")
with open("models/tfidf.pkl", "wb") as f:
    pickle.dump(tfidf, f)