References:
- reading materials https://developers.google.com/machine-learning/gan/gan_structure


In [3]:
!pip install scikit-learn torch




In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Load the dataset
df = pd.read_csv('fake.csv')

# If label column isn't present, assign label 0 (fake)
if 'label' not in df.columns:
    df['label'] = 0  # Assume all fake.csv entries are fake news

# Drop rows with missing text
df = df.dropna(subset=['text'])

# Use a subset for faster training (optional)
df = df.sample(frac=1.0, random_state=42).reset_index(drop=True)

# Vectorize text using TF-IDF
vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
X = vectorizer.fit_transform(df['text']).toarray()
y = df['label'].values  # Binary labels (0 = fake, 1 = real)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)


In [4]:
class Generator(nn.Module):
    def __init__(self, noise_dim, output_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(noise_dim, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim)
        )

    def forward(self, z):
        return self.model(z)

class Discriminator(nn.Module):
    def __init__(self, input_dim, num_classes=2):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes + 1)  # 2 real classes + 1 fake class
        )

    def forward(self, x):
        return self.model(x)


In [5]:
# Hyperparameters
noise_dim = 100
input_dim = X_train.shape[1]
batch_size = 64
epochs = 10

# Initialize models and optimizers
gen = Generator(noise_dim, input_dim)
disc = Discriminator(input_dim)
g_opt = optim.Adam(gen.parameters(), lr=0.001)
d_opt = optim.Adam(disc.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training loop
for epoch in range(epochs):
    permutation = torch.randperm(X_train.size(0))

    for i in range(0, X_train.size(0), batch_size):
        indices = permutation[i:i+batch_size]
        real_data = X_train[indices]
        real_labels = y_train[indices]

        b_size = real_data.size(0)

        # ---- Train Discriminator ----
        z = torch.randn(b_size, noise_dim)
        fake_data = gen(z).detach()

        real_output = disc(real_data)
        fake_output = disc(fake_data)

        # real class = 0 or 1, fake class = 2
        real_targets = real_labels
        fake_targets = torch.full((b_size,), 2, dtype=torch.long)

        d_loss_real = criterion(real_output, real_targets)
        d_loss_fake = criterion(fake_output, fake_targets)
        d_loss = d_loss_real + d_loss_fake

        d_opt.zero_grad()
        d_loss.backward()
        d_opt.step()

        # ---- Train Generator ----
        z = torch.randn(b_size, noise_dim)
        gen_data = gen(z)
        d_output = disc(gen_data)

        # Try to fool discriminator: assign real labels randomly (0 or 1)
        g_targets = torch.randint(0, 2, (b_size,))
        g_loss = criterion(d_output, g_targets)

        g_opt.zero_grad()
        g_loss.backward()
        g_opt.step()

    print(f"Epoch [{epoch+1}/{epochs}], D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}")


Epoch [1/10], D Loss: 0.5323, G Loss: 3.0236
Epoch [2/10], D Loss: 1.1239, G Loss: 2.6370
Epoch [3/10], D Loss: 1.2781, G Loss: 2.9823
Epoch [4/10], D Loss: 0.9044, G Loss: 2.9824
Epoch [5/10], D Loss: 1.2207, G Loss: 3.1507
Epoch [6/10], D Loss: 0.8147, G Loss: 3.5550
Epoch [7/10], D Loss: 0.6960, G Loss: 3.4037
Epoch [8/10], D Loss: 0.4785, G Loss: 3.8445
Epoch [9/10], D Loss: 0.8068, G Loss: 4.9976
Epoch [10/10], D Loss: 0.8279, G Loss: 4.4811


In [6]:
with torch.no_grad():
    test_output = disc(X_test)
    preds = torch.argmax(test_output[:, :2], dim=1)
    acc = (preds == y_test).float().mean()
    print(f"\nüéØ Test Accuracy: {acc.item():.4f}")



üéØ Test Accuracy: 1.0000


#Check Results

In [7]:
import random

# Function to classify a single news article
def classify_random_article(gen, disc, vectorizer, filepath='fake.csv'):
    import pandas as pd
    import torch

    # Load the dataset
    df = pd.read_csv(filepath)

    if 'label' not in df.columns:
        df['label'] = 0  # All samples from fake.csv assumed fake

    # Drop rows with missing text
    df = df.dropna(subset=['text']).reset_index(drop=True)

    # Pick a random sample
    idx = random.randint(0, len(df) - 1)
    text = df.loc[idx, 'text']
    true_label = df.loc[idx, 'label']

    # Vectorize the text using the same TF-IDF vectorizer
    text_vector = vectorizer.transform([text]).toarray()
    text_tensor = torch.tensor(text_vector, dtype=torch.float32)

    # Get prediction from discriminator
    disc.eval()
    with torch.no_grad():
        output = disc(text_tensor)
        pred_class = torch.argmax(output[:, :2], dim=1).item()

    label_map = {0: 'Fake', 1: 'Real'}
    print("üì∞ Random News Article:")
    print("-----------------------------------------------------")
    print(text[:1000] + ("..." if len(text) > 1000 else ""))
    print("-----------------------------------------------------")
    print(f"‚úÖ Predicted Class: {label_map[pred_class]}")
    print(f"üéØ True Label: {label_map[true_label]}")

# ‚ö†Ô∏è Call this only after training. It uses the trained `disc`, `vectorizer`.
classify_random_article(gen, disc, vectorizer)


üì∞ Random News Article:
-----------------------------------------------------
Obama made Wall Street out to be the enemy during his presidency. He bashed them on one hand then supported them with the other Speaking out of both sides of his mouth is a true talent Obama has. He counts on the people to forget and forgive his misdeeds and they do. Who else could get away with so many lies? Remember when he called Wall Street every bad name in the book? Fat Cats?He also said,  At some point you ve made enough money We d like to know what  enough money  means to Obama who s taking 400K for his speech at a Cantor Fitzgerald event in September. We think it s time for Obama to practice what he preaches and  spread the wealth . It s only fair, right?OBAMA WANTS TO GET IN ON THE ACT THE CLINTONS HAVE BEEN DOING FOR DECADES   BIG MONEY SPEECHES ARE THEIR BREAD AND BUTTER: OUR PREVIOUS REPORT ON SPEECHES THE CLINTONS HAVE GIVEN: Thank God she has a vagina, otherwise her blatant hypocrisy might be

In [9]:
# Function to classify a specific row (by index) from the dataset
def classify_article_by_row(gen, disc, vectorizer, row_index, filepath='fake.csv'):
    import pandas as pd
    import torch

    # Load the dataset
    df = pd.read_csv(filepath)

    # If label not present, assume all fake
    if 'label' not in df.columns:
        df['label'] = 0

    # Drop rows with missing text
    df = df.dropna(subset=['text']).reset_index(drop=True)

    # Check if row_index is valid
    if row_index >= len(df) or row_index < 0:
        print(f"‚ö†Ô∏è Invalid row index {row_index}. Dataset has {len(df)} rows.")
        return

    # Get the text and label
    text = df.loc[row_index, 'text']
    true_label = df.loc[row_index, 'label']

    # Vectorize text
    text_vector = vectorizer.transform([text]).toarray()
    text_tensor = torch.tensor(text_vector, dtype=torch.float32)

    # Predict class using the discriminator
    disc.eval()
    with torch.no_grad():
        output = disc(text_tensor)
        pred_class = torch.argmax(output[:, :2], dim=1).item()

    label_map = {0: 'Fake', 1: 'Real'}
    print(f"üì∞ News Article at Row {row_index}:")
    print("-----------------------------------------------------")
    print(text[:1000] + ("..." if len(text) > 1000 else ""))
    print("-----------------------------------------------------")
    print(f"‚úÖ Predicted Class: {label_map[pred_class]}")
    print(f"üéØ True Label: {label_map[true_label]}")

# üîç Example usage:
# Get classification result for row 10 (change to any index you want)
classify_article_by_row(gen, disc, vectorizer, row_index=10)


üì∞ News Article at Row 10:
-----------------------------------------------------
A centerpiece of Donald Trump s campaign, and now his presidency, has been his white supremacist ways. That is why so many of the public feuds he gets into involve people of color. One of his favorite targets, is, of course, the players in the National Football League who dare to exercise their First Amendment rights by kneeling during the national anthem in protest of racist police brutality. Well, there is one person who has figured out that racism is bad for business, even if it did get the orange overlord elected: The founder of the pizza chain Papa John s.This is a man who has never been on the right side of history on any number of issues, and plus his pizza sucks. But, when he decided to complain about the players protesting, his sales really dropped. Turns out racism doesn t pay, and we all know that corporations are all about the bottom line. Therefore, Papa John Schnatter will no longer be CEO 

In [10]:
# Get classification result for row 100 (change to any index you want)
classify_article_by_row(gen, disc, vectorizer, row_index=100)

üì∞ News Article at Row 100:
-----------------------------------------------------
Former Vice President Joe Biden was asked on Monday by Matt Lauer on NBC s  Today  to name something specific that Donald Trump has been  doing well. Well, that seems like a trick question since Trump has passed no major legislation and reaches across the aisle only to take shots at Democrats in his Twitter timeline during his morning rage-tweets, so Biden struggled to find something, anything, that Trump has done well since taking office. I think there s a number of things he s doing well. But even the things he s doing well, it s how he does them,  Biden said. It s more the tone of this administration that bothers me,  he continued. With all due respect, you haven t come up with one thing you think he s doing well,  Lauer said. Well, I think he married very well,  Biden joked.Although, Biden didn t mention which of Trump s three marriages he s speaking of. Trump s first marriage to Ivana ended after h

In [11]:
# Get classification result for row 123 (change to any index you want)
classify_article_by_row(gen, disc, vectorizer, row_index=123)

üì∞ News Article at Row 123:
-----------------------------------------------------
Long before Cara Mund,  Miss North Dakota,  won the 2018 Miss America pageant, she faced a crowd of 50 other young women, all competing in various categories for the coveted crown. Two of those categories, to most of our readers, are patriarchal and silly: The evening wear and swimwear  competitions  pit all these beautiful young women against one another for assessment on their looks by a panel of judges.The other two categories, however, can be quite interesting, especially when something happens like what went down with Miss Texas in this year s competition.The talent portion of the show is, of course, just plain entertaining. Perhaps you ve seen the movie Miss Congeniality, starring Sandra Bullock, and if you have, you know what I m hoping to see in a talent show. The real thing is very little like that, however, and so is the interview portion of Miss America. Source: Meme Central, a folder on my c