In [1]:
# Attach Google Drive to the Colab runtime; the model checkpoint loaded
# further down is read from a path under this mount point.
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install -q gradio torch transformers pillow

import gradio as gr
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from PIL import Image

# Load the tokenizer and the fine-tuned BERT classifier used to score headlines.
print("Loading text model...\n")

text_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
text_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Start from the stock architecture with a 2-label classification head. The
# head is freshly initialised at this point (transformers warns about it);
# the checkpoint loaded next supplies the trained weights.
text_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict needs, so loading the file cannot run arbitrary
# pickled code.
text_model.load_state_dict(
    torch.load(
        '/content/drive/MyDrive/EAI6010_Final Project/Models/bert_best_model.pt',
        map_location=text_device,
        weights_only=True,
    )
)
text_model = text_model.to(text_device)
text_model.eval()  # switch to evaluation mode for inference

print("✅ Text model loaded (84.5% accuracy)\n")

def predict_text(text):
    """Score a headline with the fine-tuned BERT classifier.

    Args:
        text: Headline to classify. ``None``, empty strings, and strings
            shorter than 5 characters after stripping whitespace are rejected
            without running the model.

    Returns:
        A ``(p0, p1)`` tuple of Python floats holding the softmax
        probabilities of class 0 and class 1, or ``(None, None)`` when the
        input is rejected. The caller in this file treats class 0 as "fake"
        and class 1 as "real".
    """
    if not text or len(text.strip()) < 5:
        return None, None

    # Calling the tokenizer directly is the documented replacement for the
    # legacy ``encode_plus`` method and yields the same encoding.
    encoding = text_tokenizer(
        text,
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = text_model(
            input_ids=encoding['input_ids'].to(text_device),
            attention_mask=encoding['attention_mask'].to(text_device),
        )
        probs = torch.softmax(outputs.logits, dim=1)

    return float(probs[0][0]), float(probs[0][1])

# Canned chat replies. Kept at module level so the dispatcher below stays
# readable and the start prompt is defined only once.
_START_PROMPT = """Send me a headline to analyze!

**Quick start:**
🎯 Click the colored sample buttons below for instant examples

**Or type your own:**
Just paste any news headline and I'll analyze it for misinformation signals.

What would you like me to check?"""

_GREETING_REPLY = """Hello! I'm TruthLens AI, your fake news detection assistant.

Click the sample buttons below or type any headline to analyze!"""

_HELP_REPLY = """**Usage Guide**

**How to use:**
Simply type or paste any news headline. I'll analyze it for misinformation signals.

**Sample buttons:**
Click the colored buttons below to see instant analysis of example headlines.

**What I analyze:**
- Language patterns (sensational, clickbait, exaggerated)
- Emotional manipulation tactics
- Suspicious claims and framing
- Statistical likelihood based on 20,000 training examples

**Available commands:**
- "about" - Learn about the system
- "stats" - View performance metrics
- "help" - Show this guide

**Best practice:**
Send complete headlines for most accurate results. I work best with social media style posts and news headlines.

Ready to analyze? Click a sample or type your own headline!"""

_ABOUT_REPLY = """**TruthLens AI - Text Analysis System**

**Architecture:**
BERT (Bidirectional Encoder Representations from Transformers)
- 110 million parameters
- Pre-trained on billions of words
- Fine-tuned on fake news detection

**Training:**
- Dataset: Fakeddit (20,000 Reddit posts)
- Split: 60% real news, 40% fake news
- Epochs: 2 (optimal convergence)
- Time: ~35 minutes on Tesla T4 GPU

**Performance:**
- Test Accuracy: 84.5%
- Precision: 81.4%
- Recall: 83.3%
- F1-Score: 0.824

**Baseline comparison:**
- Linear SVM: 75.7%
- BERT: 84.5%
- Improvement: +8.8 percentage points (+11.6% relative)

**Detection capabilities:**
Identifies sensational language, clickbait patterns, exaggerated claims, emotional manipulation, and other misinformation signals.

**Development:**
Built by EAI6010 Applications of AI students at Northeastern University.

Click samples below to see me in action!"""

_STATS_REPLY = """**Performance Metrics**

**Text Model (BERT):**
- Test Accuracy: 84.5%
- Precision: 81.4% (when I say "fake", I'm correct 81% of the time)
- Recall: 83.3% (I catch 83% of all fake headlines)
- F1-Score: 0.824

**Training details:**
- Dataset: Fakeddit (20,000 samples)
- Baseline (SVM): 75.7%
- BERT: 84.5%
- Improvement: +8.8 percentage points

**Speed:**
- Analysis time: <1 second per headline
- Production-ready for real-time deployment

**Comparison to research:**
- Published Fakeddit baseline: 73.4%
- Our model: 84.5%
- Exceeds baseline by +11 percentage points

Want to test these numbers? Try the sample buttons below!"""

_THANKS_REPLY = "You're welcome! Feel free to analyze more headlines anytime."

_TOO_LONG_REPLY = """That's quite long. I work best with headlines (1-2 sentences).

Could you send just the main headline?"""

_GREETINGS = ('hi', 'hello', 'hey', 'greetings', 'good morning', 'good afternoon')
_STATS_WORDS = {'stat', 'stats', 'statistic', 'statistics',
                'accuracy', 'performance', 'metric', 'metrics'}
_THANKS_WORDS = {'thank', 'thanks', 'thankyou'}

# Keyword lists used for the "suspicious patterns" section of the report.
# NOTE: 'shocking' appears under two categories, so a headline containing it
# is reported under both.
_SUSPICIOUS_PATTERNS = {
    'Sensational': ['breaking', 'shocking', 'unbelievable', 'stunning'],
    'Exaggerated': ['miracle', '100%', 'always', 'never', 'everyone'],
    'Emotional': ['!!!', 'terrifying', 'outrageous', 'devastating'],
    'Clickbait': ["won't believe", 'what happens', 'must see', 'shocking'],
}


def _command_reply(message):
    """Return the canned reply for a greeting/command, or None for a headline."""
    lowered = message.lower()
    if lowered in _GREETINGS:
        return _GREETING_REPLY

    # Match commands on whole words, not substrings, so short headlines such
    # as "State budget approved" or "Thanksgiving parade" are analyzed instead
    # of being mistaken for the "stats" or "thanks" commands.
    words = set(''.join(ch if ch.isalpha() else ' ' for ch in lowered).split())
    length = len(message)

    if length < 20 and 'help' in words:
        return _HELP_REPLY
    if length < 20 and 'about' in words:
        return _ABOUT_REPLY
    if length < 30 and words & _STATS_WORDS:
        return _STATS_REPLY
    if length < 30 and words & _THANKS_WORDS:
        return _THANKS_REPLY
    return None


def _risk_assessment(text_fake, text_real):
    """Map the two class probabilities to the closing assessment paragraph."""
    if text_fake > 0.85:
        return "🔴 **HIGH RISK**\n\nThis headline shows strong indicators of misinformation. The language patterns detected are characteristic of fake news in my training data.\n\n**Recommendation:** Strongly suggest fact-checking through established news sources (Snopes, PolitiFact, AP, Reuters) before considering this credible or sharing it."
    if text_fake > 0.7:
        return "🟠 **MODERATE-HIGH RISK**\n\nThis headline has several characteristics associated with fake news. While I can't verify the factual claims themselves, the language style raises red flags.\n\n**Recommendation:** Approach with skepticism and verify through multiple trusted sources if this information is important to you."
    if text_fake > 0.55:
        return "🟡 **MODERATE RISK**\n\nI'm leaning toward classifying this as fake, though my confidence is moderate. Some concerning patterns are present.\n\n**Recommendation:** Worth fact-checking if you're considering sharing or acting on this information."
    if text_real > 0.85:
        return "🟢 **APPEARS LEGITIMATE**\n\nThis headline has characteristics of genuine news reporting - neutral tone, specific details, and lack of sensationalism.\n\n**Note:** While this appears legitimate, always verify important claims through original sources."
    if text_real > 0.65:
        return "🟢 **LIKELY LEGITIMATE**\n\nThe headline structure and language suggest legitimate news reporting, though my confidence is moderate.\n\n**Note:** Always good practice to check the source for important information."
    return "🟡 **UNCERTAIN**\n\nMixed signals detected. The classification isn't clear-cut.\n\n**Recommendation:** Consider the source's credibility and cross-reference if the information matters to you."


def _build_report(message, text_fake, text_real):
    """Format the markdown analysis report for one scored headline."""
    text_conf = max(text_fake, text_real) * 100

    # Below 65% confidence the verdict is reported as uncertain regardless of
    # which class is ahead.
    if text_conf < 65:
        text_verdict = "🟡 UNCERTAIN"
    else:
        text_verdict = "🚨 FAKE" if text_fake > 0.5 else "✅ REAL"

    parts = [
        "## 🔍 Analysis Complete\n\n**Text Analysis**\n\n---\n\n",
        f"### Result: {text_verdict}\n**Confidence: {text_conf:.1f}%**\n\n",
        f"- Real News: {text_real*100:.1f}%\n",
        f"- Fake News: {text_fake*100:.1f}%\n\n",
    ]

    # Case-insensitive keyword scan, independent of the model score.
    lowered = message.lower()
    found_patterns = {}
    for category, keywords in _SUSPICIOUS_PATTERNS.items():
        matches = [kw for kw in keywords if kw.lower() in lowered]
        if matches:
            found_patterns[category] = matches

    if found_patterns:
        parts.append("**Suspicious patterns detected:**\n")
        for category, words in found_patterns.items():
            parts.append(f"- {category}: *{', '.join(words)}*\n")
        parts.append("\n")
    else:
        parts.append("**Language analysis:** No obvious red flags detected\n\n")

    parts.append("---\n\n### 💡 Assessment:\n\n")
    parts.append(_risk_assessment(text_fake, text_real))
    parts.append("\n\nAnalyze another? Click samples or type a new headline!")
    return "".join(parts)


def analyze_content(message, history):
    """Produce the chatbot reply for one user message.

    Short greetings and the "help" / "about" / "stats" / "thanks" commands get
    a canned reply; messages over 500 or under 10 characters get a prompt to
    send a proper headline; everything else is scored with ``predict_text``
    and rendered as a markdown report.

    Args:
        message: Raw text typed by the user (or sent by a sample button).
            ``None`` and empty strings return the start prompt.
        history: Chat history supplied by the UI. It is not used here; the
            parameter exists so the function matches the handlers' call shape.

    Returns:
        The reply as a markdown string. Failures during analysis are reported
        in the returned string rather than raised.
    """
    if not message:
        return _START_PROMPT

    message = message.strip()

    canned = _command_reply(message)
    if canned is not None:
        return canned

    if len(message) > 500:
        return _TOO_LONG_REPLY
    if len(message) < 10:
        return _START_PROMPT

    # Chat-handler boundary: report any failure to the user instead of letting
    # it propagate into the UI callback.
    try:
        text_fake, text_real = predict_text(message)
        if text_fake is None:
            return "Please send a valid headline (at least 10 characters)."
        return _build_report(message, text_fake, text_real)
    except Exception as e:
        return f"An error occurred during analysis: {str(e)}\n\nPlease try again with a different headline."

# Helper function for sample buttons
def add_sample_and_analyze(sample_text, history):
    """Submit *sample_text* on the user's behalf and append the exchange.

    Args:
        sample_text: Headline or command to run through ``analyze_content``.
        history: Current chatbot history as a list of ``[user, bot]`` pairs.
            ``None`` is treated as an empty conversation.

    Returns:
        A new history list with ``[sample_text, reply]`` appended. The list
        passed in is not modified.
    """
    # Guard against a missing history so the first click cannot fail on
    # ``None + list``.
    past_turns = list(history or [])
    reply = analyze_content(sample_text, past_turns)
    return past_turns + [[sample_text, reply]]

# Custom stylesheet handed to gr.Blocks(css=...). It sets the page font and
# maximum width, styles the chat bubbles (.message.bot / .message.user) and the
# header metric cards (.metric-card), and gives buttons rounded corners with a
# small lift on hover.
css = """
.gradio-container {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    max-width: 1400px;
    margin: auto;
}
.message.bot {
    background: linear-gradient(135deg, #f5f7fa 0%, #ffffff 100%);
    border-left: 4px solid #2196F3;
    border-radius: 12px;
    padding: 18px;
    margin: 12px 0;
    box-shadow: 0 4px 12px rgba(0,0,0,0.08);
}
.message.user {
    background: linear-gradient(135deg, #e3f2fd 0%, #f8f9fa 100%);
    border-left: 4px solid #1976D2;
    border-radius: 12px;
    padding: 18px;
    margin: 12px 0;
    box-shadow: 0 4px 12px rgba(0,0,0,0.08);
}
.metric-card {
    background: linear-gradient(135deg, #f6f8fa 0%, #ffffff 100%);
    padding: 1.25rem;
    border-radius: 10px;
    text-align: center;
    border: 1px solid #d0d7de;
    box-shadow: 0 2px 8px rgba(0,0,0,0.05);
}
button {
    border-radius: 8px;
    font-weight: 500;
    transition: all 0.2s ease;
}
button:hover {
    transform: translateY(-1px);
    box-shadow: 0 4px 12px rgba(0,0,0,0.15);
}
"""

# Build the TruthLens chat UI: title, metric cards, a sidebar with info
# accordions and shortcut buttons, and the chatbot pane with its input row.
# NOTE(review): the captured notebook output shows warnings pointing at the
# gr.Blocks(...) and gr.Chatbot(...) calls below; check them against the
# deprecation notes of the installed Gradio version.
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="TruthLens AI") as demo:

    gr.HTML('<h1 style="font-size: 2.5rem; font-weight: 600; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; margin-bottom: 0.5rem; text-align: center;">🔍 TruthLens AI</h1>')
    gr.HTML('<p style="text-align: center; color: #666; font-size: 1rem; margin-bottom: 1.5rem;">Advanced AI-Powered Fake News Detection</p>')

    # Headline metrics: one card per (value, caption) pair, all from one template.
    metric_card_html = '<div class="metric-card"><div style="font-size: 2rem; font-weight: 700; color: #0969da;">{value}</div><div style="font-size: 0.85rem; color: #57606a; margin-top: 0.5rem;">{label}</div></div>'
    with gr.Row():
        for card_value, card_label in (("84.5%", "Accuracy"), ("0.824", "F1-Score"), ("+11.6%", "Improvement")):
            with gr.Column():
                gr.HTML(metric_card_html.format(value=card_value, label=card_label))

    gr.Markdown("---")

    with gr.Row():
        # Sidebar: reference accordions plus sample / command shortcut buttons.
        with gr.Column(scale=1, min_width=280):

            with gr.Accordion("ℹ️ About TruthLens", open=False):
                gr.Markdown("""
                **Text Analysis Engine**
                - Model: BERT (110M parameters)
                - Dataset: Fakeddit (20K posts)
                - Accuracy: 84.5%

                Detects clickbait, sensational language, exaggerated claims, and misinformation patterns.
                """)

            with gr.Accordion("📊 Performance", open=False):
                gr.Markdown("""
                | Metric | Score |
                |--------|-------|
                | Accuracy | 84.5% |
                | Precision | 81.4% |
                | Recall | 83.3% |
                | F1-Score | 0.824 |

                **Baseline:** SVM 75.7%
                **BERT:** 84.5%
                **Improvement:** +8.8pp

                Exceeds published baseline by +11 points!
                """)

            with gr.Accordion("💡 Quick Guide", open=False):
                gr.Markdown("""
                **How to use:**
                1. Click sample buttons below
                2. Or type your own headline
                3. Get instant analysis

                **Commands:**
                - "help" - Full guide
                - "about" - System details
                - "stats" - Performance metrics

                **Best results:**
                Complete headlines work best!
                """)

            gr.Markdown("**🎯 Try Sample Headlines:**")
            s1 = gr.Button("🚨 Fake: Miracle Cure", size="sm")
            s2 = gr.Button("✅ Real: Senate Hearing", size="sm")
            s3 = gr.Button("🚨 Fake: Celebrity Shock", size="sm")
            s4 = gr.Button("✅ Real: Research Study", size="sm")

            gr.Markdown("\n**Quick Commands:**")
            help_btn = gr.Button("❓ Help Guide", size="sm")
            about_btn = gr.Button("ℹ️ About System", size="sm")

        # Main pane: conversation view, input row and clear button.
        with gr.Column(scale=3):
            # NOTE(review): Gradio documents avatar_images as image file paths
            # or URLs; confirm that an emoji string renders as intended.
            chatbot = gr.Chatbot(
                height=520,
                show_label=False,
                avatar_images=(None, "🤖"),
                bubble_full_width=False,
                show_copy_button=True
            )

            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Type any news headline or click sample buttons...",
                    show_label=False,
                    scale=5,
                    container=False
                )
                send_btn = gr.Button("Send", variant="primary", scale=1)

            clear_btn = gr.Button("🗑️ Clear Conversation", size="sm", variant="secondary")

    gr.Markdown('<div style="text-align: center; padding: 1.5rem; color: #57606a; font-size: 0.875rem;">EAI6010 Applications of AI | Northeastern University | Powered by BERT on Fakeddit Dataset</div>')

    def respond(message, history):
        """Analyze the typed message and append the exchange to the chat.

        Returns ``("", new_history)``: the empty string clears the textbox and
        the new list replaces the chatbot contents. A ``None`` history is
        treated as an empty conversation.
        """
        past_turns = list(history or [])
        result = analyze_content(message, past_turns)
        return "", past_turns + [[message, result]]

    def _canned_sender(text):
        """Return a click handler that submits the fixed *text* to the chat."""
        # A factory (not a lambda written in the loop below) so each handler
        # keeps its own text instead of the loop's last value.
        return lambda h: add_sample_and_analyze(text, h)

    send_btn.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear_btn.click(lambda: [], None, chatbot)

    # Sample headlines and quick commands all follow the same flow: push a
    # fixed message through the analyzer and append the exchange to the chat.
    canned_messages = [
        (s1, "BREAKING: Scientists discover miracle cure that works 100% of the time!!!"),
        (s2, "Senate committee schedules hearing on healthcare reform for next month"),
        (s3, "You won't BELIEVE what this celebrity just said about politics - SHOCKING revelation!!!"),
        (s4, "Research team publishes peer-reviewed findings on climate change impacts in Nature journal"),
        (help_btn, "help"),
        (about_btn, "about"),
    ]
    for shortcut_button, canned_text in canned_messages:
        shortcut_button.click(
            _canned_sender(canned_text),
            inputs=chatbot,
            outputs=chatbot
        )

# Startup banner and launch. All status lines are printed BEFORE
# demo.launch(): with debug=True the call blocks in Colab for as long as the
# server runs, so anything printed after it would only appear once the app
# has already been stopped.
print("="*70)
print("🚀 LAUNCHING TRUTHLENS AI - TEXT DETECTION")
print("="*70)
print("\n✨ Features:")
print("   ✓ BERT text analysis (84.5% accuracy)")
print("   ✓ Sample buttons with instant analysis")
print("   ✓ Professional Claude-inspired interface")
print("   ✓ Clean, focused design")
print("\n📝 Note: Image model integration coming in full release")
print("="*70 + "\n")

print("\n🎉 TruthLens AI is LIVE!")
print("✅ Text analysis working perfectly!")
print("🎯 Sample buttons trigger instant analysis!")

# share=True requests a public share link; debug=True keeps the cell attached
# so errors and logs are shown in the notebook.
demo.launch(share=True, debug=True)



Loading text model...



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Text model loaded (84.5% accuracy)



  with gr.Blocks(theme=gr.themes.Soft(), css=css, title="TruthLens AI") as demo:
  with gr.Blocks(theme=gr.themes.Soft(), css=css, title="TruthLens AI") as demo:
  chatbot = gr.Chatbot(
  chatbot = gr.Chatbot(
  chatbot = gr.Chatbot(
  chatbot = gr.Chatbot(


🚀 LAUNCHING TRUTHLENS AI - TEXT DETECTION

✨ Features:
   ✓ BERT text analysis (84.5% accuracy)
   ✓ Sample buttons with instant analysis
   ✓ Professional Claude-inspired interface
   ✓ Clean, focused design

📝 Note: Image model integration coming in full release

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://ff27d6c56a8dd92e4d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
