In [1]:
%%writefile truthshield_app.py
import streamlit as st
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split

# Load the data, build source credibility scores and train the model.
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the expensive work (CSV parsing and the cross-validated
# fit) is wrapped in a cached function and performed once per server process
# instead of once per button click.
@st.cache_resource
def _load_resources(csv_path='NewTask.csv'):
    """Load the labelled headlines and fit the text classifier.

    Args:
        csv_path: Path of the CSV file; it must provide the columns
            'News_Headline', 'Label' and 'Link_Of_News'.

    Returns:
        A tuple ``(data, credibility, vec, clf)``:
        data -- cleaned DataFrame with columns 'Statement', 'Label' (1 for
            TRUE, 0 for FALSE) and 'Link'.
        credibility -- dict mapping each link to
            ``{'count': <rows>, 'truth_ratio': <mean label>}``.
        vec -- TfidfVectorizer fitted on the training statements.
        clf -- LogisticRegressionCV fitted on the vectorised training split.
    """
    data = pd.read_csv(csv_path, encoding='ISO-8859-1')
    data = data[['News_Headline', 'Label', 'Link_Of_News']].dropna()
    data.columns = ['Statement', 'Label', 'Link']

    # Normalise labels before filtering; stripping whitespace keeps rows such
    # as "TRUE " that would otherwise be discarded by the isin() filter.
    data['Label'] = data['Label'].astype(str).str.strip().str.upper()
    data = data[data['Label'].isin(['TRUE', 'FALSE'])]
    data['Label'] = data['Label'].apply(lambda x: 1 if x == 'TRUE' else 0)

    # Source credibility scores: per link, the number of labelled rows and
    # the share of them labelled TRUE.
    link_stats = (
        data.groupby('Link')['Label']
        .agg(['count', 'mean'])
        .rename(columns={'mean': 'truth_ratio'})
    )
    credibility = link_stats.to_dict(orient='index')

    # Model training. The 20% hold-out is split off (and currently unused) so
    # the model is fitted on the same stratified 80% subset as before.
    X_train, _, y_train, _ = train_test_split(
        data['Statement'],
        data['Label'],
        test_size=0.2,
        stratify=data['Label'],
        random_state=42,
    )
    vec = TfidfVectorizer(stop_words='english', max_features=5000)
    X_train_vec = vec.fit_transform(X_train)
    clf = LogisticRegressionCV(cv=5, max_iter=1000)
    clf.fit(X_train_vec, y_train)

    return data, credibility, vec, clf


# NOTE: cached resources are shared between reruns and sessions; the code
# below must treat these objects as read-only.
df, link_credibility, vectorizer, model = _load_resources()

# Streamlit interface: a title, a free-text input and a "Check" button.
# On click the input is classified by the trained model and fuzzy-matched
# against the known statements to report the matched source's credibility.
st.title("🛡️ TruthShield: AI-Powered Fake News & Misinformation Detector")

input_statement = st.text_area("Enter a news statement to verify:")

check_clicked = st.button("Check")

if check_clicked and not input_statement.strip():
    # Nothing but whitespace was entered.
    st.warning("Please enter a statement.")
elif check_clicked:
    # ML prediction: vectorise the raw input and read the probability the
    # model assigns to the class it predicted.
    features = vectorizer.transform([input_statement])
    predicted_label = model.predict(features)[0]
    confidence = model.predict_proba(features)[0][predicted_label]

    if predicted_label == 1:
        verdict = "✅ TRUE NEWS"
        reason = "This statement is similar to reliable, verified information sources."
    else:
        verdict = "❌ FAKE NEWS"
        reason = "This statement shows patterns often seen in fake or misleading news articles."

    # Fuzzy match the input to the closest known statement (at most one
    # candidate, similarity cutoff 0.6).
    known_statements = df['Statement'].tolist()
    matches = difflib.get_close_matches(
        input_statement.strip(), known_statements, n=1, cutoff=0.6
    )

    if not matches:
        source_link = "🔍 No matching news link found"
        credibility_score = 5.0
        bias_note = "⚠️ Bias unknown (link not found)"
    else:
        # Use the link of the first row carrying the matched statement.
        source_link = df[df['Statement'] == matches[0]].iloc[0]['Link']
        source_stats = link_credibility.get(source_link)
        if source_stats is None:
            credibility_score = 5.0
            bias_note = "⚠️ Unknown Source"
        else:
            # Scale the share of TRUE labels (0..1) to a 0..10 score.
            credibility_score = source_stats['truth_ratio'] * 10
            if credibility_score < 4 and source_stats['count'] > 3:
                bias_note = "⚠️ Biased Source"
            else:
                bias_note = "✅ Fair Source"

    # Output results
    st.subheader("🔍 Analysis Result")
    report_lines = (
        f"**Verdict:** {verdict}",
        f"**Confidence:** {confidence * 100:.2f}%",
        f"**Reason:** {reason}",
        f"**Source Link:** {source_link}",
        f"**Source Credibility:** {credibility_score:.1f}/10",
        f"**Bias Check:** {bias_note}",
    )
    for report_line in report_lines:
        st.write(report_line)


Overwriting truthshield_app.py


In [None]:
!streamlit run truthshield_app.py