In [12]:
import os
from datetime import datetime, timedelta

import numpy as np
import requests
import torch
from safetensors.torch import load_file
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig

In [13]:
class RealTimeDeepfakeDetector:
    """BERT sequence classifier for news text with an optional NewsAPI cross-check.

    ``predict`` first classifies the text with the loaded BERT model. When the
    model says "Fake", or its confidence is below ``verification_threshold``,
    the text is looked up on NewsAPI; if a sufficiently similar recent article
    is found the result is overridden to "Real (Verified)".
    """

    NEWS_API_URL = "https://newsapi.org/v2/everything"
    REQUEST_TIMEOUT = 10  # seconds; prevents a stalled HTTP call from hanging predict()

    def __init__(self, model_path="model.safetensors", news_api_key=None,
                 verification_threshold=0.7):
        """Load tokenizer and model, and configure the verification step.

        Args:
            model_path: Either something ``from_pretrained`` can load locally,
                or a bare weights file (``.safetensors`` or a ``torch.save``
                checkpoint).
            news_api_key: NewsAPI key. When omitted, the ``NEWS_API_KEY``
                environment variable is used. If neither is set, the
                verification step is skipped (it reports "not verified").
            verification_threshold: Confidence below which a prediction is
                sent to verification.

        Raises:
            Whatever the fallback weight loading raises (e.g. missing file or
            mismatching state-dict keys) when both loading strategies fail.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

        try:
            # First try loading a full model (as written by save_pretrained).
            self.model = BertForSequenceClassification.from_pretrained(
                model_path,
                local_files_only=True
            ).to(self.device)
        except Exception:
            # `Exception`, not a bare except: Ctrl-C / SystemExit must still
            # propagate. Any loading failure falls back to raw weights.
            self.model = self._load_from_weights(model_path)

        self.model.eval()
        # Never keep credentials in source; take them from the caller or the environment.
        self.news_api_key = news_api_key or os.environ.get("NEWS_API_KEY")
        self.verification_threshold = verification_threshold

    def _load_from_weights(self, model_path):
        """Build a 2-label bert-base-uncased classifier and load a state dict into it."""
        config = BertConfig.from_pretrained(
            'bert-base-uncased',
            num_labels=2
        )
        model = BertForSequenceClassification(config).to(self.device)

        if model_path.endswith('.safetensors'):
            state_dict = load_file(model_path, device=str(self.device))
        else:
            # NOTE(security): torch.load unpickles the file and can execute
            # arbitrary code; only use checkpoints from a trusted source.
            state_dict = torch.load(model_path, map_location=self.device)
        model.load_state_dict(state_dict)
        return model

    def predict(self, text):
        """Run the complete prediction pipeline.

        Returns:
            ``(label, confidence, sources)``. ``label`` is "Real", "Fake" or
            "Real (Verified)"; ``sources`` is non-empty only for a verified
            result, in which case ``confidence`` is reported as 1.0.
        """
        # Step 1: BERT prediction
        bert_pred, bert_conf = self._bert_predict(text)

        # Step 2: cross-check uncertain or "Fake" predictions against NewsAPI
        if bert_conf < self.verification_threshold or bert_pred == "Fake":
            is_verified, sources = self._verify_news(text)
            if is_verified:
                return "Real (Verified)", 1.0, sources

        return bert_pred, bert_conf, []

    def _bert_predict(self, text):
        """Classify ``text`` with the BERT model.

        Returns:
            ``(label, confidence)`` where confidence is the larger softmax
            probability.
        """
        inputs = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=512,
            return_tensors='pt'
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model(**inputs)
            probs = torch.softmax(outputs.logits, dim=1)[0].cpu().numpy()

        # Class index 1 is reported as "Fake" -- assumes the model was trained
        # with that label convention; TODO confirm against the training code.
        pred = "Fake" if np.argmax(probs) == 1 else "Real"
        return pred, float(np.max(probs))

    def _verify_news(self, text, days=2):
        """Look for recent NewsAPI articles resembling ``text``.

        The first five words of ``text`` are used as the search query and only
        the first three returned articles are compared.

        Returns:
            ``(is_verified, sources)``. Any network/HTTP/JSON failure, or a
            missing API key, yields ``(False, [])`` (best-effort check).
        """
        if not self.news_api_key:
            return False, []

        query = " ".join(text.split()[:5])  # Use first 5 words as query
        if not query:
            return False, []
        since = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')

        try:
            # `params` URL-encodes the query (it may contain '&', '#', '?');
            # the key travels in a header so it does not end up in the URL.
            response = requests.get(
                self.NEWS_API_URL,
                params={"q": query, "from": since},
                headers={"X-Api-Key": self.news_api_key},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError):
            # ValueError covers a body that is not valid JSON.
            return False, []

        articles = payload.get('articles') if isinstance(payload, dict) else None
        sources = self._matching_sources(text, (articles or [])[:3])  # Top 3 results
        return len(sources) > 0, sources

    def _matching_sources(self, text, articles):
        """Return source records for the articles whose title or content resembles ``text``."""
        sources = []
        for article in articles:
            if not isinstance(article, dict):
                continue
            if (self._is_similar(text, article.get('title'))
                    or self._is_similar(text, article.get('content'))):
                # .get throughout: one article with a missing field must not
                # discard the matches already collected.
                sources.append({
                    'source': (article.get('source') or {}).get('name'),
                    'url': article.get('url'),
                    'date': article.get('publishedAt')
                })
        return sources

    def _is_similar(self, text1, text2, threshold=0.5):
        """Return True when the Jaccard overlap of the two texts' lowercase word sets is >= ``threshold``."""
        if not text2:
            return False

        set1 = set((text1 or "").lower().split())
        set2 = set(text2.lower().split())
        union = set1 | set2
        if not union:
            # Both texts contain no words (e.g. whitespace only): avoid 0/0.
            return False
        return len(set1 & set2) / len(union) >= threshold


In [22]:
# Usage Example
# Loads the detector once, then prints the prediction (and the first verifying
# source, if any) for a handful of sample articles.
if __name__ == "__main__":
    # The model location can be overridden with BERT_FAKE_NEWS_MODEL_PATH so the
    # notebook runs on machines other than the author's; the original path is
    # kept as the default.
    model_path = os.environ.get(
        "BERT_FAKE_NEWS_MODEL_PATH",
        "/Users/fenilvadher/Documents/Collage Data/SEM - 6/AI/AI Project/bert_fake_news_model/model.safetensors",
    )

    # Test cases
    test_texts = [
        "OMG. Are Anupama Parameswaran and Dhruv Vikram dating?” wrote a Redditor posting a screengrab of the playlist with the picture on various movie groups. “Now they deleted this playlist or made private I guess,” commented one fan who searched for the playlist. Another wrote, “The playlist is removed now…so I guess it’s true.",
        # This text contains a line break; it is written as two adjacent
        # literals because a plain "..." string cannot span source lines.
        "(Reuters)Donald Trump spent a good portion of his day at his golf club, marking the 84th day he s done so since taking the oath of office. It must have been a bad game because just after that, Trump lashed out at FBI Deputy Director Andrew McCabe on Twitter following a report saying McCabe plans to retire in a few months. The report follows McCabe s testimony in front of congressional committees this week, as well as mounting criticism from Republicans regarding the Russia probe.So, naturally, Trump attacked McCabe with a lie. How can FBI Deputy Director Andrew McCabe, the man in charge, along with leakin  James Comey, of the Phony Hillary Clinton investigation (including her 33,000 illegally deleted emails) be given $700,000 for wife s campaign by Clinton Puppets during investigation?  Trump tweeted.How can FBI Deputy Director Andrew McCabe, the man in charge, along with leakin  James Comey, of the Phony Hillary Clinton investigation (including her 33,000 illegally deleted emails) be given $700,000 for wife s campaign by Clinton Puppets during investigation?  Donald J. Trump (@realDonaldTrump) December 23, 2017He didn t stop there.FBI Deputy Director Andrew McCabe is racing the clock to retire with full benefits. 90 days to go?!!!  Donald J. Trump (@realDonaldTrump) December 23, 2017Wow,  FBI lawyer James Baker reassigned,  according to @FoxNews.  Donald J. Trump (@realDonaldTrump) December 23, 2017With all of the Intel at Trump s disposal, he s getting his information from Fox News. McCabe spent most of his career in the fight against terrorism and now he s being attacked by the so-called president. Trump has been fact-checked before on his claim of his wife receiving $700,000 for her campaign.Politifact noted in late July that Trump s  tweet about Andrew McCabe is a significant distortion of the facts. And the implication that McCabe got Clinton off as a political favor doesn t make much sense when we look at the evidence. \n"
        "His July tweet was rated  mostly false.  But Trump repeats these lies because he knows his supporters will believe them without bothering to Google. It s still a lie, though.Photo by Zach Gibson   Pool/Getty Images.",
        "(Reuters)The following statements were posted to the verified Twitter accounts of U.S. President Donald Trump, @realDonaldTrump and @POTUS.  The opinions expressed are his own. Reuters has not edited the statements or confirmed their accuracy.  @realDonaldTrump : - “On 1/20 - the day Trump was inaugurated - an estimated 35,000 ISIS fighters held approx 17,500 square miles of territory in both Iraq and Syria. As of 12/21, the U.S. military estimates the remaining 1,000 or so fighters occupy roughly 1,900 square miles...” via @jamiejmcintyre  [1749 EST] - Just left West Palm Beach Fire & Rescue #2. Met with great men and women as representatives of those who do so much for all of us. Firefighters, paramedics, first responders - what amazing people they are! [1811 EST] - “On 1/20 - the day Trump was inaugurated - an estimated 35,000 ISIS fighters held approx 17,500 square miles of territory in both Iraq and Syria. As of 12/21, the U.S. military est the remaining 1,000 or so fighters occupy roughly 1,900 square miles..” @jamiejmcintyre @dcexaminer [2109 EST] - bit.ly/2liRH3b [2146 EST] -- Source link: (bit.ly/2jBh4LU) (bit.ly/2jpEXYR)",
        "(Reuters) - A gift-wrapped package addressed to U.S. Treasury Secretary Steven Mnuchin’s home in a posh Los Angeles neighborhood that was suspected of being a bomb was instead filled with horse manure, police told local media. The package was found Saturday evening in a next-door neighbor’s driveway in Bel Air, the Los Angeles Police Department told the Los Angeles Times and KNBC television, the NBC affiliate in Los Angeles. The package also included a Christmas card with negative comments about President Donald Trump and the new U.S. tax law signed by Trump last week. Reuters could not reach LAPD officials for comment on Sunday. An LAPD bomb squad X-rayed the package before opening it and found the horse manure inside, police told local media. Aerial footage from KNBC showed officers investigating a large box in wrapping paper, then dumping a large amount of what they later identified as the manure and opening the card that was included inside.  Mnuchin, who KNBC said was not home when the package was discovered, is a former Goldman Sachs Group Inc executive and Hollywood film financier. A road in Bel Air was closed for about two hours, KNBC reported. The U.S. Secret Service is also investigating the incident, according to the TV station. "# Real
    ]

    try:
        # Only construction is guarded here, so the "Initialization failed"
        # message is never printed for an unrelated prediction error.
        detector = RealTimeDeepfakeDetector(model_path)
    except Exception as e:
        print(f"Initialization failed: {str(e)}")
        print("\nTroubleshooting steps:")
        print("1. Ensure your model file exists at the specified path")
        print("2. Verify the model file is either:")
        print("   - A .safetensors file")
        print("   - A .bin file from save_pretrained()")
        print("3. Check your NewsAPI key is valid")
    else:
        for text in test_texts:
            print(f"\nText: {text[:60]}...")
            try:
                pred, conf, sources = detector.predict(text)
            except Exception as e:
                # Report the failing sample and keep going with the others.
                print(f"Prediction failed: {str(e)}")
                continue
            print(f"Prediction: {pred} ({conf:.1%})")
            if sources:
                print(f"Verified by: {sources[0]['source']}")



Text: OMG. Are Anupama Parameswaran and Dhruv Vikram dating?” wrot...
Prediction: Fake (98.8%)

Text: (Reuters)Donald Trump spent a good portion of his day at his...
Prediction: Real (100.0%)

Text: (Reuters)The following statements were posted to the verifie...
Prediction: Real (100.0%)

Text: (Reuters) - A gift-wrapped package addressed to U.S. Treasur...
Prediction: Real (100.0%)
