In [None]:
import asyncio
from telethon import TelegramClient
import pickle
import re
from nltk.stem import PorterStemmer
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer

# Download the NLTK 'punkt' tokenizer data (the call is a no-op when the
# package is already up to date).
# NOTE(review): nothing visible in this cell uses punkt -- preprocess_text
# tokenizes with str.split() -- so this download may be unnecessary; confirm
# before removing.
nltk.download('punkt')

# TELEGRAM API CREDENTIALS
# These are empty placeholders and must be filled in before the scraper can
# connect. Keep real values out of shared or committed notebooks.

api_id = ''
api_hash = ''
channel_username = ''  # e.g., '@example_channel'

# Single shared Porter stemmer instance, used by preprocess_text
stemmer = PorterStemmer()

# Function to scrape Telegram messages (asynchronous)
async def scrape_telegram_messages(limit=100):
    """Fetch the text of the latest messages from the configured channel.

    Opens a Telethon client session named ``'session_name'`` with the
    module-level ``api_id`` / ``api_hash`` and reads from the module-level
    ``channel_username``.

    Args:
        limit: Maximum number of messages to request. Defaults to 100, which
            is the value this function previously hard-coded.

    Returns:
        A list with the ``message`` text of every fetched entry whose text is
        non-empty; entries with empty or missing text are skipped.
    """
    async with TelegramClient('session_name', api_id, api_hash) as client:
        messages = await client.get_messages(channel_username, limit=limit)
        # Keep only entries that actually carry text.
        return [msg.message for msg in messages if msg.message]

# TEXT PREPROCESSING WITH STEMMING
def preprocess_text(text):
    """Clean a raw message and reduce its words to Porter stems.

    The cleaning steps run in this order: strip URLs, ``@mentions`` and
    ``#hashtags``, drop every remaining character that is neither a word
    character nor whitespace, then lowercase and trim. The result is split on
    whitespace, each word is stemmed with the module-level ``stemmer``, and the
    stems are joined back with single spaces.

    Args:
        text: The raw message string.

    Returns:
        The cleaned, stemmed text as one space-separated string.
    """
    # Order matters: URLs, mentions and hashtags must go before the generic
    # punctuation pass, which would otherwise remove their marker characters.
    removal_patterns = (
        r'http\S+',   # URLs
        r'@\w+',      # mentions
        r'#\w+',      # hashtags
        r'[^\w\s]',   # punctuation
    )
    for pattern in removal_patterns:
        text = re.sub(pattern, '', text)

    words = text.lower().strip().split()
    return ' '.join(map(stemmer.stem, words))

# LOAD MODEL
def load_model(model_path):
    """Unpickle and return the object stored at ``model_path``.

    Args:
        model_path: Path to the pickle file, opened in binary mode.

    Returns:
        Whatever object the pickle file contains.

    Note:
        ``pickle.load`` can execute arbitrary code embedded in the file, so
        only load model files that come from a trusted source.
    """
    with open(model_path, 'rb') as fh:
        return pickle.load(fh)

# FAKE NEWS DETECTION
def detect_fake_news(messages, model, vectorizer=None):
    """Classify each message with ``model`` and pair it with the verdict.

    Args:
        messages: Raw message strings to classify.
        model: A fitted estimator exposing ``predict``.
        vectorizer: Optional *already fitted* vectorizer, i.e. the one the
            model was trained with. When given, messages are converted with
            ``vectorizer.transform`` so the feature columns line up with what
            the model learned. When omitted, the legacy behaviour is kept: a
            fresh ``TfidfVectorizer`` is fitted on ``messages`` themselves and
            a ``RuntimeWarning`` is emitted, because features built that way
            do not share the training vocabulary.

    Returns:
        A list of ``{"message": <original text>, "is_fake": <bool>}`` dicts,
        one per input message, in input order. Empty input yields ``[]``.
    """
    # Nothing to classify; also avoids fitting a vectorizer on an empty corpus.
    if not messages:
        return []

    cleaned_data = [preprocess_text(msg) for msg in messages]

    if vectorizer is not None:
        # Reuse the training-time vocabulary and IDF weights.
        features = vectorizer.transform(cleaned_data)
    else:
        import warnings

        warnings.warn(
            "detect_fake_news: no fitted vectorizer supplied; fitting a new "
            "TfidfVectorizer on the input messages. Its features will not "
            "match the vocabulary the model was trained on, so predictions "
            "are unreliable. Pass the training vectorizer via 'vectorizer'.",
            RuntimeWarning,
            stacklevel=2,
        )
        fallback = TfidfVectorizer(max_features=5000, ngram_range=(1, 3), min_df=1)
        features = fallback.fit_transform(cleaned_data)

    predictions = model.predict(features)
    # NOTE(review): bool(pred) assumes numeric/boolean labels where a truthy
    # value means "fake"; non-empty string labels would all become True.
    # Confirm against the label encoding used in training.
    return [{"message": msg, "is_fake": bool(pred)} for msg, pred in zip(messages, predictions)]

# MAIN EXECUTION
async def main():
    """Run the pipeline: load the model, scrape the channel, print verdicts.

    Loads the pickled model from ``'fakenewsmodel.pkl'``, awaits
    ``scrape_telegram_messages()``, and prints one "Message / Fake News" entry
    per result from ``detect_fake_news``. Prints "No messages found!" and
    stops when the scrape returns nothing.
    """
    model_path = 'fakenewsmodel.pkl'

    print("Loading model...")
    model = load_model(model_path)

    print("Scraping messages from Telegram channel...")
    scraped_messages = await scrape_telegram_messages()

    # Guard clause: nothing to classify.
    if not scraped_messages:
        print("No messages found!")
        return

    print("Detecting fake news...")
    for result in detect_fake_news(scraped_messages, model):
        print(f"Message: {result['message']}\nFake News: {result['is_fake']}\n")

# Run the asynchronous main function (for Jupyter Notebooks)
# Top-level ``await`` is only accepted by environments that run the cell inside
# an event loop (IPython/Jupyter). Executed as a plain ``python`` script this
# line is a SyntaxError; a script would use ``asyncio.run(main())`` instead.
if __name__ == "__main__":
    await main()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Loading model...
Scraping messages from Telegram channel...
Please enter your phone (or bot token): +917997084098
Please enter the code you received: 78599
Signed in successfully as Sai Rohan; remember to not break the ToS or you will risk an account ban!
Detecting fake news...
Message: President Biden calls for ban on congressional stock trading. 

"Nobody in the Congress should be able to make money in the stock market while they're in the Congress."

🔵Powered by V3V Ventures
Fake News: False

Message: China executes official in $412 million corruption case. 

China on Tuesday executed a 64-year-old former official who was convicted of bribery, state media reported.

Li Jianping, who served in the northern Inner Mongolia autonomous region, was convicted of corruption, bribery, misappropriation of public funds, and collusion with a criminal syndicate.

The 64-year-old official was given a death sentence in September 2022, which was upheld in August this year.

🔵Powered by V3V Ventures

In [None]:
import pickle

# Specify the path to your .pkl file
file_path = 'fakenewsmodel.pkl' # Replace with the actual path

# Only the open/load step sits inside ``try`` so that an error raised while
# printing the object is not mistaken for a load failure.
# SECURITY: pickle.load can execute arbitrary code from the file; only open
# pickle files that come from a trusted source.
try:
    with open(file_path, 'rb') as file:
        data = pickle.load(file)

# UnpicklingError covers truncated/corrupt data; EOFError is what pickle raises
# for an empty file, which is the same "incomplete file" situation.
except (pickle.UnpicklingError, EOFError) as e:
    print(f"Error loading the pickle file: {e}")
    print("Please check if the file is complete and not corrupted.")

except FileNotFoundError as e:
    print(f"Error: File not found: {e}")
    print("Please ensure the file exists at the specified path.")

else:
    # Now you can work with the loaded data
    print(data)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Error loading the pickle file: pickle data was truncated
Please check if the file is complete and not corrupted.
