In [None]:
# Install necessary libraries: pip install telethon pandas scikit-learn nltk

from telethon.sync import TelegramClient
from telethon.sessions import MemorySession
from telethon.tl.functions.messages import GetHistoryRequest
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import numpy as np
import asyncio

In [None]:

# Fetch the VADER lexicon first: SentimentIntensityAnalyzer loads it when constructed.
nltk.download('vader_lexicon')
# Shared analyzer instance; preprocess_and_extract_features reads it as a global.
sia = SentimentIntensityAnalyzer()

# Telegram credentials and target channel, handed to scrape_telegram_data by the
# main cell. All three are blank placeholders and must be filled in before running.
api_id = '' # Telegram API id
api_hash = '' # Telegram API hash
channel_username = '' # channel to scrape


# Scraping function using an in-memory session to avoid session-database locking
async def scrape_telegram_data(api_id, api_hash, channel_username, limit=500, keywords=('stock', 'market', 'price')):
    """Collect recent channel messages whose text mentions any keyword.

    Args:
        api_id: Telegram API id used to create the client.
        api_hash: Telegram API hash used to create the client.
        channel_username: Channel whose history is read.
        limit: Maximum number of most-recent messages to inspect (counted
            before keyword filtering).
        keywords: Substrings to look for; matching is case-insensitive.

    Returns:
        A list with the text of every inspected message that contains at
        least one keyword, in the order the messages were received from
        Telegram.
    """
    # The message text is lower-cased before matching, so lower-case the
    # keywords too; otherwise a keyword such as 'Stock' could never match.
    lowered_keywords = tuple(keyword.lower() for keyword in keywords)

    messages = []
    # MemorySession keeps the login state in RAM instead of a
    # 'session_name.session' SQLite file, so re-running the cell cannot hit
    # "database is locked". The trade-off is that each run has to log in
    # again. Entering the context manager starts the client (prompting for
    # phone number / login code when needed), so no explicit start() call
    # is required.
    async with TelegramClient(MemorySession(), api_id, api_hash) as client:
        # A single GetHistoryRequest only returns one page of history, so a
        # limit above the page size was silently truncated. iter_messages
        # issues as many requests as needed to honor `limit`.
        async for message in client.iter_messages(channel_username, limit=limit):
            text = message.message
            # Service/media-only messages have no text; skip them.
            if text and any(keyword in text.lower() for keyword in lowered_keywords):
                messages.append(text)

    return messages


# Turn raw message texts into a per-message feature table
def preprocess_and_extract_features(messages):
    """Build a DataFrame of features, one row per message.

    Columns produced:
        message   -- the input text, unchanged
        sentiment -- 'compound' polarity score from the module-level ``sia``
        length    -- ``len()`` of the text
        label     -- random 0/1 placeholder target (demonstration only;
                     replace with real labels if available)

    Args:
        messages: Iterable of message strings.

    Returns:
        The DataFrame described above.
    """
    def compound_score(text):
        # Only the aggregate 'compound' value of the polarity scores is kept.
        return sia.polarity_scores(text)['compound']

    frame = pd.DataFrame({'message': messages})
    texts = frame['message']

    frame['sentiment'] = texts.apply(compound_score)
    frame['length'] = texts.apply(len)

    # Dummy labels: drawn at random, so they carry no real signal.
    frame['label'] = np.random.choice([0, 1], size=len(frame))

    return frame

# Fit a random-forest classifier and score it on a held-out split
def train_and_evaluate_model(data):
    """Train a RandomForestClassifier on the sentiment/length features.

    Args:
        data: DataFrame with 'sentiment', 'length' and 'label' columns.

    Returns:
        A ``(model, metrics)`` tuple: the fitted classifier and a dict with
        the keys 'Accuracy', 'Precision' and 'Recall', all measured on the
        20% test split.
    """
    features = data[['sentiment', 'length']]
    target = data['label']

    # Seeded 80/20 split so repeated runs on the same data split identically.
    train_X, test_X, train_y, test_y = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # fit() returns the estimator itself, so construction and training chain.
    model = RandomForestClassifier(random_state=42).fit(train_X, train_y)
    predicted = model.predict(test_X)

    metrics = {
        'Accuracy': accuracy_score(test_y, predicted),
        'Precision': precision_score(test_y, predicted),
        'Recall': recall_score(test_y, predicted),
    }

    return model, metrics

# Main execution: scrape -> build features -> train/evaluate -> print metrics.
# NOTE: the bare `await` below is only valid where top-level await is supported
# (e.g. an IPython/Jupyter cell); a plain `python <file>` run rejects it as a
# syntax error.
if __name__ == "__main__":
    # Step 1: Scrape keyword-matching messages using the module-level credentials
    messages = await scrape_telegram_data(api_id, api_hash, channel_username)

    # Step 2: Preprocess and extract features (sentiment, length, placeholder label)
    # NOTE(review): if no message matched a keyword, `data` is empty -- confirm
    # how the training step should behave in that case.
    data = preprocess_and_extract_features(messages)

    # Step 3: Train and evaluate the model
    model, metrics = train_and_evaluate_model(data)

    # Output model evaluation metrics, each rounded to two decimals
    print("Model Evaluation Metrics:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.2f}")
