In [1]:
import requests
import pandas as pd
import re
from textblob import TextBlob
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, classification_report

In [4]:
# Bearer Token for API Authentication
BEARER_TOKEN = "AAAAAAAAAAAAAAAAAAAAAFOwxQEAAAAA1s16fd2jTmJD3TaXs%2FNXJahGpzw%3DFSfln4xn0wtCnn2SfSjlZEEZAtjRAyVPrv1c9dvoCUvAEuUBCN"

# Define a function to access API data
def access_api_data(url, headers, params=None):
    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"Error: {response.status_code}, {response.text}")

# Example for Twitter API
TWITTER_API_URL = "https://api.twitter.com/2/tweets/search/recent"
HEADERS = {"Authorization": f"Bearer {BEARER_TOKEN}"}
PARAMS = {
    "query": "#stocks",
    "max_results": 100,
    "tweet.fields": "created_at,author_id,text"
}

# Access data from Twitter API
try:
    twitter_response = access_api_data(TWITTER_API_URL, HEADERS, PARAMS)
    tweets = twitter_response.get("data", [])
    twitter_data = pd.DataFrame(tweets)
    twitter_data.to_csv("twitter_api_data.csv", index=False)
except Exception as e:
    print(f"Failed to fetch Twitter data: {e}")

# Example for Telegram API (Using Telethon for Bearer Token-based Access)
from telethon.sync import TelegramClient

def fetch_telegram_data(api_id, api_hash, token, channel_name, limit=100):
    client = TelegramClient("session_name", api_id, api_hash)
    with client:
        messages = client.get_messages(channel_name, limit=limit)
        data = [{"date": msg.date, "sender": msg.sender_id, "message": msg.text} for msg in messages if msg.text]
        return pd.DataFrame(data)

# Fetch data using Telethon
API_ID = "29732947"
API_HASH = "YOUR_API_HASH"
CHANNEL_NAME = "1865578362657230848shub22797"

try:
    telegram_data = fetch_telegram_data(API_ID, API_HASH, BEARER_TOKEN, CHANNEL_NAME)
    telegram_data.to_csv("telegram_api_data.csv", index=False)
except Exception as e:
    print(f"Failed to fetch Telegram data: {e}")

Failed to fetch Twitter data: Error: 429, {"title":"Too Many Requests","detail":"Too Many Requests","type":"about:blank","status":429}
Failed to fetch Telegram data: You must use "async with" if the event loop is running (i.e. you are inside an "async def")


In [5]:
def preprocess_text(text):
    text = re.sub(r"http\S+|www\.\S+", "", text)  # Remove URLs
    text = re.sub(r"[^a-zA-Z ]", "", text)  # Keep only letters
    text = text.lower()  # Convert to lowercase
    return text

def perform_sentiment_analysis(data, column):
    data[column] = data[column].apply(preprocess_text)
    data['Sentiment'] = data[column].apply(lambda x: TextBlob(x).sentiment.polarity)
    return data

# Example Usage
processed_data = perform_sentiment_analysis(twitter_data, "text")
processed_data.to_csv("processed_sentiment_data.csv", index=False)

In [6]:
# Feature Extraction
vectorizer = CountVectorizer(max_features=1000)
X = vectorizer.fit_transform(processed_data['text']).toarray()

# Target Variable
processed_data['Label'] = processed_data['Sentiment'].apply(lambda x: 1 if x > 0 else 0)
y = processed_data['Label']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Training
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Model Evaluation
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.95
              precision    recall  f1-score   support

           0       0.67      1.00      0.80         2
           1       1.00      0.94      0.97        18

    accuracy                           0.95        20
   macro avg       0.83      0.97      0.89        20
weighted avg       0.97      0.95      0.95        20



In [7]:
def predict_stock_movement(text):
    text_processed = preprocess_text(text)
    features = vectorizer.transform([text_processed]).toarray()
    prediction = model.predict(features)
    return "Positive" if prediction[0] == 1 else "Negative"

# Example Prediction
print(predict_stock_movement("This stock is performing exceptionally well!"))

Negative
