In [None]:

import pandas as pd
import numpy as np
import nltk
import re
import string
from textblob import TextBlob
import matplotlib.pyplot as plt

# Download necessary NLTK resources
# Requests the 'punkt' resource through the NLTK downloader; the call is issued
# every time this cell is executed.
# NOTE(review): the rest of this notebook only reads TextBlob's sentiment
# polarity — confirm whether 'punkt' is actually required for that.
nltk.download('punkt')

# Step 1: Synthetic dataset — one stock-news headline per row, paired with the
# price change observed for that headline.
headlines = [
    "Company A reports strong earnings for the quarter, stock expected to rise.",
    "Company B faces a lawsuit over data privacy violations, stock might fall.",
    "Company C announces a new product launch with great market potential.",
    "Company D struggles with declining sales, analysts predict a drop in stock price.",
    "Positive market sentiment for Company E as they secure a major contract.",
    "Company F's revenue exceeds expectations, stock price to see a surge.",
    "Negative outlook for Company G due to regulatory hurdles.",
]

# Price change for each headline, matched by position with `headlines`.
# NOTE(review): presumably a fractional return (0.05 = +5%) rather than a
# literal percentage — confirm the intended units.
price_changes = [0.05, -0.04, 0.06, -0.03, 0.08, 0.07, -0.05]

data = {"headline": headlines, "price_change": price_changes}

# Build the working DataFrame and preview it
df = pd.DataFrame(data)
df.head()


In [None]:

# Step 2: Preprocessing headlines
def preprocess_text(text):
    """Normalise a headline: lowercase it, drop punctuation, tidy whitespace.

    Punctuation is removed *before* whitespace is collapsed. Collapsing first
    leaves a double space behind wherever a punctuation mark stood between two
    spaces (e.g. "a - b" would become "a  b").

    Args:
        text: Raw headline string.

    Returns:
        The lowercased text with every character in ``string.punctuation``
        removed, each run of whitespace reduced to a single space, and
        leading/trailing whitespace stripped.
    """
    text = text.lower()
    # Delete ASCII punctuation outright (so "f's" becomes "fs").
    text = re.sub(f"[{re.escape(string.punctuation)}]", '', text)
    # Collapse only after punctuation is gone, so gaps it leaves are merged too.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Clean every headline element-wise; the cleaned text overwrites the
# 'headline' column, so the raw wording is no longer available afterwards.
df['headline'] = df['headline'].map(preprocess_text)
df.head()


In [None]:

# Step 3: Sentiment Analysis using TextBlob
def get_sentiment(text):
    """Score the sentiment of ``text`` with TextBlob.

    Args:
        text: The string to analyse.

    Returns:
        TextBlob's polarity for the text, a value between -1 (negative)
        and 1 (positive).
    """
    return TextBlob(text).sentiment.polarity

# Score each headline and store the polarity in a new 'sentiment' column.
df['sentiment'] = df['headline'].map(get_sentiment)
df.head()


In [None]:

# Step 4: Trading Strategy based on sentiment
def trading_strategy(sentiment, threshold=0.05):
    """Map a sentiment score to a trading signal.

    Args:
        sentiment: Numeric sentiment score.
        threshold: Size of the neutral band around zero; expected to be
            non-negative. Defaults to 0.05.

    Returns:
        "Buy" when ``sentiment`` is strictly above ``threshold``, "Sell" when
        it is strictly below ``-threshold``, and "Hold" otherwise. Scores
        exactly on either boundary, and NaN (which fails both comparisons),
        yield "Hold".
    """
    if sentiment > threshold:
        return "Buy"
    if sentiment < -threshold:
        return "Sell"
    return "Hold"

# Turn each sentiment score into a Buy / Sell / Hold label.
df['signal'] = df['sentiment'].map(trading_strategy)
df.head()


In [None]:

# Step 5: Calculate strategy returns
# A "Buy" earns the row's price change, a "Sell" earns its negative, and any
# other signal contributes 0.
signal = df['signal']
price_move = df['price_change']
df['strategy_return'] = np.select(
    [signal == "Buy", signal == "Sell"],
    [price_move, -price_move],
    default=0,
)

# Running total of the per-row strategy returns
df['cumulative_return'] = df['strategy_return'].cumsum()
df.head()


In [None]:

# Plot cumulative returns against the row index, using an explicit Axes object
fig, ax = plt.subplots()
ax.plot(df.index, df['cumulative_return'], marker='o', linestyle='-')
ax.set_title("Cumulative Returns of Sentiment-Based Trading Strategy")
ax.set_xlabel("Trade Index")
ax.set_ylabel("Cumulative Return")
ax.grid()
plt.show()
