In [4]:
import yfinance as yf
import datetime
from typing import List, Tuple

In [5]:

class StockPriceFetcher:
    """
    Fetches historical stock prices using Yahoo Finance.
    """

    def __init__(self, symbol: str):
        self.symbol = symbol

    def get_price_change(self, date: str, window: int = 1) -> Tuple[float, float]:
        """
        Fetch stock price on a given date and compare it with the next trading day to calculate price change.
        
        Parameters:
        - date (str): Date in 'YYYY-MM-DD' format
        - window (int): Number of days to check the change (default: 1 day)
        
        Returns:
        - Tuple of initial price, price change percentage
        """
        # Convert to datetime
        event_date = datetime.datetime.strptime(date, "%Y-%m-%d")
        
        # Fetch historical prices (1 week before and 1 week after event)
        start_date = (event_date - datetime.timedelta(days=7)).strftime("%Y-%m-%d")
        end_date = (event_date + datetime.timedelta(days=window + 7)).strftime("%Y-%m-%d")
        
        stock_data = yf.download(self.symbol, start=start_date, end=end_date)
        
        # Find the closest date after the event date with available price
        event_price = stock_data['Close'][event_date.strftime("%Y-%m-%d"):].head(1)
        future_price = stock_data['Close'][(event_date + datetime.timedelta(days=window)).strftime("%Y-%m-%d"):].head(1)

        if not event_price.empty and not future_price.empty:
            price_change = ((future_price.values[0] - event_price.values[0]) / event_price.values[0]) * 100
            return event_price.values[0], price_change
        else:
            return None, None

# Example usage
symbol = "RELIANCE.NS"
date = "2023-10-10"  # Date when the event happened

fetcher = StockPriceFetcher(symbol)
initial_price, change_percentage = fetcher.get_price_change(date)

if initial_price and change_percentage is not None:
    print(f"Initial Price: {initial_price}")
    print(f"Price Change: {change_percentage:.2f}%")
else:
    print("Could not fetch price data for the given date.")



[*********************100%***********************]  1 of 1 completed

Initial Price: 1154.199951171875
Price Change: 1.59%





In [6]:
!pip install spacy 





[notice] A new release of pip available: 22.3 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [19]:
import spacy


class EventExtractor:
    """
    Extracts structured events (Actor, Action, Object) from news text using spaCy.
    """

    def __init__(self):
        # Load spaCy model with dependency parsing and NER
        self.nlp = spacy.load("en_core_web_sm")

    def extract_events(self, text: str):
        """
        Extracts (Actor, Action, Object) tuples from a given news sentence.

        Parameters:
        - text (str): The news text.

        Returns:
        - List[Tuple[str, str, str]]: List of extracted events as (Actor, Action, Object)
        """
        doc = self.nlp(text)
        events = []

        for token in doc:
            # Identify the Actor (subject) and Action (verb)
            if token.dep_ in ("nsubj", "nsubjpass") and token.head.pos_ == "VERB":
                subject = token.text
                action = token.head.lemma_  # Use the base form of the verb
                obj = None

                # Look for a direct object or a prepositional object
                for child in token.head.children:
                    if child.dep_ == "dobj":  # Direct object takes priority
                        obj = child.text
                        # Check if there are multiple words (e.g., "Future Retail")
                        if child.i + 1 < len(doc) and doc[child.i + 1].dep_ in ("compound", "amod"):
                            obj = f"{doc[child.i + 1].text} {obj}"
                        break  # Stop once direct object is found
                    elif child.dep_ == "prep" and obj is None:
                        # Check for prepositional phrase (e.g., "in a deal")
                        for prep_obj in child.children:
                            if prep_obj.dep_ == "pobj":
                                obj = prep_obj.text

                # Ensure that the extracted object is meaningful
                if subject and action and obj:
                    events.append((subject, action, obj))

        return events


# Example usage
text = "Reliance acquired Future Retail in a massive deal."
extractor = EventExtractor()
extracted_events = extractor.extract_events(text)

for event in extracted_events:
    print(f"Actor: {event[0]}, Action: {event[1]}, Object: {event[2]}")


Actor: Reliance, Action: acquire, Object: Retail


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from transformers import BertModel, BertTokenizer
from typing import List, Tuple

# Initialize BERT model and tokenizer for word embeddings
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = BertModel.from_pretrained("bert-base-uncased")



class EventEmbeddingNN(nn.Module):
    """
    Neural Network to create event embeddings from structured events.
    """

    def __init__(self, embedding_dim: int = 768):
        super(EventEmbeddingNN, self).__init__()
        self.embedding_dim = embedding_dim
        self.tensor1 = nn.Parameter(torch.Tensor(embedding_dim, embedding_dim))
        self.tensor2 = nn.Parameter(torch.Tensor(embedding_dim, embedding_dim))
        self.tensor3 = nn.Parameter(torch.Tensor(embedding_dim, embedding_dim))
        self.linear = nn.Linear(embedding_dim * 2, embedding_dim)
        self.init_weights()

    def init_weights(self):
        nn.init.xavier_uniform_(self.tensor1)
        nn.init.xavier_uniform_(self.tensor2)
        nn.init.xavier_uniform_(self.tensor3)

    def forward(self, actor_embed, action_embed, object_embed):
        r1 = torch.matmul(actor_embed, self.tensor1) * action_embed
        r2 = torch.matmul(action_embed, self.tensor2) * object_embed
        r3 = torch.matmul(r1 + r2, self.tensor3)
        event_embedding = torch.tanh(r3)
        return event_embedding

def get_bert_embeddings(text: str) -> torch.Tensor:
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=10)
    outputs = bert_model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings

def create_event_embedding(actor: str, action: str, obj: str) -> torch.Tensor:
    actor_embedding = get_bert_embeddings(actor)
    action_embedding = get_bert_embeddings(action)
    object_embedding = get_bert_embeddings(obj)
    model = EventEmbeddingNN()
    event_embedding = model(actor_embedding, action_embedding, object_embedding)
    return event_embedding

# Example usage
event_extractor = EventExtractor()
news_text = "Reliance acquires Future Retail in a massive deal."
events = event_extractor.extract_events(news_text)

for event in events:
    actor, action, obj = event
    event_embedding = create_event_embedding(actor, action, obj)
    print(f"Event: {actor} {action} {obj}")
    print(f"Event Embedding: {event_embedding}")



  from .autonotebook import tqdm as notebook_tqdm


Event: Reliance acquire Retail
Event Embedding: tensor([[ 0.1427, -0.1194,  0.0264, -0.0039, -0.0372, -0.1084,  0.1465, -0.0601,
          0.0433,  0.0484,  0.0336, -0.0329, -0.0151,  0.0405,  0.0069,  0.0929,
         -0.0147, -0.0184, -0.1057, -0.0591, -0.0640, -0.0976, -0.0260, -0.0631,
          0.0583, -0.0780,  0.0372, -0.0429,  0.0558, -0.3398,  0.1329, -0.1668,
          0.1139, -0.0829,  0.1714, -0.0284, -0.0011, -0.1147, -0.2159, -0.0263,
         -0.0498,  0.0198,  0.0752,  0.1514, -0.0728,  0.2084, -0.0252,  0.2546,
         -0.3513, -0.1998,  0.0302, -0.0833,  0.0578,  0.1563, -0.0047, -0.0557,
          0.0833, -0.1759, -0.0597,  0.0334,  0.2350, -0.2097,  0.0416,  0.0335,
         -0.0600,  0.0589,  0.1498, -0.1702, -0.0733, -0.1223, -0.1611, -0.1095,
          0.1355, -0.3380,  0.0883, -0.1390,  0.1484, -0.2507,  0.0465,  0.0387,
         -0.0041, -0.1013, -0.0185,  0.0395,  0.0401, -0.1520, -0.0121,  0.0797,
          0.0043,  0.0546,  0.0217,  0.0574,  0.1742, -0.0755

In [9]:
def weighted_label_event(date, stock_fetcher, thresholds=(-2, -1, 1, 2)):
    """
    Label each event with a weight based on stock price change magnitude.

    Parameters:
    - thresholds: Tuple defining boundaries for categorizing price change.
                  e.g., (-2, -1, 1, 2) where:
                  - > 2% is Strongly Positive (2)
                  - 1% to 2% is Moderately Positive (1)
                  - -1% to 1% is Neutral (0)
                  - -2% to -1% is Moderately Negative (-1)
                  - < -2% is Strongly Negative (-2)
    """
    initial_price, price_change = stock_fetcher.get_price_change(date)
    if price_change is not None:
        if price_change > thresholds[3]:
            return 2  # Strongly Positive
        elif thresholds[2] <= price_change <= thresholds[3]:
            return 1  # Moderately Positive
        elif thresholds[1] <= price_change < thresholds[2]:
            return 0  # Neutral
        elif thresholds[0] <= price_change < thresholds[1]:
            return -1  # Moderately Negative
        else:
            return -2  # Strongly Negative
    return None

In [11]:
import json
import pandas as pd

# Load JSON data
with open("Dataset/reliance_news.json", "r") as file:
    news_data = json.load(file)

# Parse and convert JSON data to a DataFrame
news_items = [
    {
        "title": article.get("title"),
        "description": article.get("description"),
        "published_date": article.get("published_at").split("T")[0]  # Use only the date part
    }
    for article in news_data.get("articles", [])
]

df_news = pd.DataFrame(news_items)
df_news['text'] = df_news['title'] + " " + df_news['description']
print(df_news.head())

                                               title  \
0  Exclusive: Industrialist Nikhil Merchant leads...   
1  India’s Reliance gets shareholders’ nod to add...   
2          Rogers misses quarterly revenue estimates   
3  Exclusive: Tycoon Nikhil Merchant leads race f...   
4  BP to Open Fuel Station in India Amid Record P...   

                                         description published_date  \
0  Low-profile Gujarat businessman wants to add P...     2021-10-21   
1  BENGALURU &#8212; India&#8217;s Reliance Indus...     2021-10-21   
2  Rogers Communications Inc reported third-quart...     2021-10-21   
3  Low-profile Gujarat businessman wants to add P...     2021-10-21   
4  BP and Reliance Industries signed a $6 billion...     2021-10-21   

                                                text  
0  Exclusive: Industrialist Nikhil Merchant leads...  
1  India’s Reliance gets shareholders’ nod to add...  
2  Rogers misses quarterly revenue estimates Roge...  
3  Exclusive: Ty

In [21]:
# Prepare dataset with weighted labels
event_data = []

# Iterate over each row in df_news
for index, row in df_news.iterrows():
    text = row['text']
    date = row['published_date']
    
    # Extract events and assign a label based on stock price change
    events = event_extractor.extract_events(text)
    label = weighted_label_event(date, fetcher)
    print(events)
    # Process each event and create an embedding
    for event in events:
        actor, action, obj = event
        event_embedding = create_event_embedding(actor, action, obj)
        
        # Append to event_data
        event_data.append({
            "date": date,
            "event_embedding": event_embedding,
            "label": label  # Weighted label
        })

# Convert to DataFrame for further processing
df_events = pd.DataFrame(event_data)
print(df_events.head())


[('Merchant', 'lead', 'race')]
[('Reliance', 'get', 'shareholders'), ('Ltd', 'say', 'Thursday'), ('majority', 'pass', 'resolution')]
[('Rogers', 'miss', 'Rogers estimates'), ('Inc', 'report', 'revenue'), ('that', 'miss', 'analysts&#8217'), ('restrictions', 'keep', 'indoors'), ('Rogers', 'face', 'pressure')]
[('Merchant', 'lead', 'race')]
[('BP', 'sign', 'agreement')]
[('pump', 'buy', 'stake'), ('BP', 'buy', 'stake')]
[('Resolutions', 'take', 'responsibility')]
[('Dividend', 'end', 'Exchanges')]
[('Alert', 'take', 'responsibility')]
[]
[]
[]
[('hope', 'have', 'reason'), ('Rockets', 'have', 'reason'), ('Rockets', 'trail', 'points')]
[('biz', 'target', 'manufacturing'), ('Reliance', 'target', 'manufacturing'), ('It', 'spend', 'billion')]
[('HUL', 'hit', 'valuation'), ('RIL', 'hit', 'valuation'), ('Unilever', 'emerge', 'laggards')]
[('prices', 'put', 'spotlight'), ('Buildings', 'produce', 'third')]
[('Stocks', 'close', 'session'), ('Industries', 'close', 'session')]
[]
[]
[]
[('Stocks', 'f

IndexError: [E042] Error accessing `doc[29].nbor(1)`, for doc of length 30.

In [17]:
import pandas as pd
import datetime

class StockPriceFetcher2:
    def __init__(self, stock_data_path):
        self.stock_data = pd.read_csv(stock_data_path, parse_dates=["Date"])
        self.stock_data.set_index("Date", inplace=True)

    def get_price_change(self, date, window=1):
        """
        Get price change for a specific date with respect to the next trading day.
        """
        event_date = pd.to_datetime(date)
        
        try:
            initial_price = self.stock_data.at[event_date, "Close"]
            future_price = self.stock_data.at[event_date + pd.Timedelta(days=window), "Close"]
            price_change = ((future_price - initial_price) / initial_price) * 100
            
            return initial_price, price_change
        except KeyError:
            return None, None

# Initialize fetcher with the local CSV
fetcher = StockPriceFetcher2("Dataset/reliance_stock_history.csv")
