In [1]:
import numpy as np
import matplotlib.pyplot as plt
from news_database_interface import interface
import yfinance as yf
import pandas as pd

In [None]:
class MultiplicativeWeightsExpert:
    def __init__(self, stock: str, experts_list: list[str], learning_rate: float = 0.1, movement_threshold: float = 0.01):
        """
        Initialize the multiplicative weights expert model.
        
        Args:
            stock (str): Stock ticker
            experts_list (List[str]): List of expert names
            learning_rate (float): Learning rate for weight updates
            movement_threshold (float): Threshold for stock movement
        """
        self.stock = stock
        self.experts = experts_list
        self.num_experts = len(experts_list)
        self.learning_rate = learning_rate
        self.movement_threshold = movement_threshold
        
        # Initialize weights uniformly
        self.weights = np.ones(self.num_experts) / self.num_experts
        
        # Track losses for each expert
        self.losses = np.zeros(self.num_experts)    
        self.cumulative_losses = np.zeros(self.num_experts)
        self.total_loss = 0
        self.weights_history = np.zeros((0, self.num_experts))
        self.expert_losses_history = np.zeros((0, self.num_experts))
        self.total_loss_history = []

        # Track predictions
        self.current_expert_predictions = np.zeros(self.num_experts)
        
    def get_prediction(self, expert_predictions: list[float]) -> float:
        """
        Get weighted average of expert predictions.
        
        Args:
            expert_predictions (List[float]): List of predictions from each expert
            
        Returns:
            float: Weighted average prediction
        """
        if len(expert_predictions) != self.num_experts:
            raise ValueError("Number of predictions must match number of experts")
            
        return np.dot(self.weights, expert_predictions)
    
    def calculate_expert_losses(self, expert_predictions: list[float], actual_movement: int) -> np.ndarray:
        """
        Calculate losses for each expert based on their predictions.
        
        Args:
            expert_predictions (List[float]): List of predictions from each expert
            actual_movement (int): 1 if stock increased, -1 if decreased
            
        Returns:
            np.ndarray: Array of losses for each expert
        """
        if actual_movement > self.movement_threshold:
            return np.array([(1 - pred)**2 for pred in expert_predictions])
        elif actual_movement < -self.movement_threshold:
            return np.array([(1 + pred)**2 for pred in expert_predictions])
        else:
            return np.array([(pred)**2 for pred in expert_predictions])
    
    def update_weights(self, expert_losses: np.ndarray):
        """
        Update weights based on expert losses.
        
        Args:
            expert_losses (np.ndarray): Array of losses for each expert
        """
        # Update weights using exponential punishment
        self.weights = self.weights * np.exp(-self.learning_rate * expert_losses)
        
        # Normalize weights
        self.weights = self.weights / np.sum(self.weights)
        
        # Update cumulative losses
        self.cumulative_losses += expert_losses
        self.total_loss += np.dot(self.weights, expert_losses)

        # Save history
        self.weights_history = np.vstack((self.weights_history, self.weights))

    def compute_movement(self, x: float, clipper = lambda x: 1/(1 + np.exp(-x/0.2))) -> int:
        """
        Compute movement based on a sigmoid function.
        
        Args:
            clipper (lambda): Sigmoid function
            x (float): Input value
            
        Returns:
            int: 1 if stock increased, -1 if decreased, smoothed by the clipper
        """
        return clipper(x)
    
    def forward(self, movement: int, clipper=lambda x: 1/(1 + np.exp(-x/0.2))) -> float:
        """
        Forward pass of the model. Computes loss, updates weights and history
        Args: movement (change in stock value (ratio) between -1 to 1)
        Returns: prediction of the model
        """
        # Note, this runs every night

        modified_movement = self.compute_movement(clipper, movement)
        expert_predictions = self.current_expert_predictions
        expert_losses = self.calculate_expert_losses(expert_predictions, modified_movement)
        self.update_weights(expert_losses)

        # Save history
        self.expert_losses_history = np.vstack((self.expert_losses_history, expert_losses))

        # Update the predictions, for the next day
        self.current_expert_predictions = 

    def get_parameters(self) -> dict:
        """
        Get current results and statistics.
        
        Returns:
            Dict: Dictionary containing current weights, losses, and other statistics
        """
        return {
            'weights': self.weights,
            # 'losses': self.losses,
            # 'cumulative_losses': self.cumulative_losses,
            'current_predictions': self.current_expert_predictions,
        }

In [None]:
def generate_expert_predictions(num_experts: int, method: str = 'gaussian', parameters=[0, 0.5]) -> list[float]:
    """
    Generate expert predictions using specified distribution.

    Args:
        num_experts (int): Number of experts
        method (str): Distribution to use ('gaussian' or 'uniform')

    Returns:
        List[float]: List of expert predictions
    """
    if method == 'gaussian':
        predictions = np.random.normal(parameters[0], parameters[1], num_experts)
    elif method == 'uniform':
        predictions = np.random.uniform(-1, 1, num_experts)
    else:
        raise ValueError("Method must be 'gaussian' or 'uniform'")

    # Clip predictions to [-1, 1]
    return np.clip(predictions, -1, 1)


# Only one expert from website right now, the others from sentiment
def get_expert_predictions(beautiful_soup_thing) -> int:
    """
    Get expert prediction from Trendlyne. Returns None if no rating found
    Simply takes the first rating it finds.
    """
    for list in beautiful_soup_thing:
        for inner_list in list:
            for word in inner_list:
                if word in ['Buy', 'buy', 'Bullish', 'bullish', 'Strong Buy', 'strong buy', 'Outperform', 'outperform']:
                    return 1
                elif word in ['Hold', 'hold', 'Neutral', 'neutral', 'Market Perform', 'market perform']:
                    return 0
                elif word in ['Sell', 'sell', 'Bearish', 'bearish', 'Strong Sell', 'strong sell', 'Underperform', 'underperform']:
                    return -1
    return None

def get_expert_prediction_yf(stock: str) -> int:
    """
    Get expert prediction from Yahoo Finance. Returns None if no rating found
    """
    data = yf.Ticker(stock)
    
    # Count the number of recommendations of each type
    recommendations = data.recommendations
    recommendation_counts = recommendations['To Grade'].value_counts()
    
    # Take the weighted average of the recommendations (Buy = 1, Hold = 0, Sell = -1)
    total_recommendations = recommendation_counts.sum()
    if total_recommendations == 0:
        return None
    else:
        return (recommendation_counts['Buy'] - recommendation_counts['Sell']) / total_recommendations
    
#print(get_expert_prediction_yf('AAPL'))

KeyError: 'To Grade'

In [None]:
def get_sentiment_predictions(ticker: str) -> list[float]:
    """Gets the scores of the several aspects in the ABSA Done by Pratyush and Sirjan"""
    # Get the sentiment scores from the database
    
    

In [5]:
def get_stock_price(ticker: str) -> float:
    """
    Get the current stock price for a given ticker.
    
    Args:
        ticker (str): Stock ticker symbol
        
    Returns:
        float: Current stock price
    """
    return yf.Ticker(ticker).history(period='1d')['Close'].iloc[0]

In [6]:
def get_pricing_history(ticker: str, period: str = '1mo') -> np.ndarray:
    """
    Get historical stock prices for a given ticker.
    
    Args:
        ticker (str): Stock ticker symbol
        period (str): Period to fetch
        
    Returns:
        np.ndarray: Array of historical stock prices
    """
    return yf.Ticker(ticker).history(period=period)['Close'].values


In [None]:
def simulate_stock_movement(days: int, total_growth: float = 0.15) -> List[float]:
    """
    Simulate stock price movements over specified days with target total growth.

    Args:
        days (int): Number of days to simulate
        total_growth (float): Target total growth percentage

    Returns:
        List[float]: Daily stock prices
    """
    # Calculate required daily growth rate
    daily_rate = (1 + total_growth)**(1/days) - 1

    # Generate random daily movements with noise
    noise = np.random.normal(0, 0.02, days)
    daily_returns = daily_rate + noise

    # Calculate cumulative prices
    prices = [100]  # Start at 100
    for ret in daily_returns:
        prices.append(prices[-1] * (1 + ret))

    return prices

In [None]:
def run_simulation(days: int = 15, num_experts: int = 5, total_growth: float = 0.15):
    """
    Run a complete simulation of the multiplicative weights model on simulated stock data.

    Args:
        days (int): Number of days to simulate
        num_experts (int): Number of experts
        total_growth (float): Target total growth percentage
    """
    # Initialize model
    experts = [f"Expert_{i+1}" for i in range(num_experts)]
    model = MultiplicativeWeightsExpert(experts)

    # # Generate stock prices
    # prices = simulate_stock_movement(days, total_growth)
    prices = get_pricing_history('AAPL', '1mo')
    daily_returns = [(prices[i+1] - prices[i])/prices[i] for i in range(len(prices)-1)]
    movements = np.array([1 if ret > 0 else -1 for ret in daily_returns])

    # Track predictions and performance
    model_predictions = []
    expert_predictions_history = []

    # Run simulation
    for day in range(days):
        # Generate expert predictions
        expert_predictions = generate_expert_predictions(num_experts)
        expert_predictions_history.append(expert_predictions)
        
        # Get model prediction
        model_prediction = model.get_prediction(expert_predictions)
        model_predictions.append(model_prediction)
        
        # Calculate losses and update weights   
        expert_losses = model.calculate_expert_losses(expert_predictions, movements[day])
        model.update_weights(expert_losses)
    
    # Plot results
    plt.figure(figsize=(15, 10))
    
    # Plot 1: Stock Price
    plt.subplot(3, 1, 1)
    plt.plot(prices)
    plt.title('Stock Price Evolution')
    plt.grid(True)
    
    # Plot 2: Expert Weights Evolution
    plt.subplot(3, 1, 2)
    weights_history = np.array(model.weights_history)
    
    for i in range(num_experts):
        plt.plot(weights_history[i], label=f'Expert {i+1}')
    plt.title('Expert Weights Evolution')
    plt.legend()
    plt.grid(True)
    
    # # Plot 3: Cumulative Losses
    # plt.subplot(3, 1, 3)
    # expert_losses_history = np.array(model.expert_losses_history)
    
    # for i in range(num_experts):
    #     plt.plot(np.cumsum(expert_losses_history[:, i]), label=f'Expert {i+1}')

    # plt.title('Expert Cumulative Losses')
    # plt.legend()
    # plt.grid(True)
    # # Plot model losses on the same graph
    # plt.plot(model.total_loss_history, label='Model Total Loss')
    # plt.legend()

    # plt.show()


    # Plot 4: Model Predictions vs. Actual Movements
    plt.figure(figsize=(10, 5))
    plt.plot(model_predictions[-100:], label='Model Predictions')
    plt.plot(movements[-100:], label='Actual Movements')
    plt.title('Model Predictions vs. Actual Movements')
    plt.legend()
    plt.grid(True)
    plt.show()

    
    # Print final results
    results = model.get_results()
    print("Final Results:")
    print(f"Total Loss: {results['total_loss']}")
    print(f"Final Weights: {results['weights']}")
    print(f"Expert Cumulative Losses: {results['cumulative_losses']}")
    print(f"Expert Losses: {results['losses']}")
    print(f"Model loss history: {model.total_loss_history}")
    print(f"Expert loss history: {model.expert_losses_history}")
    print(f"Expert predictions history: {expert_predictions_history}")


In [9]:
# Example usage
if __name__ == "__main__":
    # run_simulation(num_experts=7, total_growth=0.55)
    pass

In [None]:
from news_database_interface import interface

ticker = 'TCS.NS'

data_object = interface.NewsDatabase()

Table created with columns for each aspect with this query: 
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            stock_symbol TEXT,
            headline TEXT,
            published_date TEXT,
            url TEXT,
            embedding BLOB,
        Earnings REAL,
Revenue REAL,
Margins REAL,
Dividend REAL,
EBITDA REAL,
Debt REAL,
Sentiment REAL



In [None]:
a, b = data_object.to_dataframe()

In [12]:
a

Unnamed: 0,title,url,published_date,alias,stock_symbol,Earnings,Revenue,Margins,Dividend,EBITDA,Debt,Sentiment
0,Adani Ports hits 5-month low; down 5% in two d...,https://news.google.com/rss/articles/CBMi1AFBV...,"Tue, 05 Nov 2024 18:36:00 GMT",Adani Ports,ADANIPORTS.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.971452
1,Adani Ports shares tumble 4% after co shares O...,https://news.google.com/rss/articles/CBMi2AFBV...,"Mon, 04 Nov 2024 06:56:21 GMT",Adani Ports,ADANIPORTS.NS,0.000000,0.000000,-0.674736,-0.528282,0.000000,0.000000,-0.788633
2,Adani Ports & Special Economic Zone Sees 3.03%...,https://news.google.com/rss/articles/CBMi2wFBV...,"Wed, 06 Nov 2024 07:07:06 GMT",Adani Ports,ADANIPORTS.NS,0.589772,0.692051,0.574119,0.819750,0.635595,0.544984,0.947743
3,Adani Ports Q2 results preview: Here's what br...,https://news.google.com/rss/articles/CBMizwFBV...,"Tue, 29 Oct 2024 07:00:00 GMT",Adani Ports,ADANIPORTS.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,Adani Ports Q2 results: Net profit jumps 37% Y...,https://news.google.com/rss/articles/CBMi5AFBV...,"Tue, 29 Oct 2024 07:00:00 GMT",Adani Ports,ADANIPORTS.NS,0.541222,0.526961,0.518274,0.000000,0.702986,0.000000,-0.937050
...,...,...,...,...,...,...,...,...,...,...,...,...
6064,"Q2 Update: IT giants Infosys, Wipro to announc...",https://news.google.com/rss/articles/CBMi-wFBV...,"Thu, 17 Oct 2024 07:00:00 GMT",Wipro,WIPRO.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6065,"Wipro rises Tuesday, still underperforms marke...",https://news.google.com/rss/articles/CBMipAFBV...,"Tue, 05 Nov 2024 10:32:00 GMT",Wipro,WIPRO.NS,0.000000,0.000000,-0.525494,0.000000,0.000000,0.000000,0.881826
6066,Wipro stock rises 3% as board to mull bonus is...,https://news.google.com/rss/articles/CBMixgFBV...,"Mon, 14 Oct 2024 07:00:00 GMT",Wipro,WIPRO.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.886129
6067,Exclusive: Two Wealthtime executives leave pla...,https://news.google.com/rss/articles/CBMi0AFBV...,"Fri, 08 Nov 2024 12:29:35 GMT",Wipro,WIPRO.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [13]:
# drop all columns apart from published_date, stock_symbol, Earnings	Revenue	Margins	Dividend	EBITDA	Debt	Sentiment
a = a[['published_date', 'stock_symbol', 'Earnings', 'Revenue', 'Margins', 'Dividend', 'EBITDA', 'Debt', 'Sentiment']]
a

Unnamed: 0,published_date,stock_symbol,Earnings,Revenue,Margins,Dividend,EBITDA,Debt,Sentiment
0,"Tue, 05 Nov 2024 18:36:00 GMT",ADANIPORTS.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.971452
1,"Mon, 04 Nov 2024 06:56:21 GMT",ADANIPORTS.NS,0.000000,0.000000,-0.674736,-0.528282,0.000000,0.000000,-0.788633
2,"Wed, 06 Nov 2024 07:07:06 GMT",ADANIPORTS.NS,0.589772,0.692051,0.574119,0.819750,0.635595,0.544984,0.947743
3,"Tue, 29 Oct 2024 07:00:00 GMT",ADANIPORTS.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,"Tue, 29 Oct 2024 07:00:00 GMT",ADANIPORTS.NS,0.541222,0.526961,0.518274,0.000000,0.702986,0.000000,-0.937050
...,...,...,...,...,...,...,...,...,...
6064,"Thu, 17 Oct 2024 07:00:00 GMT",WIPRO.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6065,"Tue, 05 Nov 2024 10:32:00 GMT",WIPRO.NS,0.000000,0.000000,-0.525494,0.000000,0.000000,0.000000,0.881826
6066,"Mon, 14 Oct 2024 07:00:00 GMT",WIPRO.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.886129
6067,"Fri, 08 Nov 2024 12:29:35 GMT",WIPRO.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [14]:
data_TCS = a[a['stock_symbol'] == 'TCS.NS']
data_TCS

Unnamed: 0,published_date,stock_symbol,Earnings,Revenue,Margins,Dividend,EBITDA,Debt,Sentiment
5377,"Sat, 09 Nov 2024 02:07:00 GMT",TCS.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.923310
5378,"Thu, 07 Nov 2024 14:50:41 GMT",TCS.NS,0.714131,0.797167,0.762456,0.725893,0.737603,0.000000,0.908883
5379,"Thu, 07 Nov 2024 03:28:45 GMT",TCS.NS,0.565668,0.626987,0.635346,0.717755,0.672891,0.000000,0.721399
5380,"Mon, 04 Nov 2024 08:00:00 GMT",TCS.NS,0.517267,0.000000,0.000000,0.000000,0.000000,0.000000,0.922962
5381,"Fri, 08 Nov 2024 11:05:47 GMT",TCS.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.886061
...,...,...,...,...,...,...,...,...,...
5564,"Fri, 08 Nov 2024 16:53:42 GMT",TCS.NS,0.000000,0.570948,0.000000,0.670304,0.616768,0.000000,0.000000
5565,"Thu, 10 Oct 2024 07:00:00 GMT",TCS.NS,0.681163,0.945296,0.582992,0.686208,0.555048,0.559839,0.953413
5566,"Fri, 11 Oct 2024 07:00:00 GMT",TCS.NS,-0.726640,0.000000,-0.503861,0.000000,0.000000,-0.515421,-0.970420
5567,"Sat, 02 Nov 2024 04:39:00 GMT",TCS.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [15]:
# Change the published_date column to contain a pd datetime format, only date and no time
data_TCS['published_date'] = pd.to_datetime(data_TCS['published_date']).dt.date
data_TCS

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_TCS['published_date'] = pd.to_datetime(data_TCS['published_date']).dt.date


Unnamed: 0,published_date,stock_symbol,Earnings,Revenue,Margins,Dividend,EBITDA,Debt,Sentiment
5377,2024-11-09,TCS.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.923310
5378,2024-11-07,TCS.NS,0.714131,0.797167,0.762456,0.725893,0.737603,0.000000,0.908883
5379,2024-11-07,TCS.NS,0.565668,0.626987,0.635346,0.717755,0.672891,0.000000,0.721399
5380,2024-11-04,TCS.NS,0.517267,0.000000,0.000000,0.000000,0.000000,0.000000,0.922962
5381,2024-11-08,TCS.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.886061
...,...,...,...,...,...,...,...,...,...
5564,2024-11-08,TCS.NS,0.000000,0.570948,0.000000,0.670304,0.616768,0.000000,0.000000
5565,2024-10-10,TCS.NS,0.681163,0.945296,0.582992,0.686208,0.555048,0.559839,0.953413
5566,2024-10-11,TCS.NS,-0.726640,0.000000,-0.503861,0.000000,0.000000,-0.515421,-0.970420
5567,2024-11-02,TCS.NS,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [16]:
# Now aggregate the vlaues by date (time is irrelevant), take mean of the values
# Remove stock symbol first
data_TCS = data_TCS.drop(columns=['stock_symbol'])

In [17]:
data_TCS = data_TCS.groupby('published_date').mean()
data_TCS

Unnamed: 0_level_0,Earnings,Revenue,Margins,Dividend,EBITDA,Debt,Sentiment
published_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-04-01,0.846300,0.778310,0.831192,0.772940,0.785523,0.000000,0.000000
2024-04-12,0.476764,0.806355,0.769073,0.767857,0.796533,0.000000,0.474975
2024-04-14,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2024-04-15,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.771507
2024-04-17,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...
2024-11-05,0.000000,0.165795,0.000000,0.187608,0.166871,0.000000,0.757866
2024-11-06,0.101903,0.150384,0.193905,0.253652,0.195374,0.092190,0.499939
2024-11-07,0.251735,0.255348,0.256442,0.270262,0.292815,0.001383,0.497904
2024-11-08,0.000000,0.028547,0.000000,0.033515,0.030838,0.000000,0.035984


In [18]:
# Now replace each row by the average of the last 7 days, If from the first 7 days, then take average of all days till then
data_TCS_lol = data_TCS.copy()

for i in range(7, len(data_TCS)):
    data_TCS.iloc[i] = data_TCS_lol.iloc[i-7:i].mean()

data_TCS_lol

Unnamed: 0_level_0,Earnings,Revenue,Margins,Dividend,EBITDA,Debt,Sentiment
published_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-04-01,0.846300,0.778310,0.831192,0.772940,0.785523,0.000000,0.000000
2024-04-12,0.476764,0.806355,0.769073,0.767857,0.796533,0.000000,0.474975
2024-04-14,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2024-04-15,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.771507
2024-04-17,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...
2024-11-05,0.000000,0.165795,0.000000,0.187608,0.166871,0.000000,0.757866
2024-11-06,0.101903,0.150384,0.193905,0.253652,0.195374,0.092190,0.499939
2024-11-07,0.251735,0.255348,0.256442,0.270262,0.292815,0.001383,0.497904
2024-11-08,0.000000,0.028547,0.000000,0.033515,0.030838,0.000000,0.035984
