In [1]:
import re
import numpy as np
import pandas as pd
import yfinance as yf
from transformers import pipeline
import pandas as pd
import requests
import json
from datetime import datetime
# pd.set_option("display.max_rows", None)
# pd.set_option("display.max_columns", None)
# pd.set_option("display.max_colwidth", None)
from datetime import datetime, timezone
import random
from datasets import load_dataset
import yfinance as yf
from transformers import AutoTokenizer, AutoModelForCausalLM,pipeline
import torch

In [2]:
# Load the chat model and its tokenizer once; the analyst classes below reuse
# these notebook-level `model` and `tokenizer` objects.
model_id = "deepseek-ai/deepseek-llm-7b-chat"
torch.cuda.set_device(2)  # make CUDA device index 2 the default device
device=torch.device("cuda:2")
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map={"": 2},             # place the entire model on CUDA device index 2
    torch_dtype="auto",            # NOTE(review): per the transformers docs, "auto" takes the dtype from the checkpoint/config - confirm
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
class technical_analyst:
    """Build a technical-indicator prompt for one or more tickers and query the LLM.

    On construction the class downloads price/volume history with yfinance,
    computes moving averages, RSI and OBV per ticker, and stores the resulting
    prompt text in ``self.prompt``.

    Parameters
    ----------
    tickers : str or sequence of str
        A single ticker symbol or a collection of them.
    start_date, end_date : str
        Passed straight through to ``yf.download`` as ``start`` / ``end``.
    """

    def __init__(self, tickers, start_date, end_date):
        # A bare string is ONE ticker; list("AAPL") would split it into letters.
        self.tickers = [tickers] if isinstance(tickers, str) else list(tickers)
        self.start_date = start_date
        self.end_date = end_date
        self.generate_df()
        self.generate_indicators()
        self.generate_technical_prompt()

    def generate_df(self):
        """Download the raw history for all tickers into ``self.data``."""
        self.data = yf.download(self.tickers, start=self.start_date, end=self.end_date)

    def _field(self, field, ticker):
        """Return the ``field`` series (e.g. 'Close') for ``ticker``.

        ``self.data[field]`` is a DataFrame when the columns carry a ticker
        level and a plain Series when they do not; both layouts are accepted.
        """
        column = self.data[field]
        if isinstance(column, pd.DataFrame):
            return column[ticker]
        return column

    def compute_rsi(self, close, period=14):
        """Relative Strength Index based on simple rolling means of gains/losses.

        Returns a Series aligned with ``close``; the first ``period`` entries
        are NaN because the rolling window is not yet full.
        """
        delta = close.diff()

        gain = delta.clip(lower=0)
        loss = -delta.clip(upper=0)

        avg_gain = gain.rolling(window=period).mean()
        avg_loss = loss.rolling(window=period).mean()

        rs = avg_gain / avg_loss
        rsi = 100 - (100 / (1 + rs))
        return rsi

    def compute_obv(self, close, volume):
        """On-Balance Volume: cumulative volume signed by the daily close change."""
        # np.sign yields +1 / -1 / 0; the NaN of the first diff becomes 0 below.
        direction = np.sign(close.diff())
        obv = (volume * direction).fillna(0).cumsum()
        return obv

    def generate_indicators(self):
        """Compute the indicator table for every ticker.

        Populates ``self.indicators_processed`` as ``{ticker: DataFrame}`` with
        columns SMA_5/15/50, EMA_5/10/50, Date, RSI and OBV.
        """
        self.indicators_processed = {}
        for ticker in self.tickers:
            close = self._field("Close", ticker)
            volume = self._field("Volume", ticker)

            table = pd.DataFrame(index=close.index)
            for window in (5, 15, 50):
                table[f"SMA_{window}"] = close.rolling(window).mean()
            for span in (5, 10, 50):
                table[f"EMA_{span}"] = close.ewm(span=span).mean()
            table["Date"] = close.index
            table["RSI"] = self.compute_rsi(close)
            table["OBV"] = self.compute_obv(close, volume)
            self.indicators_processed[ticker] = table

    def generate_technical_prompt(self):
        """Assemble the LLM prompt from the most recent indicator row of each ticker.

        Stores the text in ``self.prompt`` and returns it.
        """
        prompt = f"""
You are a technical investment analyst. Analyze the recent technical performance of the following stocks and give an investment recommendation.
Your task:
Choose a recommendation for each stock. Respond in a vector of floats between [-1,1], -1 being short and 1 being Strong buy. 
Make these decisions based solely on the technical indicators given below for each stock. Give a one line explanation for each decison.
"""
        for ticker in self.tickers:
            latest = self.indicators_processed[ticker].iloc[-1]
            # The block ends with real newlines; the previous literal "/n/n"
            # leaked into the prompt as text.
            partial_stats = f"""Technical Indicators for {ticker}:
            SMA 5: {latest['SMA_5']:.2f}
            SMA 15: {latest['SMA_15']:.2f}
            SMA 50: {latest['SMA_50']:.2f}

            EMA 5: {latest['EMA_5']:.2f}
            EMA 10: {latest['EMA_10']:.2f}
            EMA 50: {latest['EMA_50']:.2f}

            RSI: {latest['RSI']:.2f}
            OBV: {latest['OBV']:,.0f}
            \n\n"""
            prompt += partial_stats
        self.prompt = prompt
        return self.prompt

    def generate_response(self):
        """Run ``self.prompt`` through the notebook-level ``model``/``tokenizer``.

        Returns only the newly generated text (the echoed prompt is sliced off).
        """
        generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer
        )

        outputs = generator(
            self.prompt,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.4,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            num_beams=1,                # no beam search, to save memory
            use_cache=True
        )

        full_text = outputs[0]['generated_text']

        # The pipeline output starts with the prompt; keep only the continuation.
        response_only = full_text[len(self.prompt):].strip()
        return response_only


In [15]:
# Build the technical-indicator prompt for AAPL and NVDA over 2020, show it,
# then print the model's reply (requires `model`/`tokenizer` from the loading cell).
techni=technical_analyst(["AAPL","NVDA"],start_date="2020-01-01", end_date="2021-01-01")
print(techni.prompt)
response=techni.generate_response()
print(response)

  self.data = yf.download(self.tickers, start=self.start_date, end=self.end_date)
[*********************100%***********************]  2 of 2 completed
Device set to use cuda:2



You are a technical investment analyst. Analyze the recent technical performance of the following stocks and give an investment recommendation.
Your task:
Choose a recommendation for each stock. Respond in a vector of floats between [-1,1], -1 being short and 1 being Strong buy. 
Make these decisions based solely on the technical indicators given below for each stock. Give a one line explanation for each decison.
Technical Indicators for AAPL:
            SMA 5: 130.56
            SMA 15: 125.99
            SMA 50: 117.89

            EMA 5: 129.75
            EMA 10: 128.20
            EMA 50: 119.89

            RSI: 67.88
            OBV: 1,242,778,600
            /n/nTechnical Indicators for NVDA:
            SMA 5: 12.97
            SMA 15: 13.11
            SMA 50: 13.24

            EMA 5: 13.02
            EMA 10: 13.06
            EMA 50: 13.13

            RSI: 52.75
            OBV: 16,407,164,000
            /n/n
AAPL: Strong Buy
NVDA: Strong Buy


In [28]:
class fundamental_analyst:
    """Build a fundamentals prompt from Finnhub data and query the LLM.

    For every ticker the class fetches Finnhub's ``stock/metric`` and
    ``stock/financials-reported`` payloads, extracts a fixed set of figures for
    each calendar year between ``start_date`` and ``end_date`` (inclusive), and
    stores the resulting prompt text in ``self.prompt``.

    Parameters
    ----------
    tickers : str or sequence of str
        A single ticker symbol or a collection of them.
    start_date, end_date : str
        Dates in ``YYYY-MM-DD`` form; only the year component is used.
    """

    # Environment variable consulted for the Finnhub token.
    API_KEY_ENV = "FINNHUB_API_KEY"
    # NOTE(review): legacy hard-coded token kept only as a fallback so existing
    # runs keep working - rotate it and rely on the environment variable.
    _LEGACY_API_KEY = "d1l719pr01qt4thec1pgd1l719pr01qt4thec1q0"
    # Seconds to wait for each Finnhub request.
    REQUEST_TIMEOUT = 10

    def __init__(self, tickers, start_date, end_date):
        # A bare string is ONE ticker; list("AAPL") would split it into letters.
        self.tickers = [tickers] if isinstance(tickers, str) else list(tickers)
        self.start_date = start_date
        self.end_date = end_date

        start_year = datetime.strptime(start_date, "%Y-%m-%d").year
        end_year = datetime.strptime(end_date, "%Y-%m-%d").year
        self.years = list(range(start_year, end_year + 1))

        data_processed = {}
        for ticker in self.tickers:
            metric_data, gaap_data = self.get_data(ticker)
            data_processed[ticker] = self.get_financial_info(metric_data, gaap_data)
        self.data_processed = data_processed
        self.generate_prompt()

    def get_data(self, ticker):
        """Fetch the metric and reported-financials JSON payloads for ``ticker``.

        Returns ``(metric_data, gaap_data)``. Raises ``requests.HTTPError`` on a
        non-success status and ``requests.Timeout`` if Finnhub does not answer
        within ``REQUEST_TIMEOUT`` seconds.
        """
        import os

        api_key = os.environ.get(self.API_KEY_ENV, self._LEGACY_API_KEY)
        query = {"symbol": ticker, "token": api_key}

        response = requests.get(
            "https://finnhub.io/api/v1/stock/metric",
            params=query,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        metric_data = response.json()

        response = requests.get(
            "https://finnhub.io/api/v1/stock/financials-reported",
            params=query,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        gaap_data = response.json()
        return metric_data, gaap_data

    def find_us_gaap_entry(self, gaap_data, parameter, year):
        """Return the ``value`` of the first report entry mentioning ``parameter``.

        Scans the filings whose ``year`` equals ``year`` across the income
        statement ('ic'), balance sheet ('bs') and cash flow ('cf') sections.
        Returns ``None`` when nothing matches or the payload lacks the keys.
        """
        for filing in (gaap_data or {}).get('data') or []:
            if filing.get('year') != year:
                continue
            report = filing.get('report') or {}
            for section in ['ic', 'bs', 'cf']:
                for entry in report.get(section) or []:
                    if parameter in entry.values():
                        return entry.get('value')
        return None

    def find_metric_by_year(self, metric_data, parameter, year):
        """Return the annual-series value of ``parameter`` whose period starts with ``year``.

        Returns ``None`` when the series or a matching period is absent.
        """
        series = metric_data.get('series', {}).get('annual', {}).get(parameter, [])
        for item in series:
            if item.get('period', '').startswith(str(year)):
                return item.get('v')
        return None

    def get_financial_info(self, metric_data, gaap_data):
        """Collect one list per figure, with one element per year in ``self.years``.

        Entries are ``None`` where the lookup found nothing.
        """
        gaap_concepts = {
            "EPS": 'us-gaap_EarningsPerShareDiluted',
            "Net_Income": 'us-gaap_NetIncomeLoss',
            "Gross_Profit": 'us-gaap_GrossProfit',
            "Revenue": 'us-gaap_RevenueFromContractWithCustomerExcludingAssessedTax',
            "Total_Assets": 'us-gaap_Assets',
            "Total_Liabilities": 'us-gaap_Liabilities',
            "Shareholders_Equity": 'us-gaap_StockholdersEquity',
            "Operating_Cash_Flow": 'us-gaap_NetCashProvidedByUsedInOperatingActivities',
            "Investing_Cash_Flow": 'us-gaap_NetCashProvidedByUsedInInvestingActivities',
            "Financing_Cash_Flow": 'us-gaap_NetCashProvidedByUsedInFinancingActivities',
        }
        metric_names = {"P_E": 'pe', "ROA": 'roa', "ROE": 'roe'}

        financials = {key: [] for key in list(gaap_concepts) + list(metric_names)}
        for year in self.years:
            for key, concept in gaap_concepts.items():
                financials[key].append(self.find_us_gaap_entry(gaap_data, concept, year))
            for key, metric in metric_names.items():
                financials[key].append(self.find_metric_by_year(metric_data, metric, year))

        return financials

    @staticmethod
    def _fmt(value, spec, prefix=""):
        """Format ``value`` with ``spec``; missing values render as 'N/A'.

        Lookups return ``None`` when Finnhub has no figure for a year, and
        formatting ``None`` with a numeric spec raises ``TypeError``.
        """
        if value is None:
            return "N/A"
        try:
            return prefix + format(value, spec)
        except (TypeError, ValueError):
            # Non-numeric payload value: show it verbatim rather than crash.
            return str(value)

    def generate_prompt(self):
        """Assemble the LLM prompt from ``self.data_processed``.

        Stores the text in ``self.prompt`` and returns it.
        """
        fmt = self._fmt
        prompt = f"""
    Pretend that you are a fundamental investment analyst. Analyze the financial performance of the following companies and give a recommendation: Strong Buy, Buy, Hold, Sell, or Short. 
    You Must justify your decision in 4–6 bullet points using financial reasoning. Consider all the financial information shared. Only use the numerical data given. 
    Do not add assumptions about company operations, reputation, or strategy. You must generate one decision per company."""
        for ticker in self.tickers:
            financials = self.data_processed[ticker]
            # Leading newline keeps the header off the end of the previous text.
            partial_stats = f"""\nFinancials for {ticker}:\n"""
            for i, year in enumerate(self.years):
                row = {key: values[i] for key, values in financials.items()}
                partial_stats += f"""
                Year: {year}
                Income Statement:
                Revenue: {fmt(row['Revenue'], ',.0f', '$')}
                Gross Profit: {fmt(row['Gross_Profit'], ',.0f', '$')}
                Net Income: {fmt(row['Net_Income'], ',.0f', '$')}
                EPS (Diluted): {fmt(row['EPS'], '.2f')}

                Balance Sheet:
                Total Assets: {fmt(row['Total_Assets'], ',.0f', '$')}
                Total Liabilities: {fmt(row['Total_Liabilities'], ',.0f', '$')}
                Shareholders' Equity: {fmt(row['Shareholders_Equity'], ',.0f', '$')}

                Cash Flow:
                Operating Cash Flow: {fmt(row['Operating_Cash_Flow'], ',.0f', '$')}
                Investing Cash Flow: {fmt(row['Investing_Cash_Flow'], ',.0f', '$')}
                Financing Cash Flow: {fmt(row['Financing_Cash_Flow'], ',.0f', '$')}

                Valuation and Ratios:
                P/E Ratio: {fmt(row['P_E'], '.2f')}
                ROA: {fmt(row['ROA'], '.2%')}
                ROE: {fmt(row['ROE'], '.2%')}
                """
            prompt += partial_stats
        prompt += "\nBased on this, what is your investment recommendation? Pick one action candidate."
        self.prompt = prompt
        return prompt

    def generate_response(self):
        """Run ``self.prompt`` through the notebook-level ``model``/``tokenizer``.

        Returns only the newly generated text (the echoed prompt is sliced off).
        """
        generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer
        )
        outputs = generator(
            self.prompt,
            max_new_tokens=500,
            do_sample=True,
            temperature=0.4,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            num_beams=1,                # no beam search, to save memory
            use_cache=True
        )

        full_text = outputs[0]['generated_text']

        # The pipeline output starts with the prompt; keep only the continuation.
        response_only = full_text[len(self.prompt):].strip()
        return response_only

In [29]:
# Build the fundamentals prompt for AAPL and NVDA, show it, then print the
# model's reply. Note: this rebinds the notebook-level name `techni`.
techni=fundamental_analyst(["AAPL","NVDA"],start_date="2020-01-01", end_date="2021-01-01")
print(techni.prompt)
response=techni.generate_response()
print(response)

Device set to use cuda:2



    Pretend that you are a fundamental investment analyst. Analyze the financial performance of the following companies and give a recommendation: Strong Buy, Buy, Hold, Sell, or Short. 
    You Must justify your decision in 4–6 bullet points using financial reasoning. Consider all the financial information shared. Only use the numerical data given. 
    Do not add assumptions about company operations, reputation, or strategy. You must generate one decision per company.Financials for AAPL:

                Year: 2020
                Income Statement:
                Revenue: $274,515,000,000
                Gross Profit: $104,956,000,000
                Net Income: $57,411,000,000
                EPS (Diluted): 3.28

                Balance Sheet:
                Total Assets: $323,888,000,000
                Total Liabilities: $258,549,000,000
                Shareholders' Equity: $65,339,000,000

                Cash Flow:
                Operating Cash Flow: $80,674,000,000
       

In [19]:
# Scratch check that the local AAPL news file is readable line-delimited JSON.
# Plain literal: nesting double quotes inside a double-quoted f-string is a
# SyntaxError before Python 3.12, and nothing here needs interpolation.
file_path = "/home/f20222001/test-venv/Portfolio/sp500_news/sp500_news/AAPL.jsonl"
with open(file_path, "r") as f:
    for line in f:
        # `obj` ends up holding the last parsed record.
        obj = json.loads(line)

In [23]:
class news_analyst:
    """Build a headline-sentiment prompt for one or more companies and query the LLM.

    Headlines come from a local ``<ticker>.jsonl`` file and, when fewer than ten
    match, are topped up from Finnhub's company-news endpoint. The resulting
    prompt text is stored in ``self.prompt``.

    Parameters
    ----------
    tickers : str or sequence of str
        Ticker symbol(s).
    companies : str or sequence of str
        Company name(s), positionally paired with ``tickers``; a headline is
        kept only if it contains the company name (case-insensitive).
    start_date, end_date : str
        Inclusive date window in ``YYYY-MM-DD`` form.
    news_dir : str, optional
        Directory holding the ``<ticker>.jsonl`` files. Defaults to
        ``DEFAULT_NEWS_DIR``.

    Raises
    ------
    ValueError
        If fewer company names than tickers are supplied.
    """

    # Original hard-coded data location, kept as the default for compatibility.
    DEFAULT_NEWS_DIR = "/home/f20222001/test-venv/Portfolio/sp500_news/sp500_news"
    # Environment variable consulted for the Finnhub token.
    API_KEY_ENV = "FINNHUB_API_KEY"
    # NOTE(review): legacy hard-coded token kept only as a fallback so existing
    # runs keep working - rotate it and rely on the environment variable.
    _LEGACY_API_KEY = "d1l719pr01qt4thec1pgd1l719pr01qt4thec1q0"
    # Maximum number of headlines kept per ticker.
    MAX_HEADLINES = 10

    def __init__(self, tickers, companies, start_date, end_date, news_dir=None):
        self.tickers = [tickers] if isinstance(tickers, str) else tickers
        self.companies = [companies] if isinstance(companies, str) else companies
        if len(self.companies) < len(self.tickers):
            raise ValueError(
                f"need one company name per ticker: got {len(self.tickers)} "
                f"tickers but {len(self.companies)} companies"
            )
        self.start_date = start_date
        self.end_date = end_date
        self.news_dir = self.DEFAULT_NEWS_DIR if news_dir is None else news_dir

        news_collection = {}
        # Pair with the normalised list: indexing the raw `companies` argument
        # would yield single characters when a plain string was passed.
        for ticker, company in zip(self.tickers, self.companies):
            news_collection[ticker] = self.get_news_articles(ticker, company)
        self.news_collection = news_collection
        self.generate_news_prompt()

    def get_news_articles(self, ticker, company):
        """Return up to ``MAX_HEADLINES`` unique headlines mentioning ``company``.

        Local JSONL records are read first; Finnhub is queried only when the
        local file yields fewer than ``MAX_HEADLINES`` matches. If more than
        ``MAX_HEADLINES`` local matches exist, a random sample is returned.
        """
        import os

        file_path = os.path.join(self.news_dir, f"{ticker}.jsonl")
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(self.end_date, "%Y-%m-%d")

        want = self.MAX_HEADLINES
        needle = company.lower()
        titles, seen = [], set()

        # 1) Local JSONL; a missing file simply means "no local headlines".
        if os.path.exists(file_path):
            with open(file_path, "r", encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue  # tolerate blank lines between records
                    obj = json.loads(line)
                    title = (obj.get("Article_title") or "").strip()
                    if not title:
                        continue
                    # assumes "Date" is a plain YYYY-MM-DD string - TODO confirm
                    article_dt = datetime.strptime(obj["Date"], "%Y-%m-%d")
                    if start_dt <= article_dt <= end_dt and needle in title.lower():
                        if title not in seen:
                            seen.add(title)
                            titles.append(title)

        # 2) If fewer than the cap, top up with Finnhub for the same window.
        if len(titles) < want:
            r = requests.get(
                "https://finnhub.io/api/v1/company-news",
                params={
                    "symbol": ticker,
                    "from": self.start_date,
                    "to": self.end_date,
                    "token": os.environ.get(self.API_KEY_ENV, self._LEGACY_API_KEY),
                },
                timeout=10,
            )
            if r.ok:
                for item in (r.json() or []):
                    title = (item.get("headline") or "").strip()
                    if not title or title in seen:
                        continue
                    ts = item.get("datetime")
                    if isinstance(ts, (int, float)):
                        art_date = datetime.fromtimestamp(ts, tz=timezone.utc).date()
                        if start_dt.date() <= art_date <= end_dt.date() and needle in title.lower():
                            seen.add(title)
                            titles.append(title)
                            if len(titles) >= want:
                                break

        # 3) Cap at the limit (random subset if the local file overshoots).
        if len(titles) > want:
            titles = random.sample(titles, want)

        return titles

    def generate_news_prompt(self):
        """Assemble the LLM prompt from ``self.news_collection``.

        Stores the text in ``self.prompt`` and returns it.
        """
        # NOTE(review): the instruction text says "technical performance" although
        # only headlines are supplied - confirm whether that wording is intended.
        prompt = f"""
Pretend that you are a sentiment and headlines investment analyst. Analyze the recent technical performance of the following companies and give a recommendation: Strong Buy, Buy, Hold, Sell, or Short. 
Justify your decision in 4–6 bullet points using sentiment analysis. Only use the headlines given. 
Do not add assumptions about company fundamentals, operations, or strategy. Give a recommendation per company.

Headlines:
"""
        for ticker in self.tickers:
            prompt += f""" News for {ticker}\n"""
            for headline in self.news_collection[ticker]:
                prompt += headline
                prompt += '\n'
        prompt += "Based on this, what is your investment recommendation? Pick one action candidate."
        self.prompt = prompt
        return self.prompt

    def generate_response(self):
        """Run ``self.prompt`` through the notebook-level ``model``/``tokenizer``.

        Returns only the newly generated text (the echoed prompt is sliced off).
        """
        generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer
        )

        outputs = generator(
            self.prompt,
            max_new_tokens=500,
            do_sample=True,
            temperature=0.4,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            num_beams=1,                # no beam search, to save memory
            use_cache=True
        )

        full_text = outputs[0]['generated_text']

        # The pipeline output starts with the prompt; keep only the continuation.
        response_only = full_text[len(self.prompt):].strip()
        return response_only

In [24]:
# Build the headline prompt for Apple and Nvidia over 2022, show it, then print
# the model's reply. Note: this rebinds the notebook-level name `techni`.
techni=news_analyst(["AAPL","NVDA"],companies=["Apple","Nvidia"],start_date="2022-01-01", end_date="2023-01-01")
print(techni.prompt)
response=techni.generate_response()
print(response)

Device set to use cuda:2



Pretend that you are a sentiment and headlines investment analyst. Analyze the recent technical performance of the following companies and give a recommendation: Strong Buy, Buy, Hold, Sell, or Short. 
Justify your decision in 4–6 bullet points using sentiment analysis. Only use the headlines given. 
Do not add assumptions about company fundamentals, operations, or strategy. Give a recommendation per company.

Headlines:
 News for AAPL
Microsoft (MSFT) Brings Apple iCloud Integration to Windows
Brazil orders Apple to suspend iPhone sales without charger
Looking for a Dividend Growth Stock? Look No Further Than Apple
Baseball-New York AG asks MLB and Apple to let more fans watch Yankees game
Is Apple a Must-Own Stock in 2023?
Apple supplier Foxconn quadruples bonuses to staff hit by China COVID lockdown
Nasdaq, S&P 500 open higher on positive forecasts from Apple, Amazon
Apple's CEO Bashes the Metaverse, and Nvidia's CEO Embraces It: What It Means for Investors
UK begins investigation 

In [25]:
# Inspect the company names stored on the most recently created analyst.
techni.companies

['Apple', 'Nvidia']