In [1]:
import json
import os
import random
import re
import warnings
from datetime import datetime, timezone

import numpy as np
import pandas as pd
import requests
import torch
import yfinance as yf
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# pd.set_option("display.max_rows", None)
# pd.set_option("display.max_columns", None)
# pd.set_option("display.max_colwidth", None)

In [2]:
# Model / device configuration shared by every analyst class below.
model_id = "Qwen/Qwen2.5-14B-Instruct"
GPU_INDEX = 2  # the single CUDA device that hosts the whole model

torch.cuda.set_device(GPU_INDEX)  # make GPU_INDEX the default CUDA device
device = torch.device(f"cuda:{GPU_INDEX}")
# NOTE(review): trust_remote_code executes code shipped with the checkpoint;
# confirm it is actually required for this model before keeping it.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map={"": GPU_INDEX},  # "" is the root module: place every weight on this one GPU
    torch_dtype="auto",          # take the dtype from the checkpoint instead of defaulting to float32
)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [3]:
class technical_analyst:
    """Turn price history into a technical-indicator prompt and query the LLM.

    Construction downloads prices with ``yf.download``, derives SMA, EMA, RSI
    and OBV series per ticker, and stores the finished prompt on
    ``self.prompt``. ``generate_response`` sends that prompt to the
    notebook-level ``tokenizer`` / ``model`` objects.

    Attributes:
        tickers: List of ticker symbols.
        start_date: Start of the download window, forwarded to ``yf.download``.
        end_date: End of the download window, forwarded to ``yf.download``.
        data: Frame returned by ``yf.download``; it is read as
            ``data['Close'][ticker]`` and ``data['Volume'][ticker]``.
        indicators_processed: Mapping ticker -> DataFrame of indicator columns.
        prompt: Prompt text built from the latest indicator row of each ticker.
        response: Decoded model reply (set by ``generate_response``).
    """

    def __init__(self, tickers, start_date, end_date):
        """Download data, compute indicators and build the prompt.

        Args:
            tickers: One symbol (str) or an iterable of symbols.
            start_date: Start date string accepted by ``yf.download``.
            end_date: End date string accepted by ``yf.download``.
        """
        self.tickers = self._normalize_tickers(tickers)
        self.start_date = start_date
        self.end_date = end_date
        self.generate_df()
        self.generate_indicators()
        self.generate_technical_prompt()

    @staticmethod
    def _normalize_tickers(tickers):
        """Return ``tickers`` as a list, wrapping a lone symbol string."""
        # list("LMT") would split the symbol into ['L', 'M', 'T'], so a string
        # has to be wrapped rather than converted.
        return [tickers] if isinstance(tickers, str) else list(tickers)

    def generate_df(self):
        """Download the price/volume frame for all tickers into ``self.data``."""
        self.data = yf.download(self.tickers, start=self.start_date, end=self.end_date)

    def compute_rsi(self, close, period=14):
        """Return the RSI of ``close`` using simple rolling means.

        Args:
            close: Series of closing prices.
            period: Rolling window length for the average gain/loss.

        Returns:
            Series of RSI values; the first ``period`` rows are NaN.
        """
        delta = close.diff()

        gain = delta.clip(lower=0)
        loss = -delta.clip(upper=0)

        avg_gain = gain.rolling(window=period).mean()
        avg_loss = loss.rolling(window=period).mean()

        # A zero average loss yields an infinite ratio, which maps to RSI 100.
        rs = avg_gain / avg_loss
        return 100 - (100 / (1 + rs))

    def compute_obv(self, close, volume):
        """Return on-balance volume: cumulative volume signed by price direction.

        Args:
            close: Series of closing prices.
            volume: Series of traded volume on the same index as ``close``.
        """
        direction = np.sign(close.diff())
        # The first row has no previous close (NaN direction) and counts as 0.
        return (volume * direction).fillna(0).cumsum()

    def generate_indicators(self):
        """Populate ``self.indicators_processed`` with one frame per ticker."""
        self.indicators_processed = {}
        for ticker in self.tickers:
            # Tickers with different trading calendars (e.g. crypto vs. equities)
            # share one index in the downloaded frame, which leaves NaN closes on
            # the days a ticker did not trade. Those NaNs would poison every
            # rolling window, so each ticker is computed on its own valid rows.
            close = self.data['Close'][ticker].dropna()
            volume = self.data['Volume'][ticker].reindex(close.index)

            temp = pd.DataFrame(index=close.index)
            temp["SMA_5"] = close.rolling(5).mean()
            temp["SMA_15"] = close.rolling(15).mean()
            temp["SMA_50"] = close.rolling(50).mean()

            temp['EMA_5'] = close.ewm(span=5).mean()
            temp['EMA_10'] = close.ewm(span=10).mean()
            temp['EMA_50'] = close.ewm(span=50).mean()
            temp["Date"] = close.index
            temp["RSI"] = self.compute_rsi(close)
            temp["OBV"] = self.compute_obv(close, volume)
            self.indicators_processed[ticker] = temp

    def generate_technical_prompt(self):
        """Build, store and return the prompt from each ticker's latest row."""
        prompt = """ Choose a recommendation for each stock. Respond in a vector of floats between [-1,1], -1 being Short and 1 being Strong Buy. 
Make these decisions based solely on the technical indicators given below for each stock. Return only the vector, no explanation is needed.
 You MUST return the vector at the end in this format [company1,company2,...companyn]: [Value1,Value2,Valuen]\n\n"""
        for ticker in self.tickers:
            frame = self.indicators_processed[ticker]
            if frame.empty:
                # Nothing was downloaded for this symbol; say so instead of
                # failing on iloc[-1].
                prompt += f"Technical Indicators for {ticker}:\n            No price data available.\n\n"
                continue
            latest = frame.iloc[-1]
            partial_stats = f"""Technical Indicators for {ticker}:
            SMA 5: {latest['SMA_5']:.2f}
            SMA 15: {latest['SMA_15']:.2f}
            SMA 50: {latest['SMA_50']:.2f}

            EMA 5: {latest['EMA_5']:.2f}
            EMA 10: {latest['EMA_10']:.2f}
            EMA 50: {latest['EMA_50']:.2f}

            RSI: {latest['RSI']:.2f}
            OBV: {latest['OBV']:,.0f}
            \n\n"""
            prompt += partial_stats
        self.prompt = prompt
        return self.prompt

    def generate_response(self):
        """Send ``self.prompt`` to the notebook-level model and return its reply.

        Relies on the module-level ``tokenizer`` and ``model`` objects.

        Returns:
            The decoded completion (also stored on ``self.response``).
        """
        messages = [
            {"role": "system", "content": "You are an expert technical analyst. Analyze stocks to the best of your ability."},
            {"role": "user", "content": self.prompt}
        ]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=512
        )
        # generate() returns prompt + completion; strip the prompt tokens.
        completion_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        self.response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]
        return self.response


In [40]:
# Smoke test: build the technical prompt for one crypto pair and one equity
# over calendar year 2020, then ask the model for its recommendation vector.
techni=technical_analyst(["BTC-USD","LMT"],start_date="2020-01-01", end_date="2021-01-01")
# print(techni.prompt)  # uncomment to inspect the exact prompt sent to the model
response=techni.generate_response()
print(response)

  self.data = yf.download(self.tickers, start=self.start_date, end=self.end_date)
[*********************100%***********************]  2 of 2 completed


[1,-0.5]


In [38]:
# Inspect the per-ticker indicator frames.
# NOTE(review): the saved output below shows 'MSFT' and this cell's execution
# count is lower than the cell that builds `techni`, so the output looks stale
# (from an earlier run with different tickers) — re-run top to bottom to confirm.
techni.indicators_processed

{'MSFT':                  SMA_5      SMA_15      SMA_50       EMA_5      EMA_10  \
 Date                                                                     
 2020-01-02         NaN         NaN         NaN  152.791107  152.791107   
 2020-01-03         NaN         NaN         NaN  151.649591  151.744717   
 2020-01-06         NaN         NaN         NaN  151.473856  151.557339   
 2020-01-07         NaN         NaN         NaN  150.819794  151.011077   
 2020-01-08  151.428912         NaN         NaN  151.383041  151.377365   
 ...                ...         ...         ...         ...         ...   
 2020-12-24  213.203101  208.684190  206.084928  212.906999  211.147068   
 2020-12-28  214.427844  209.363533  206.197858  214.025361  212.077071   
 2020-12-29  214.727774  209.995451  206.295213  214.511363  212.696397   
 2020-12-30  214.293256  210.358835  206.449398  214.043871  212.771396   
 2020-12-31  214.562427  211.039459  206.609566  213.969336  212.962099   
 
               

In [35]:
class fundamental_analyst:
    """Collect Finnhub fundamentals per ticker and query the LLM with them.

    Construction fetches the ``stock/metric`` and ``stock/financials-reported``
    payloads for every ticker, extracts one value per year for a fixed set of
    line items, and stores the finished prompt on ``self.prompt``.
    ``generate_response`` sends that prompt to the notebook-level
    ``tokenizer`` / ``model`` objects.

    The Finnhub token is taken from the ``api_key`` argument or, failing that,
    from the ``FINNHUB_API_KEY`` environment variable; it is deliberately not
    stored in the notebook.

    Attributes:
        tickers: List of ticker symbols.
        years: Every calendar year from ``start_date`` to ``end_date`` inclusive.
        data_processed: Mapping ticker -> dict of line item -> list of yearly
            values (``None`` where nothing was found).
        prompt: Prompt text built from ``data_processed``.
        response: Decoded model reply (set by ``generate_response``).
    """

    FINNHUB_BASE_URL = "https://finnhub.io/api/v1"
    REQUEST_TIMEOUT = 10  # seconds

    # Output key -> concept name looked up in the reported financial statements.
    GAAP_CONCEPTS = {
        "EPS": 'us-gaap_EarningsPerShareDiluted',
        "Net_Income": 'us-gaap_NetIncomeLoss',
        "Gross_Profit": 'us-gaap_GrossProfit',
        "Revenue": 'us-gaap_RevenueFromContractWithCustomerExcludingAssessedTax',
        "Total_Assets": 'us-gaap_Assets',
        "Total_Liabilities": 'us-gaap_Liabilities',
        "Shareholders_Equity": 'us-gaap_StockholdersEquity',
        "Operating_Cash_Flow": 'us-gaap_NetCashProvidedByUsedInOperatingActivities',
        "Investing_Cash_Flow": 'us-gaap_NetCashProvidedByUsedInInvestingActivities',
        "Financing_Cash_Flow": 'us-gaap_NetCashProvidedByUsedInFinancingActivities',
    }
    # Output key -> name of the annual series in the metric payload.
    METRIC_SERIES = {"P_E": 'pe', "ROA": 'roa', "ROE": 'roe'}

    def __init__(self, tickers, start_date, end_date, api_key=None):
        """Fetch fundamentals for every ticker and build the prompt.

        Args:
            tickers: One symbol (str) or an iterable of symbols.
            start_date: ``YYYY-MM-DD`` string; only its year is used.
            end_date: ``YYYY-MM-DD`` string; only its year is used.
            api_key: Finnhub token. Defaults to ``$FINNHUB_API_KEY``.

        Raises:
            RuntimeError: If no Finnhub token is available.
            requests.RequestException: If a Finnhub request fails.
        """
        self.tickers = self._normalize_tickers(tickers)
        self.start_date = start_date
        self.end_date = end_date
        self.api_key = api_key or os.environ.get("FINNHUB_API_KEY")

        start_year = datetime.strptime(start_date, "%Y-%m-%d").year
        end_year = datetime.strptime(end_date, "%Y-%m-%d").year
        self.years = list(range(start_year, end_year + 1))

        data_processed = {}
        for ticker in self.tickers:
            metric_data, gaap_data = self.get_data(ticker)
            data_processed[ticker] = self.get_financial_info(metric_data, gaap_data)
        self.data_processed = data_processed
        self.generate_prompt()

    @staticmethod
    def _normalize_tickers(tickers):
        """Return ``tickers`` as a list, wrapping a lone symbol string."""
        # list("GLD") would split the symbol into ['G', 'L', 'D'], so a string
        # has to be wrapped rather than converted.
        return [tickers] if isinstance(tickers, str) else list(tickers)

    def get_data(self, ticker):
        """Fetch the metric and reported-financials payloads for ``ticker``.

        Returns:
            Tuple ``(metric_data, gaap_data)`` of decoded JSON payloads.

        Raises:
            RuntimeError: If no Finnhub token is available.
            requests.RequestException: On timeout or a non-2xx response.
        """
        api_key = getattr(self, "api_key", None) or os.environ.get("FINNHUB_API_KEY")
        if not api_key:
            raise RuntimeError(
                "No Finnhub API key: pass api_key=... or set the FINNHUB_API_KEY environment variable."
            )
        params = {"symbol": ticker, "token": api_key}
        payloads = []
        for endpoint in ("stock/metric", "stock/financials-reported"):
            response = requests.get(
                f"{self.FINNHUB_BASE_URL}/{endpoint}",
                params=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            # Fail loudly on auth / rate-limit errors instead of silently
            # producing an all-"N/A" prompt.
            response.raise_for_status()
            payloads.append(response.json())
        metric_data, gaap_data = payloads
        return metric_data, gaap_data

    def find_us_gaap_entry(self, gaap_data, parameter, year):
        """Return the value of the first statement entry matching ``parameter``.

        Scans the filings in ``gaap_data['data']`` whose ``year`` equals
        ``year`` and, within each, the ``ic``, ``bs`` and ``cf`` sections of
        ``report``. An entry matches when ``parameter`` equals any of its
        values.

        Returns:
            The entry's ``value``, or ``None`` when nothing matches or the
            payload has no ``data`` list.
        """
        for filing in (gaap_data or {}).get('data') or []:
            if filing.get('year') != year:
                continue
            report = filing.get('report') or {}
            for section in ('ic', 'bs', 'cf'):  # income statement, balance sheet, cash flow
                for entry in report.get(section) or []:
                    if parameter in entry.values():
                        return entry.get('value')
        return None

    def find_metric_by_year(self, metric_data, parameter, year):
        """Return the annual series value whose ``period`` starts with ``year``.

        Returns:
            The item's ``v`` field, or ``None`` when the series or year is missing.
        """
        series = metric_data.get('series', {}).get('annual', {}).get(parameter, [])
        for item in series:
            if item.get('period', '').startswith(str(year)):
                return item.get('v')
        return None

    def get_financial_info(self, metric_data, gaap_data):
        """Extract every tracked line item for each year in ``self.years``.

        Returns:
            Dict mapping line-item name -> list of values aligned with
            ``self.years`` (``None`` where nothing was found).
        """
        financials = {key: [] for key in (*self.GAAP_CONCEPTS, *self.METRIC_SERIES)}

        for year in self.years:
            for key, concept in self.GAAP_CONCEPTS.items():
                financials[key].append(self.find_us_gaap_entry(gaap_data, concept, year))
            for key, series_name in self.METRIC_SERIES.items():
                financials[key].append(self.find_metric_by_year(metric_data, series_name, year))

        return financials

    def fmt(self, value, fmt_str):
        """Format ``value`` with ``fmt_str``; return ``"N/A"`` if it cannot be formatted."""
        if value is None:
            return "N/A"
        try:
            return format(value, fmt_str)
        except (TypeError, ValueError):
            # The payload comes from an external API; a non-numeric value must
            # not abort prompt generation.
            return "N/A"

    def generate_prompt(self):
        """Build, store and return the prompt from ``self.data_processed``."""
        prompt = """ Choose a recommendation for each stock. Respond in a vector of floats between [-1,1], -1 being Short and 1 being Strong Buy. 
Make these decisions based solely on the fundamental indicators given below for each stock. Return only the vector, no explanation is needed.
 You MUST return the vector at the end in this format [company1,company2,...companyn]: [Value1,Value2,Valuen]\n\n"""
        for ticker in self.tickers:
            financials = self.data_processed[ticker]
            partial_stats = f"""Financials for {ticker}:\n"""
            for i, year in enumerate(self.years):
                partial_stats += f"""
                Year: {year}
                Income Statement:
                Revenue: ${self.fmt(financials["Revenue"][i], ",.0f")}
                Gross Profit: ${self.fmt(financials["Gross_Profit"][i], ",.0f")}
                Net Income: ${self.fmt(financials["Net_Income"][i], ",.0f")}
                EPS (Diluted): {self.fmt(financials["EPS"][i], ".2f")}

                Balance Sheet:
                Total Assets: ${self.fmt(financials["Total_Assets"][i], ",.0f")}
                Total Liabilities: ${self.fmt(financials["Total_Liabilities"][i], ",.0f")}
                Shareholders' Equity: ${self.fmt(financials["Shareholders_Equity"][i], ",.0f")}

                Cash Flow:
                Operating Cash Flow: ${self.fmt(financials["Operating_Cash_Flow"][i], ",.0f")}
                Investing Cash Flow: ${self.fmt(financials["Investing_Cash_Flow"][i], ",.0f")}
                Financing Cash Flow: ${self.fmt(financials["Financing_Cash_Flow"][i], ",.0f")}

                Valuation and Ratios:
                P/E Ratio: {self.fmt(financials["P_E"][i], ".2f")}
                ROA: {self.fmt(financials["ROA"][i], ".2%")}
                ROE: {self.fmt(financials["ROE"][i], ".2%")}
                """
            prompt += partial_stats + "\n\n"
        prompt += "\nBased on this, what is your investment recommendation? Pick one action candidate."
        self.prompt = prompt
        return prompt

    def generate_response(self):
        """Send ``self.prompt`` to the notebook-level model and return its reply.

        Relies on the module-level ``tokenizer`` and ``model`` objects.

        Returns:
            The decoded completion (also stored on ``self.response``).
        """
        messages = [
            {"role": "system", "content": "You are an expert fundamental analyst. Analyze stocks to the best of your ability."},
            {"role": "user", "content": self.prompt}
        ]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=512
        )
        # generate() returns prompt + completion; strip the prompt tokens.
        completion_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        self.response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]
        return self.response

In [42]:
# Smoke test: build the fundamentals prompt for GLD covering the years of the
# given date range, show it, then ask the model for its recommendation vector.
funda=fundamental_analyst(["GLD"],start_date="2023-01-01", end_date="2024-01-01")
print(funda.prompt)
response=funda.generate_response()
print(response)

 Choose a recommendation for each stock. Respond in a vector of floats between [-1,1], -1 being Short and 1 being Strong Buy. 
Make these decisions based solely on the fundamental indicators given below for each stock. Return only the vector, no explanation is needed.
 You MUST return the vector at the end in this format [company1,company2,...companyn]: [Value1,Value2,Valuen]

Financials for GLD:

                Year: 2023
                Income Statement:
                Revenue: $N/A
                Gross Profit: $N/A
                Net Income: $5,548,188,000
                EPS (Diluted): N/A

                Balance Sheet:
                Total Assets: $52,539,161,000
                Total Liabilities: $17,892,000
                Shareholders' Equity: $N/A

                Cash Flow:
                Operating Cash Flow: $0
                Investing Cash Flow: $N/A
                Financing Cash Flow: $N/A

                Valuation and Ratios:
                P/E Ratio: N/A
     

In [39]:
# Inspect the extracted per-ticker line items.
# NOTE(review): the saved output below shows 'LLY' and this cell's execution
# count is lower than the cell that builds `funda`, so the output looks stale
# (from an earlier run with a different ticker) — re-run top to bottom to confirm.
funda.data_processed

{'LLY': {'EPS': [5, 11],
  'Net_Income': [5240400000.0, 10590000000.0],
  'Gross_Profit': [None, None],
  'Revenue': [None, None],
  'Total_Assets': [64006300000, 78714900000],
  'Total_Liabilities': [None, None],
  'Shareholders_Equity': [10771900000, 14192100000],
  'Operating_Cash_Flow': [4240100000, 8817900000],
  'Investing_Cash_Flow': [-7152700000, -9301500000],
  'Financing_Cash_Flow': [3495600000, 1230100000],
  'P_E': [105.5969, 69.2041],
  'ROA': [0.0819, 0.1345],
  'ROE': [0.4865, 0.7462]}}

In [104]:
class news_analyst:
    """Collect recent headlines per ticker and query the LLM with them.

    For every ticker up to ``MAX_HEADLINES`` headlines are gathered, first
    from a local ``<news_dir>/<ticker>.jsonl`` file and then, if that yields
    too few, from Finnhub. The finished prompt is stored on ``self.prompt``;
    ``generate_response`` sends it to the notebook-level ``tokenizer`` /
    ``model`` objects.

    The Finnhub token is taken from the ``api_key`` argument or, failing that,
    from the ``FINNHUB_API_KEY`` environment variable; it is deliberately not
    stored in the notebook. Without a token the Finnhub top-up is skipped.

    Attributes:
        tickers: List of ticker symbols.
        companies: Keyword per ticker (same order) that a headline must contain.
        news_collection: Mapping ticker -> list of selected headlines.
        selected_news: Headlines of the ticker processed last.
        prompt: Prompt text built from ``news_collection``.
        response: Decoded model reply (set by ``generate_response``).
    """

    DEFAULT_NEWS_DIR = "/home/f20222001/test-venv/Portfolio/sp500_news/sp500_news"
    FINNHUB_BASE_URL = "https://finnhub.io/api/v1"
    # Symbols served by Finnhub's general crypto feed instead of company-news.
    CRYPTO_TICKERS = frozenset({"BTC-USD", "ETH-USD", "SOL-USD"})
    MAX_HEADLINES = 10
    REQUEST_TIMEOUT = 10  # seconds

    def __init__ (self, tickers, companies, start_date, end_date, news_dir=None, api_key=None):
        """Gather headlines for every ticker and build the prompt.

        Args:
            tickers: One symbol (str) or a sequence of symbols.
            companies: One keyword (str) or a sequence aligned with ``tickers``.
            start_date: Inclusive ``YYYY-MM-DD`` lower bound for article dates.
            end_date: Inclusive ``YYYY-MM-DD`` upper bound for article dates.
            news_dir: Directory holding ``<ticker>.jsonl`` files. Defaults to
                ``DEFAULT_NEWS_DIR``.
            api_key: Finnhub token. Defaults to ``$FINNHUB_API_KEY``.
        """
        self.tickers = [tickers] if isinstance(tickers, str) else tickers
        self.companies = [companies] if isinstance(companies, str) else companies
        self.start_date = start_date
        self.end_date = end_date
        self.news_dir = news_dir or self.DEFAULT_NEWS_DIR
        self.api_key = api_key or os.environ.get("FINNHUB_API_KEY")

        news_collection = {}
        for i, ticker in enumerate(self.tickers):
            # Index the normalised list: indexing the raw argument would pick a
            # single character when `companies` was passed as a string.
            news_collection[ticker] = self.get_news_articles(ticker, self.companies[i])
        self.news_collection = news_collection
        self.generate_news_prompt()

    def get_news_articles(self, ticker, company):
        """Return up to ``MAX_HEADLINES`` de-duplicated headlines for ``ticker``.

        Local JSONL headlines are collected first; Finnhub is queried only when
        fewer than ``MAX_HEADLINES`` were found. If the local file alone yields
        more than the cap, a random sample of ``MAX_HEADLINES`` is kept.

        Args:
            ticker: Symbol used for the file name and the Finnhub query.
            company: Case-insensitive keyword a headline must contain (not
                applied to the crypto feed).

        Returns:
            List of headline strings (also stored on ``self.selected_news``).
        """
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(self.end_date, "%Y-%m-%d")

        want = self.MAX_HEADLINES
        titles, seen = [], set()

        # 1) Local JSONL file.
        self._collect_local_titles(ticker, company, start_dt, end_dt, titles, seen)

        # 2) Top up from Finnhub when the local file did not provide enough.
        if len(titles) < want:
            self._collect_finnhub_titles(ticker, company, start_dt, end_dt, titles, seen, want)

        # 3) Cap the list (only the local file can overshoot).
        if len(titles) > want:
            titles = random.sample(titles, want)

        self.selected_news = titles
        return self.selected_news

    def _collect_local_titles(self, ticker, company, start_dt, end_dt, titles, seen):
        """Append in-range, keyword-matching titles from the ticker's JSONL file."""
        news_dir = getattr(self, "news_dir", None) or self.DEFAULT_NEWS_DIR
        file_path = os.path.join(news_dir, f"{ticker}.jsonl")
        keyword = company.lower()
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue  # tolerate blank lines between records
                    obj = json.loads(line)
                    title = (obj.get("Article_title") or "").strip()
                    if not title:
                        continue
                    # NOTE(review): assumes "Date" is a plain YYYY-MM-DD string —
                    # confirm against the dataset.
                    article_dt = datetime.strptime(obj["Date"], "%Y-%m-%d")
                    if start_dt <= article_dt <= end_dt and keyword in title.lower():
                        if title not in seen:
                            seen.add(title)
                            titles.append(title)
        except FileNotFoundError:
            # No local file for this ticker: rely on the API top-up.
            pass

    def _collect_finnhub_titles(self, ticker, company, start_dt, end_dt, titles, seen, want):
        """Append Finnhub headlines until ``want`` titles are collected (best effort)."""
        api_key = getattr(self, "api_key", None) or os.environ.get("FINNHUB_API_KEY")
        if not api_key:
            warnings.warn(
                "No Finnhub API key (api_key=... or FINNHUB_API_KEY); skipping the Finnhub headline top-up."
            )
            return

        is_crypto = ticker.upper() in self.CRYPTO_TICKERS
        if is_crypto:
            # Crypto: general crypto feed, no per-company filter.
            endpoint = "news"
            params = {"category": "crypto", "token": api_key}
        else:
            # Equities/ETFs: company-news restricted to the date range.
            endpoint = "company-news"
            params = {
                "symbol": ticker,
                "from": self.start_date,
                "to": self.end_date,
                "token": api_key,
            }

        try:
            r = requests.get(
                f"{self.FINNHUB_BASE_URL}/{endpoint}",
                params=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            if not r.ok:
                return
            payload = r.json()
        except requests.RequestException:
            # The top-up is optional; keep whatever was collected locally.
            return
        if not isinstance(payload, list):
            return  # error payloads are not a list of articles

        first_day, last_day = start_dt.date(), end_dt.date()
        keyword = company.lower()
        for item in payload:
            title = (item.get("headline") or "").strip()
            if not title or title in seen:
                continue
            ts = item.get("datetime")
            if not isinstance(ts, (int, float)):
                continue
            art_date = datetime.fromtimestamp(ts, tz=timezone.utc).date()
            if not (first_day <= art_date <= last_day):
                continue
            if not is_crypto and keyword not in title.lower():
                continue
            seen.add(title)
            titles.append(title)
            if len(titles) >= want:
                break

    def generate_news_prompt(self):
        """Build, store and return the prompt from ``self.news_collection``."""
        prompt = """ Choose a recommendation for each stock. Respond in a vector of floats between [-1,1], -1 being Short and 1 being Strong Buy. 
Make these decisions based solely on the news headlines and sentiment given below for each stock.Return only the vector, no explanation needed.
 You MUST return the vector at the end in this format [company1,company2,...companyn]: [Value1,Value2,Valuen]

Headlines:
"""
        for ticker in self.tickers:
            prompt += f""" News for {ticker}\n"""
            for headline in self.news_collection[ticker]:
                prompt += headline
                prompt += '\n'
        prompt += "Based on this, what is your investment recommendation? Pick one action candidate."
        self.prompt = prompt
        return self.prompt

    def generate_response(self):
        """Send ``self.prompt`` to the notebook-level model and return its reply.

        Relies on the module-level ``tokenizer`` and ``model`` objects.

        Returns:
            The decoded completion (also stored on ``self.response``).
        """
        messages = [
            {"role": "system", "content": "You are an expert news and sentiment analyst. Analyze stocks to the best of your ability."},
            {"role": "user", "content": self.prompt}
        ]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=512
        )
        # generate() returns prompt + completion; strip the prompt tokens.
        completion_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        self.response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]
        return self.response

In [111]:
# Smoke test: gather headlines mentioning "Gold" for GLD in the given date
# range, then ask the model for its recommendation vector.
newsa=news_analyst(["GLD"],companies=["Gold"],start_date="2025-04-01", end_date="2025-07-01")
# print(newsa.prompt)  # uncomment to inspect the exact prompt sent to the model
response=newsa.generate_response()
print(response)

[0.8]


In [112]:
# Headlines selected for the ticker that `newsa` processed last.
newsa.selected_news

['Gold Surges Above $3,500 Again - Will Momentum Continue? (Technical Analysis)',
 'Gold Priced In Major Currencies Reveals Clear Consolidation (Technical Analysis)',
 'Gold ETF Gains Outpace Bitcoin Funds in 2025',
 'Update: Gold Moves Higher as U.S. Dollar Touches a 52-Week Low',
 'Gold Moves Higher as U.S. Dollar Touches a 52-Week Low',
 'CFTC CoTs Report Confirms There Is Little Froth In The Gold Market',
 'The Rise Of Physical Gold And Silver Over Paper Assets',
 'Update: Gold Trades Lower Even as Dollar Weakens After Report Shows U.S. Inflation Rose in May',
 "This Gold Stock Climbs 51%, Nears Entry As Analyst Sees 'Enhanced Returns' Ahead",
 'Gold Trades Lower Even as Dollar Weakens After Report Shows U.S. Inflation Rose in May']

In [107]:
# Ad-hoc check of Finnhub's general crypto-news feed.
# The token is read from the environment so it is never saved in the notebook;
# a missing FINNHUB_API_KEY fails here with a KeyError.
url = f"https://finnhub.io/api/v1/news?category=crypto&token={os.environ['FINNHUB_API_KEY']}"
r = requests.get(url, timeout=10)

In [108]:
# Print the headline of every item in the response `r` fetched in the previous
# cell; an empty/None payload prints nothing and a missing headline prints a
# blank line.
for item in (r.json() or []):
    print((item.get("headline") or "").strip())

US Treasury’s DeFi ID plan is ‘like putting cameras in every living room’
Crypto in Late 2025 and Beyond: What Powell’s Speech Signals for Rates, Inflation and Assets
Ethereum gaming network Xai sues Musk’s xAI for trademark infringement
Ether’s August rally could lead to September downtrend, history suggests
Rising Fed rate chatter may be a red flag for crypto: Santiment
Getting ETH Exposure in 2025: Ether Near Record Highs, Tom Lee Can See $15K by Year End
Custodia Bank CEO warns of TradFi firms facing first crypto winter
Aave tumbles following rumors regarding World Liberty token allocation
KPMG Says Investor Interest in Digital Assets Will Drive Strong Second Half for Canadian Fintechs
BTC climbed to 1.7% of global money before Fed chair signaled rate cut
Eric Trump Makes Bitcoin Price Predictions as He Reportedly Gets Ready to Visit Metaplanet
Bybit Unveils Advanced Multi-Chart Experience in Collaboration with TradingView, Deepens Partnership as Official WSOT 2025 Partner
XRP Surg