In [1]:
import pandas as pd
import requests
import json
from datetime import datetime
# pd.set_option("display.max_rows", None)
# pd.set_option("display.max_columns", None)
# pd.set_option("display.max_colwidth", None)
from datetime import datetime, timedelta
import random
from datasets import load_dataset
import yfinance as yf
from transformers import AutoTokenizer, AutoModelForCausalLM,pipeline
import torch
model_id = "deepseek-ai/deepseek-llm-7b-chat"
torch.cuda.set_device(3)  # Sets default to GPU 0
device=torch.device("cuda:3")
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map={"": 3},             # auto-distributes across GPUs
    torch_dtype="auto",            # picks bf16 or fp16 depending on availability
)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:19<00:00,  9.93s/it]


In [15]:
class fundamental_analyst:

    def __init__(self, ticker, start_date, end_date):
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date

        start_year = datetime.strptime(start_date, "%Y-%m-%d").year
        end_year = datetime.strptime(end_date, "%Y-%m-%d").year
        self.years = list(range(start_year, end_year + 1))

        self.get_data()
        self.get_financial_info()
        self.generate_prompt()
        
    def get_data(self):
        api_key = "d1l719pr01qt4thec1pgd1l719pr01qt4thec1q0"
        url = f"https://finnhub.io/api/v1/stock/metric?symbol=AAPL&token={api_key}"
        response = requests.get(url)
        self.metric_data=response.json()
        url = f"https://finnhub.io/api/v1/stock/financials-reported?symbol=AAPL&token={api_key}"
        response = requests.get(url)
        self.gaap_data=response.json()

    def find_us_gaap_entry(self, parameter, year):
        for dic in self.gaap_data['data']:
            if dic.get('year') == year:
                for section in ['ic', 'bs', 'cf']:  # income statement, balance sheet, cash flow
                    for entry in dic.get('report', {}).get(section, []):
                        if parameter in entry.values():
                            return entry.get('value')
        return None

    def find_metric_by_year(self, parameter, year):
        series = self.metric_data.get('series', {}).get('annual', {}).get(parameter, [])
        for item in series:
            if item.get('period', '').startswith(str(year)):
                return item.get('v')  # assuming value is under 'v'
        return None

    def get_financial_info(self):
        self.EPS = []
        self.Net_Income = []
        self.Gross_Profit = []
        self.Revenue = []
        self.Total_Assets = []
        self.Total_Liabilities = []
        self.Shareholders_Equity = []
        self.Free_Cash_Flow = []
        self.Operating_Cash_Flow = []
        self.Investing_Cash_Flow = []
        self.Financing_Cash_Flow = []
        self.P_E = []
        self.ROA = []
        self.ROE = []

        for year in self.years:
            self.EPS.append(self.find_us_gaap_entry('us-gaap_EarningsPerShareDiluted', year))
            self.Net_Income.append(self.find_us_gaap_entry('us-gaap_NetIncomeLoss', year))
            self.Gross_Profit.append(self.find_us_gaap_entry('us-gaap_GrossProfit', year))
            self.Revenue.append(self.find_us_gaap_entry('us-gaap_RevenueFromContractWithCustomerExcludingAssessedTax', year))
            self.Total_Assets.append(self.find_us_gaap_entry('us-gaap_Assets', year))
            self.Total_Liabilities.append(self.find_us_gaap_entry('us-gaap_Liabilities', year))
            self.Shareholders_Equity.append(self.find_us_gaap_entry('us-gaap_StockholdersEquity', year))
            self.Operating_Cash_Flow.append(self.find_us_gaap_entry('us-gaap_NetCashProvidedByUsedInOperatingActivities', year))
            self.Investing_Cash_Flow.append(self.find_us_gaap_entry('us-gaap_NetCashProvidedByUsedInInvestingActivities', year))
            self.Financing_Cash_Flow.append(self.find_us_gaap_entry('us-gaap_NetCashProvidedByUsedInFinancingActivities', year))
            self.P_E.append(self.find_metric_by_year('pe', year))
            self.ROA.append(self.find_metric_by_year('roa', year))
            self.ROE.append(self.find_metric_by_year('roe', year))

    
    def generate_prompt(self):
        prompt = f"""
    Pretend that you are a fundamental investment analyst. Analyze the financial performance of {self.ticker} and give a recommendation: Strong Buy, Buy, Hold, Sell, or Short. 
    Justify your decision in 4–6 bullet points using financial reasoning. Consider all the financial information shared. Only use the numerical data given. 
    Do not add assumptions about company operations, reputation, or strategy.

    Financials for {self.ticker}:\n
    """
        for i, year in enumerate(self.years):
            prompt += f"""
    Year: {year}
    Income Statement:
    Revenue: ${self.Revenue[i]:,.0f}
    Gross Profit: ${self.Gross_Profit[i]:,.0f}
    Net Income: ${self.Net_Income[i]:,.0f}
    EPS (Diluted): {self.EPS[i]:.2f}

    Balance Sheet:
    Total Assets: ${self.Total_Assets[i]:,.0f}
    Total Liabilities: ${self.Total_Liabilities[i]:,.0f}
    Shareholders' Equity: ${self.Shareholders_Equity[i]:,.0f}

    Cash Flow:
    Operating Cash Flow: ${self.Operating_Cash_Flow[i]:,.0f}
    Investing Cash Flow: ${self.Investing_Cash_Flow[i]:,.0f}
    Financing Cash Flow: ${self.Financing_Cash_Flow[i]:,.0f}

    Valuation and Ratios:
    P/E Ratio: {self.P_E[i]:.2f}
    ROA: {self.ROA[i]:.2%}
    ROE: {self.ROE[i]:.2%}

    """
        prompt += "\nBased on this, what is your investment recommendation? Pick one action candidate."
        self.prompt = prompt
        return prompt

        
    def generate_response(self):
        generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer
        )
        outputs = generator(
            self.prompt,
            max_new_tokens=500,         # Reduced for memory efficiency
            do_sample=True,
            temperature=0.4,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            # Memory efficient generation settings
            num_beams=1,                # No beam search to save memory
            #early_stopping=True,
            use_cache=True
        )

        full_text = outputs[0]['generated_text']

        response_only = full_text[len(self.prompt):].strip()
        return response_only

In [3]:
class technical_analyst:

    def __init__(self, ticker, start_date, end_date):
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date
        self.generate_df()
        self.generate_indicators()
        self.generate_technical_prompt()
        
    def generate_df(self):
        self.data = yf.download(self.ticker, start=self.start_date, end=self.end_date)

    def compute_rsi(self, close, period=14):
        delta = close.diff()

        gain = delta.clip(lower=0)
        loss = -delta.clip(upper=0)

        avg_gain = gain.rolling(window=period).mean()
        avg_loss = loss.rolling(window=period).mean()

        rs = avg_gain / avg_loss
        rsi = 100 - (100 / (1 + rs))
        return rsi

    def compute_obv(self, close, volume):
        direction = close.diff().apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))
        obv = (volume * direction).fillna(0).cumsum()
        return obv
    
    def generate_indicators(self):
        temp=pd.DataFrame()
        temp["SMA_5"] = self.data['Close'][self.ticker].rolling(5).mean()
        temp["SMA_15"] = self.data['Close'][self.ticker].rolling(15).mean()
        temp["SMA_50"] = self.data['Close'][self.ticker].rolling(50).mean()

        temp['EMA_5'] = self.data['Close'][self.ticker].ewm(span=5).mean()
        temp['EMA_10'] = self.data['Close'][self.ticker].ewm(span=10).mean()
        temp['EMA_50'] = self.data['Close'][self.ticker].ewm(span=50).mean()
        temp["Date"] = self.data['Close'][self.ticker].index
        temp["RSI"]=self.compute_rsi(self.data['Close'][self.ticker])
        temp["OBV"]=self.compute_obv(self.data['Close'][self.ticker], self.data['Volume'][self.ticker])
        self.indicator_df=temp

    def generate_technical_prompt(self):
        latest = self.indicator_df.iloc[-1]
        prompt = f"""
You are a technical investment analyst. Analyze the recent technical performance of {self.ticker} and give an investment recommendation.

Your task:
Choose one of the following recommendations: Strong Buy, Buy, Hold, Sell, or Short.
Justify your choice using 4–6 bullet points based only on the indicators below.
You MUST respond in natural human language. Do NOT include any code or formulas.

Technical Indicators for {self.ticker} (most recent data point):

SMA 5: {latest['SMA_5']:.2f}
SMA 15: {latest['SMA_15']:.2f}
SMA 50: {latest['SMA_50']:.2f}

EMA 5: {latest['EMA_5']:.2f}
EMA 10: {latest['EMA_10']:.2f}
EMA 50: {latest['EMA_50']:.2f}

RSI: {latest['RSI']:.2f}
OBV: {latest['OBV']:,.0f}

Based on this, what is your investment recommendation? Pick one action candidate.
"""
        self.prompt=prompt
        return self.prompt 
    
    def generate_response(self):
        generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer
        )

        outputs = generator(
            self.prompt,
            max_new_tokens=1000,         # Reduced for memory efficiency
            do_sample=True,
            temperature=0.4,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            # Memory efficient generation settings
            num_beams=1,                # No beam search to save memory
            #early_stopping=True,
            use_cache=True
        )

        full_text = outputs[0]['generated_text']

        response_only = full_text[len(self.prompt):].strip()
        return response_only


In [30]:
class news_analyst:

    def __init__ (self,ticker, company, start_date, end_date):
        self.ticker=ticker
        self.company=company
        self.start_date=start_date
        self.end_date=end_date
        self.get_news_articles()
        self.generate_news_prompt()
        
    def get_news_articles(self):

        file_path = f"/home/f20222001/test-venv/Portfolio/sp500_news/sp500_news/{self.ticker}.jsonl"
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d")
        end_dt   = datetime.strptime(self.end_date, "%Y-%m-%d")

        results = []
        with open(file_path, 'r') as f:
            for line in f:
                obj = json.loads(line)
                try:
                    article_dt = datetime.strptime(obj["Date"], "%Y-%m-%d")
                    title = obj.get("Article_title", "")
                except (KeyError, ValueError):
                    continue
                
                if start_dt <= article_dt <= end_dt and self.company.lower() in title.lower():
                    results.append(obj['Article_title'])
        if len(results) > 10:
            results = random.sample(results, 10)

        self.selected_news=results

    
    def generate_news_prompt(self):
        prompt = f"""
Pretend that you are a sentiment and headlines investment analyst. Analyze the recent technical performance of {self.ticker} and give a recommendation: Strong Buy, Buy, Hold, Sell, or Short. 
Justify your decision in 4–6 bullet points using sentiment analysis. Only use the headlines given. 
Do not add assumptions about company fundamentals, operations, or strategy.

Headlines:
"""
        for i, item in enumerate(self.selected_news):
            prompt+=item
            prompt+='\n'
        prompt+="Based on this, what is your investment recommendation? Pick one action candidate."
        self.prompt=prompt
        return self.prompt
    
    def generate_response(self):
        generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer
        )

        outputs = generator(
            self.prompt,
            max_new_tokens=500,         # Reduced for memory efficiency
            do_sample=True,
            temperature=0.4,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            # Memory efficient generation settings
            num_beams=1,                # No beam search to save memory
            #early_stopping=True,
            use_cache=True
        )

        full_text = outputs[0]['generated_text']

        response_only = full_text[len(self.prompt):].strip()
        return response_only

In [16]:
funda=fundamental_analyst('NVDA',start_date="2023-01-01", end_date="2024-01-01")
print(funda.prompt)
funda_analysis=funda.generate_response()
print("Response")
print(funda_analysis)

Device set to use cuda:3



    Pretend that you are a fundamental investment analyst. Analyze the financial performance of NVDA and give a recommendation: Strong Buy, Buy, Hold, Sell, or Short. 
    Justify your decision in 4–6 bullet points using financial reasoning. Consider all the financial information shared. Only use the numerical data given. 
    Do not add assumptions about company operations, reputation, or strategy.

    Financials for NVDA:

    
    Year: 2023
    Income Statement:
    Revenue: $383,285,000,000
    Gross Profit: $169,148,000,000
    Net Income: $96,995,000,000
    EPS (Diluted): 6.13

    Balance Sheet:
    Total Assets: $352,583,000,000
    Total Liabilities: $290,437,000,000
    Shareholders' Equity: $62,146,000,000

    Cash Flow:
    Operating Cash Flow: $110,543,000,000
    Investing Cash Flow: $3,705,000,000
    Financing Cash Flow: $-108,488,000,000

    Valuation and Ratios:
    P/E Ratio: 27.86
    ROA: 27.51%
    ROE: 156.08%

    
    Year: 2024
    Income Statement:
    

In [18]:
techni=technical_analyst("NVDA",start_date="2023-01-01", end_date="2024-01-01")
print(techni.prompt)
techni_analysis=techni.generate_response()
print("Response")
print(techni_analysis)

  self.data = yf.download(self.ticker, start=self.start_date, end=self.end_date)
[*********************100%***********************]  1 of 1 completed
Device set to use cuda:3



You are a technical investment analyst. Analyze the recent technical performance of NVDA and give an investment recommendation.

Your task:
Choose one of the following recommendations: Strong Buy, Buy, Hold, Sell, or Short.
Justify your choice using 4–6 bullet points based only on the indicators below.
You MUST respond in natural human language. Do NOT include any code or formulas.

Technical Indicators for NVDA (most recent data point):

SMA 5: 49.29
SMA 15: 48.68
SMA 50: 46.66

EMA 5: 49.32
EMA 10: 49.02
EMA 50: 47.24

RSI: 62.56
OBV: 18,782,895,000

Based on this, what is your investment recommendation? Pick one action candidate.

Response
My recommendation is to Buy NVDA.

Here are my reasons:

1. The SMA 5 is above the SMA 15, which indicates that the stock is in an uptrend.
2. The EMA 5 is above the EMA 10, which also indicates that the stock is in an uptrend.
3. The RSI is at 62.56, which is in the overbought territory, but it's not too high, so it's not a strong sell signal.
4

In [34]:
news=news_analyst("NVDA",start_date="2023-01-01", end_date="2024-01-01", company="Nvidia")
print(news.prompt)
news_analysis=news.generate_response()
print("Response")
print(news_analysis)

Device set to use cuda:3



Pretend that you are a sentiment and headlines investment analyst. Analyze the recent technical performance of NVDA and give a recommendation: Strong Buy, Buy, Hold, Sell, or Short. 
Justify your decision in 4–6 bullet points using sentiment analysis. Only use the headlines given. 
Do not add assumptions about company fundamentals, operations, or strategy.

Headlines:
Nvidia (NVDA) Reports Q3 Earnings: What Key Metrics Have to Say
Unusual Put Option Trade in NVIDIA (NVDA) Worth $4,248.98K
Looking for the Next Nvidia? Try These 3 Small-Cap Chip Stocks
Nvidia forecasts second-quarter revenue above estimates
Beyond Nvidia: 5 AI Chip Stocks to Buy in July
What TSMC, ARM, AMD, Apple, and Nvidia Stock Investors Should Know About Recent Chip Updates
Take the Zacks Approach to Beat the Market: NVIDIA, Lisata, Shopify in Focus
Will AMD's Acquisition Be Enough to Help It Compete With Nvidia AI?
Intel's Gaudi 2 Beats Nvidia's H100 in Certain AI Tasks
There May Be a New Catalyst for Nvidia. And I