In [45]:
import re
import numpy as np
import pandas as pd
import yfinance as yf
from transformers import pipeline
import pandas as pd
import requests
import json
from datetime import datetime
# pd.set_option("display.max_rows", None)
# pd.set_option("display.max_columns", None)
# pd.set_option("display.max_colwidth", None)
from datetime import datetime, timezone
import random
from datasets import load_dataset
import yfinance as yf
from transformers import AutoTokenizer, AutoModelForCausalLM,pipeline
import torch

In [46]:
model_id = "deepseek-ai/deepseek-llm-7b-chat"
torch.cuda.set_device(2)  # Sets default to GPU 0
device=torch.device("cuda:2")
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map={"": 2},             # auto-distributes across GPUs
    torch_dtype="auto",            # picks bf16 or fp16 depending on availability
)

Fetching 2 files: 100%|██████████| 2/2 [00:42<00:00, 21.30s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.27it/s]


In [None]:
import pandas as pd
import yfinance as yf
from transformers import pipeline

class technical_analyst_multi:
    def __init__(self, tickers, start_date, end_date, model=model, tokenizer=tokenizer):
        """
        tickers: list[str] (e.g., ["AAPL","MSFT","NVDA"])
        start_date, end_date: "YYYY-MM-DD"
        model, tokenizer: HF objects already loaded by caller
        """
        self.tickers = tickers
        self.start_date = start_date
        self.end_date = end_date
        self.model = model
        self.tokenizer = tokenizer

        self.generate_df()
        self.generate_indicators()

    # ---------- data ----------
    def generate_df(self):
        # One fetch for all tickers; yfinance returns wide frames with columns = tickers
        df = yf.download(
            self.tickers, start=self.start_date, end=self.end_date, group_by="column", threads=True
        )
        # Normalize to a consistent 3D-like structure: field -> (date x ticker)
        # If only one ticker returns, ensure 2D columns (ticker-level)
        def ensure_2d(frame, field):
            col = frame[field]
            if isinstance(col, pd.Series):
                col = col.to_frame(self.tickers[0])
            return col

        self.close = ensure_2d(df, "Close")
        self.volume = ensure_2d(df, "Volume")

        # Align indices and columns
        self.close = self.close.reindex(columns=self.tickers).dropna(how="all")
        self.volume = self.volume.reindex_like(self.close)

        self.data = df  # keep raw if you need it elsewhere

    # ---------- indicators ----------
    @staticmethod
    def compute_rsi(close: pd.Series, period=14):
        delta = close.diff()
        gain = delta.clip(lower=0)
        loss = -delta.clip(upper=0)

        avg_gain = gain.rolling(window=period).mean()
        avg_loss = loss.rolling(window=period).mean()

        rs = avg_gain / avg_loss
        rsi = 100 - (100 / (1 + rs))
        return rsi

    @staticmethod
    def compute_obv(close: pd.Series, volume: pd.Series):
        direction = close.diff().apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))
        obv = (volume * direction).fillna(0).cumsum()
        return obv

    def generate_indicators(self):
        """Build a dict of indicator DataFrames keyed by ticker."""
        self.indicator_df = {}
        for t in self.tickers:
            c = self.close[t].dropna()
            v = self.volume[t].reindex_like(c)

            if c.empty or v.empty:
                # Skip tickers with no data in the window
                continue

            temp = pd.DataFrame(index=c.index)
            # SMAs
            temp["SMA_5"] = c.rolling(5).mean()
            temp["SMA_15"] = c.rolling(15).mean()
            temp["SMA_50"] = c.rolling(50).mean()
            # EMAs
            temp["EMA_5"] = c.ewm(span=5, adjust=False).mean()
            temp["EMA_10"] = c.ewm(span=10, adjust=False).mean()
            temp["EMA_50"] = c.ewm(span=50, adjust=False).mean()
            # RSI & OBV
            temp["RSI"] = self.compute_rsi(c)
            temp["OBV"] = self.compute_obv(c, v)
            # Keep last valid rows only later when prompting
            self.indicator_df[t] = temp

    # ---------- prompt & generation ----------
    def _latest_row_with_all_indicators(self, df: pd.DataFrame):
        needed = ["SMA_5","SMA_15","SMA_50","EMA_5","EMA_10","EMA_50","RSI","OBV"]
        if df is None or df.empty:
            return None
        complete = df.dropna(subset=needed)

        if complete.empty:
            return None
        return complete.iloc[-1]

    def build_joint_messages(self):
        """
        Build a chat-style message list for Phi-4.
        Instead of JSON schema, we give natural text indicators for each ticker.
        """
        rows = []
        for t in self.tickers:
            df = self.indicator_df.get(t)
            latest = self._latest_row_with_all_indicators(df)
            if latest is None:
                continue
            rows.append(
                f"""{t}:
        SMA 5: {latest['SMA_5']:.2f}
        SMA 15: {latest['SMA_15']:.2f}
        SMA 50: {latest['SMA_50']:.2f}
        EMA 5: {latest['EMA_5']:.2f}
        EMA 10: {latest['EMA_10']:.2f}
        EMA 50: {latest['EMA_50']:.2f}
        RSI: {latest['RSI']:.2f}
        OBV: {latest['OBV']:,.0f}"""
            )

        if not rows:
            return None

        indicators_block = "\n\n".join(rows)

        ACTIONS = ["Strong Buy", "Buy", "Hold", "Sell", "Short"]

        system_msg = {
            "role": "system",
            "content": (
                "You are a technical investment advisor. Your job is to issue clear, "
                "concise trading recommendations using ONLY the provided indicators. "
                "DO reveal your internal reasoning or chain-of-thought. "
                "Respond with final answers only, in the requested format."
            )
        }

        user_msg = {
            "role": "user",
            "content": (
                "Analyze the recent technicals for the tickers below. For EACH ticker, choose exactly "
                f"one action from {ACTIONS} and give a confidence in [0,1]. Keep the rationale to at most "
                "2 short bullet points grounded ONLY in the indicators (no news or fundamentals).\n\n"
                "Output format (no extra prose):\n"
                "Recommendations:\n"
                "TICKER — ACTION — confidence: [0,1]\n"
                "- point 1\n"
                "- point 2\n\n"
                "Tickers and indicators:\n"
                f"{indicators_block}\n\n"
            )
        }


        return [system_msg, user_msg]
   
    def generate_joint_response(self, model, tokenizer,max_new_tokens=2024, temperature=0.7, top_p=0.9):
        messages = self.build_joint_messages()
        if not messages:
            return {"messages": None, "raw": "", "decoded": ""}

        inputs = tokenizer.apply_chat_template(
            messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
        )

        outputs = model.generate(
            inputs.to(model.device),
            max_new_tokens=max_new_tokens,      # ↓ hard cap on length
            do_sample=False,         # deterministic, less rambly
            temperature=0.0,         # ignored when do_sample=False; keep for symmetry
            top_p=1.0,
            repetition_penalty=1.05, # gentle nudge against waffle
            no_repeat_ngram_size=3,  # reduces repetitiveness
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
        print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [48]:
tam = technical_analyst_multi(
    tickers=["AAPL","MSFT","NVDA"],
    start_date="2024-01-01",
    end_date="2025-08-16",
    model=model,
    tokenizer=tokenizer
)

tam.generate_indicators()            # if not already called in __init__
tam.build_joint_messages()
result = tam.generate_joint_response(model, tokenizer)

  df = yf.download(
[*********************100%***********************]  3 of 3 completed
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


You are a technical investment advisor. Your job is to issue clear, concise trading recommendations using ONLY the provided indicators. DO reveal your internal reasoning or chain-of-thought. Respond with final answers only, in the requested format.

User: Analyze the recent technicals for the tickers below. For EACH ticker, choose exactly one action from ['Strong Buy', 'Buy', 'Hold', 'Sell', 'Short'] and give a confidence in [0,1]. Keep the rationale to at most 2 short bullet points grounded ONLY in the indicators (no news or fundamentals).

Output format (no extra prose):
Recommendations:
TICKER — ACTION — confidence: [0,1]
- point 1
- point 2

Tickers and indicators:
AAPL:
        SMA 5: 230.91
        SMA 15: 217.69
        SMA 50: 209.38
        EMA 5: 229.77
        EMA 10: 224.87
        EMA 50: 212.92
        RSI: 68.05
        OBV: 1,227,299,400

MSFT:
        SMA 5: 522.85
        SMA 15: 522.76
        SMA 50: 501.80
        EMA 5: 522.18
        EMA 10: 522.16
        EMA 50