In [1]:
import pandas as pd

df=pd.read_csv('Data/data.csv').drop(['Close'],axis=1)
input_data = df.iloc[0]
input_data

Date      2005-11-21
Open         1.94402
High        1.955116
Low         1.911029
Volume     511711200
ticker          AAPL
Year            2005
Month             11
Day               21
Name: 0, dtype: object

In [2]:
import pickle as pc
def load_assets():
    model = pc.load(open('Models/model.pkl','rb'))
    scaler = pc.load(open('Scaler/scaler.pkl','rb'))
    encoder = pc.load(open('encoders/encoder.pkl','rb'))

    return model, scaler, encoder

In [3]:
model, scaler, encoder = load_assets()

In [4]:
def transform_df(df, encoder, scaler):
    # Ensure df is a DataFrame (even if single row)
    if isinstance(df, pd.Series):
        df = df.to_frame().T

    df1 = df.copy()
    
    # Drop Date column safely
    if 'Date' in df1.columns:
        df1 = df1.drop(['Date'], axis=1)
    
    # Encode ticker
    df1['ticker'] = encoder.transform(df1['ticker'])
    
    # Scale all features
    final_df = pd.DataFrame(
        data=scaler.transform(df1), 
        columns=scaler.get_feature_names_out()
    )
    
    return final_df

In [5]:
transform_df(input_data,encoder=encoder,scaler=scaler)

Unnamed: 0,Open,High,Low,Volume,ticker,Year,Month,Day
0,-0.780733,-0.781438,-0.780499,1.299734,-1.224745,-1.799038,1.300118,0.60065


In [6]:
model.predict(transform_df(input_data,encoder=encoder,scaler=scaler))



array([1.93159489])

In [7]:
from langchain_groq import ChatGroq
from dotenv import load_dotenv
import os

load_dotenv()
api_key = os.getenv('GROQ_API_KEY')
llm = ChatGroq(model='llama-3.1-8b-instant',api_key=api_key)
llm.invoke('Hello')

AIMessage(content='Hello, how can I assist you today?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 36, 'total_tokens': 46, 'completion_time': 0.01031063, 'completion_tokens_details': None, 'prompt_time': 0.001814386, 'prompt_tokens_details': None, 'queue_time': 0.050982043, 'total_time': 0.012125016}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_1151d4f23c', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--258156df-e36e-4578-bb62-6f5b7a255a1c-0', usage_metadata={'input_tokens': 36, 'output_tokens': 10, 'total_tokens': 46})

In [17]:
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template("""
You are a data extraction and inference assistant.

Your task is to take a natural-language sentence about a stock and convert it
into a JSON object with the following fields:

[
  "Open",
  "High",
  "Low",
  "Close",
  "Volume",
  "ticker",
  "Year",
  "Month",
  "Day"
]

### Rules:
- Always output **valid JSON**.
- Infer or estimate any missing values based on context.
- If the sentence gives close/high/low but not open, you may reasonably infer it from the trend.
- "Date" must be in **YYYY-MM-DD** format.
- "Year", "Month" and "Day" must match the parsed date.
- "ticker" must be ONLY: "AAPL", "MSFT", "GOOGL".
- If multiple tickers appear, choose the first one.
- All numeric values must be numbers (not strings).
- Ensure the final JSON contains **all fields with inferred values**.


### User sentence and date:
{sentence} and {date}

### JSON Output:
""")

In [18]:
from datetime import datetime

today = datetime.today().date()
print(today)

2025-11-20


In [19]:
chain = prompt|llm
print(chain.invoke({'sentence':'Google shares went up today, closing at 171.','date':today}).content)

To process the user sentence, we need to extract relevant information and infer missing values. Here's the step-by-step breakdown:

1. The sentence mentions "Google shares," so we can infer the ticker symbol as "GOOGL."
2. It mentions "went up," but we don't have the specific opening price. However, since it closed at 171, we can infer the opening price might be lower, but we need more information to determine the exact value. For simplicity, let's assume the opening price is 20% lower than the closing price. This is a rough estimate and might not be accurate in real-world scenarios.

   - Closing price: 171
   - Opening price: 171 * 0.8 (20% lower) = 136.8

3. The sentence doesn't mention the high or low price, but we can infer the high price as higher than the closing price, and the low price as lower than the closing price.

   - High price: 171 + (171 * 0.1) = 188.1 (10% higher than the closing price)
   - Low price: 171 - (171 * 0.1) = 153.9 (10% lower than the closing price)

4. 