In [1]:
!pip freeze > requirements.txt

In [32]:
#Step 1: import below libraries before writting the code:
#Note: Library count can be increased/decreased based on feature requirement.
from groq import Groq
import pandas as pd
from sentence_transformers import SentenceTransformer
import json
import faiss
import numpy as np

In [None]:
# Get the Groq API key from https://console.groq.com/keys , Copy the key and save it in a file for further use.

client= Groq(api_key= "<Enter your Groq API key here>")
# Step 5:
embede_model= SentenceTransformer('all-MiniLM-L6-v2')


In [34]:
# Try to upload a dataset - csv file
# We are creating a chatbot which will give smart answers based on data

def load_csv(filepath):
    df=pd.read_csv(filepath)
    #print(df)
    return df

In [None]:
df= load_csv('Enter your CSV file path here')  #e.g. 'tata_elexi_1y.csv'

In [None]:
#Convert the Dataframe into small chunks and then convert it to Json format:
def chunk_dataframe(df, chunk_size=5):
    chunks=[]
    for i in range(0,len(df), chunk_size):
        chunk = df.iloc[i:i+chunk_size]
        chunks.append(chunk.to_json())
        return chunks

In [None]:
# Check the function whether it is working fine:
chunk_dataframe(df, chunk_size=5)

['{"DATE":{"0":"19-Dec-2025","1":"18-Dec-2025","2":"17-Dec-2025","3":"16-Dec-2025","4":"15-Dec-2025"},"SERIES":{"0":"EQ","1":"EQ","2":"EQ","3":"EQ","4":"EQ"},"OPEN":{"0":"5,116.00","1":"4,980.00","2":"5,000.00","3":"5,044.00","4":"5,045.50"},"HIGH":{"0":"5,450.00","1":"5,049.50","2":"5,052.50","3":"5,044.00","4":"5,126.00"},"LOW":{"0":"5,062.00","1":"4,934.50","2":"4,950.50","3":"4,980.00","4":"5,016.00"},"PREV. CLOSE":{"0":"5,013.50","1":"4,968.50","2":"4,998.00","3":"5,048.50","4":"5,030.50"},"LTP":{"0":"5,410.00","1":"5,046.00","2":"4,962.00","3":"4,997.50","4":"5,045.00"},"CLOSE":{"0":"5,413.50","1":"5,013.50","2":"4,968.50","3":"4,998.00","4":"5,048.50"},"VWAP":{"0":"5,330.77","1":"4,990.49","2":"4,991.65","3":"5,006.12","4":"5,060.94"},"52W H":{"0":"7,424.35","1":"7,424.35","2":"7,424.35","3":"7,424.35","4":"7,424.35"},"52W L":{"0":"4,700.00","1":"4,700.00","2":"4,700.00","3":"4,700.00","4":"4,700.00"},"VOLUME":{"0":1773370,"1":105540,"2":72380,"3":66003,"4":114281},"VALUE":{"0":

In [None]:
# Call the function to chunk the dataframe:
chunks=chunk_dataframe(df, chunk_size=5)

In [None]:
### Create Embeddings from chunks and saving it to vector database --- RAG

def create_faiss_index(chunks):
    embeddings= embede_model.encode(chunks)
    dim=embeddings.shape[1]
    index=faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings))
    return index, chunks, embeddings


In [44]:
index, chunks, embeddings = create_faiss_index(chunks)

In [45]:
# Checking if Embedding is workking or not!
# Below stepis optionsal
embeddings[0]

array([-1.14001636e-03,  7.52340183e-02, -2.94515910e-03,  2.84028109e-02,
       -7.89641738e-02,  1.89702539e-03, -4.73645404e-02, -2.38329656e-02,
        3.65337804e-02,  4.20397036e-02, -4.21190858e-02, -4.07668091e-02,
       -3.79597023e-02, -3.32357857e-04,  7.27633806e-03,  2.76851356e-02,
       -1.22189857e-01, -2.41195131e-02,  1.48424702e-02, -1.21302463e-01,
        1.18955038e-02,  1.21545081e-03,  1.95442364e-02, -9.90169644e-02,
        5.08297198e-02,  8.59472230e-02,  2.38762945e-02,  8.30201283e-02,
       -5.27291559e-03,  5.01079764e-03, -3.27819437e-02, -1.82802044e-02,
        4.88070324e-02,  4.25267965e-02, -1.60406940e-02, -4.08273563e-02,
       -5.10468381e-03,  8.82710610e-03,  6.93629980e-02,  5.20033464e-02,
        7.20652863e-02,  1.69974659e-02,  3.01693147e-03, -1.20475553e-01,
        9.50935949e-03, -8.09907243e-02, -1.05458625e-01, -7.94150122e-03,
        8.94640386e-03,  6.36492372e-02, -2.03495957e-02, -3.01699359e-02,
       -1.91073213e-02,  

In [49]:
### Performing Retrieval

def retriev_relavant_chunks(query, index, chunks, embeddings, k=2):
    query_vec= embede_model.encode([query])
    distances, indices= index.search(query_vec,k)
    return [chunks[i] for i in indices[0]]

In [None]:
# Generating response from Groq LLM model:
def generate_response_from_groq(context,query):
    prompt = f""" You are smart data scientist. Here's some context from a csv file: \n\n {context}\n\n
    Questions :
    {query}\n
    Answer:
    """
    completion=client.chat.completions.create(
    model ="llama-3.1-8b-instant",
    messages =[
    {"role": 'system', 'content':'You are data science assistant'},
    {'role':"user", 'content':prompt}
    ],
    temperature= 0.2
    )
    return completion.choices[0].message.content

In [None]:
# Generating insights from the dataframe using Groq LLM model:
def generate_insights(df):
    stats= df.describe(include ='all').to_string()
    prompt = f""" Here is a dataset summary :\n{stats}\n\n Generate 3 smart insights or trends that you observe
    """
    completion=client.chat.completions.create(
    model ="llama-3.1-8b-instant",
    message =[
    {'role':"user", 'content':prompt}
    ],
    )
    return completion.choices[0].message.content

In [None]:
# Final Chatbot Loop
if __name__=="__main__":
    filepath='<Enter your CSV file path here>'
    df =load_csv(filepath)
    chunks= chunk_dataframe(df)
    index, chunks, embeddings = create_faiss_index(chunks)

    while True:
        query = input("Ask a question (or type Insight or exit): ")
        if query.lower()=='exit':
            break
        elif query.lower()=='insights':
            print("Generating Smart Insights:")
            print(generate_insights(df))
        else:
            context = "\n\n".join(retriev_relavant_chunks(query, index, chunks, embeddings, k=2))
            answers= generate_response_from_groq(context,query)
            print("Answer", answers)

Ask a question (or type Insight or exit):  insight


Answer It seems like you have a CSV file containing stock market data. Based on the provided data, here are some insights:

1. **Volume and Value Trends**: The volume of trades is significantly higher on December 19, 2025, compared to the other days. The value of trades also shows a similar trend, with the highest value on December 19, 2025.

2. **Price Trends**: The high and low prices of the stock are relatively stable across the days, with a slight increase in the high price on December 19, 2025. The closing price also shows a similar trend, with a slight increase on December 19, 2025.

3. **VWAP Trend**: The volume-weighted average price (VWAP) shows a slight increase across the days, with the highest VWAP on December 19, 2025.

4. **52-Week High and Low**: The 52-week high and low prices are relatively stable across the days, with a value of 7,424.35 and 4,700.00, respectively.

5. **Trading Activity**: The number of trades is relatively low across the days, with the highest numbe

Ask a question (or type Insight or exit):  What is highest closing point with date


Answer To find the highest closing point with the corresponding date, we can use the pandas library in Python. Here's how you can do it:

```python
import pandas as pd

# Assuming the data is in a CSV file named 'data.csv'
df = pd.read_csv('data.csv')

# Convert the 'CLOSE' column to numeric values (it's currently a string)
df['CLOSE'] = df['CLOSE'].str.replace(',', '').astype(float)

# Find the row with the highest closing point
max_close_row = df.loc[df['CLOSE'].idxmax()]

# Print the date and closing point
print(f"Highest closing point: {max_close_row['CLOSE']}")
print(f"Date: {max_close_row['DATE']}")
```

This code will find the row with the highest closing point and print the date and closing point.

However, if you want to get the highest closing point for each series (EQ in this case), you can use the following code:

```python
import pandas as pd

# Assuming the data is in a CSV file named 'data.csv'
df = pd.read_csv('data.csv')

# Convert the 'CLOSE' column to numeric values 

Ask a question (or type Insight or exit):  What is the last closing point in the dataset


Answer To find the last closing point in the dataset, we need to first load the data into a pandas DataFrame and then extract the last row's 'CLOSE' value.

```python
import pandas as pd

# Load the data into a pandas DataFrame
data = pd.DataFrame({
    "DATE": ["19-Dec-2025", "18-Dec-2025", "17-Dec-2025", "16-Dec-2025", "15-Dec-2025"],
    "SERIES": ["EQ", "EQ", "EQ", "EQ", "EQ"],
    "OPEN": ["5,116.00", "4,980.00", "5,000.00", "5,044.00", "5,045.50"],
    "HIGH": ["5,450.00", "5,049.50", "5,052.50", "5,044.00", "5,126.00"],
    "LOW": ["5,062.00", "4,934.50", "4,950.50", "4,980.00", "5,016.00"],
    "PREV. CLOSE": ["5,013.50", "4,968.50", "4,998.00", "5,048.50", "5,030.50"],
    "LTP": ["5,410.00", "5,046.00", "4,962.00", "4,997.50", "5,045.00"],
    "CLOSE": ["5,413.50", "5,013.50", "4,968.50", "4,998.00", "5,048.50"],
    "VWAP": ["5,330.77", "4,990.49", "4,991.65", "5,006.12", "5,060.94"],
    "52W H": ["7,424.35", "7,424.35", "7,424.35", "7,424.35", "7,424.35"],
    "52W L": ["4