In [None]:
import os
from dotenv import load_dotenv
load_dotenv()

# Fail fast with an actionable message when the key is absent. Assigning the
# result of os.getenv() straight into os.environ raises an opaque
# "TypeError: str expected, not NoneType" if the variable is missing.
_groq_api_key = os.getenv("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it in the environment or in a .env "
        "file before running this notebook."
    )
os.environ["GROQ_API_KEY"] = _groq_api_key

In [4]:
from langchain_groq import ChatGroq

class GroqLLM:
    """Maps a loose model-family name onto a concrete Groq chat model.

    ``model_name`` only needs to *contain* one of the supported family
    keywords (see ``_MODEL_IDS``); the matching Groq model id is then used to
    build the ``ChatGroq`` client.
    """

    # Family keyword -> Groq model id. Order matters: "deepseek" is tested
    # before "llama" because the DeepSeek distill id
    # ("deepseek-r1-distill-llama-70b") also contains the substring "llama"
    # and would otherwise be resolved to the Llama 3 model.
    _MODEL_IDS = {
        "deepseek": "deepseek-r1-distill-llama-70b",
        "llama": "llama3-70b-8192",
        "gemma": "gemma2-9b-it",
        "qwen": "qwen-qwq-32b",
    }

    def __init__(self, model_name: str = "qwen-qwq-32b"):
        # Requested model family (or full model id) as given by the caller.
        self.model_name = model_name
        # Populated by load_groq_llm(); None until then.
        self.llm = None

    def _resolve_model_id(self) -> str:
        """Return the Groq model id matching ``self.model_name``.

        Matching is a case-insensitive substring test against the family
        keywords in ``_MODEL_IDS``.

        Raises:
            ValueError: if ``model_name`` is empty/None or matches no family.
        """
        requested = str(self.model_name or "").lower()
        for family, model_id in self._MODEL_IDS.items():
            if family in requested:
                return model_id
        supported = ", ".join(sorted(self._MODEL_IDS))
        raise ValueError(
            f"Unsupported model name {self.model_name!r}; "
            f"expected a name containing one of: {supported}"
        )

    def load_groq_llm(self, temperature: float = 0, max_tokens: int = 1000):
        """Build the ``ChatGroq`` client, store it on ``self.llm`` and return it.

        Args:
            temperature: sampling temperature forwarded to ``ChatGroq``.
            max_tokens: completion token limit forwarded to ``ChatGroq``.

        Returns:
            The constructed ``ChatGroq`` instance (also kept in ``self.llm``).

        Raises:
            ValueError: if the model name matches no supported family.
            RuntimeError: if constructing the ``ChatGroq`` client fails; the
                original exception is chained as ``__cause__``.
        """
        # Resolved outside the try block so that a bad model name surfaces as
        # a plain ValueError rather than being re-wrapped below.
        model_id = self._resolve_model_id()
        try:
            self.llm = ChatGroq(
                model=model_id,
                temperature=temperature,
                max_tokens=max_tokens,
            )
        except Exception as e:
            # Raising a bare string is itself a TypeError in Python 3, so a
            # real exception type is used and the cause is preserved.
            raise RuntimeError(f"Error occured as: {e}") from e
        return self.llm

In [10]:
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents import AgentType
import pandas as pd
import numpy as np
#from datetime import datetime, timedelta

import pandas as pd

class Data_Loader:
    """Reads a CSV file from ``file_path`` into a pandas DataFrame."""

    def __init__(self, file_path: str = r"D:\My Files\GitHub_Materials\GenAI_Agents\simple_dataanalysis_agent\flipkart_product_review.csv"):
        # Location of the CSV file that load_data() will read.
        self.file_path = file_path

    def load_data(self):
        """Read the CSV at ``self.file_path`` and return it as a DataFrame.

        On success the frame's shape is printed and the frame is returned.
        On any failure a diagnostic line is printed and ``None`` is returned;
        no exception propagates from the read itself.
        """
        try:
            table = pd.read_csv(self.file_path)
            print(f"Data loaded successfully with shape: {table.shape}")
        except FileNotFoundError:
            failure_note = f"Error: File not found at path: {self.file_path}"
        except pd.errors.ParserError:
            failure_note = "Error: Failed to parse the CSV file."
        except Exception as err:
            failure_note = f"An unexpected error occurred: {err}"
        else:
            return table

        # Only reached when one of the handlers above ran.
        print(failure_note)
        return None

In [2]:
# `load_data` is only defined as a method of Data_Loader, so a bare
# `load_data()` call raises NameError on a fresh kernel. Go through the class
# and preview the first rows; nothing is displayed if loading failed.
reviews_df = Data_Loader().load_data()
reviews_df.head() if reviews_df is not None else None

Unnamed: 0,product_id,product_title,rating,summary,review
0,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,1-more flexible2-bass is very high3-sound clar...
1,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,Super sound and good looking I like that prize
2,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Super!,Very much satisfied with the device at this pr...
3,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Super!,"Nice headphone, bass was very good and sound i..."
4,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,Sound quality super battery backup super quali...


In [29]:
class DataAnalysisAgent:
    """Pairs a Groq chat model with the loaded DataFrame to build a pandas agent."""

    def __init__(self):
        # Both helpers are used with their default arguments.
        model_wrapper = GroqLLM()
        self.llm = model_wrapper.load_groq_llm()
        csv_loader = Data_Loader()
        self.df = csv_loader.load_data()

    def pandas_agent(self):
        """Create and return a pandas DataFrame agent over ``self.df``.

        The agent is built with ``self.llm``, verbose output enabled,
        ``allow_dangerous_code`` switched on and the zero-shot ReAct agent type.
        """
        agent_options = {
            "llm": self.llm,
            "df": self.df,
            "verbose": True,
            "allow_dangerous_code": True,
            "agent_type": AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        }
        return create_pandas_dataframe_agent(**agent_options)

In [30]:
async def get_analysis(question: str):
    """Answer an analytical question about the dataframe.

    A fresh ``DataAnalysisAgent`` is created on every call, its pandas agent
    is invoked asynchronously with the question, and the agent's response is
    returned unchanged.
    """
    analysis_agent = DataAnalysisAgent().pandas_agent()

    # NOTE(review): the agent may rebuild `agent_scratchpad` itself from its
    # intermediate steps, in which case this seed has no effect on the prompt -
    # confirm against the agent implementation.
    scratchpad_seed = f"Human: {question}\nAI: To answer this question, I need to use Python to analyze the dataframe. I'll use the python_repl_ast tool.\n\nAction: python_repl_ast\nAction Input: "
    payload = {
        "input": question,
        "agent_scratchpad": scratchpad_seed,
    }
    return await analysis_agent.ainvoke(payload)

In [31]:
# NOTE(review): presumably applied because the notebook kernel already runs an
# event loop, which would otherwise make asyncio.run() below fail - confirm.
import nest_asyncio
nest_asyncio.apply()

# Interactive entry point: prompt for one question, run the async analysis to
# completion and print the agent's response.
if __name__ == "__main__":
    import asyncio
    # `question` becomes a module-level (kernel) variable.
    question = input("Enter your question: ")
    print(asyncio.run(get_analysis(question=question)))

Data loaded successfully with shape: (450, 5)


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
<think>
Okay, I need to find out how many rows are in the dataframe df. Hmm, I remember in pandas, there's a method to get the number of rows. Let me think... Oh right, you can use len(df) or df.shape[0]. Which one should I use here? Well, both should give the same result. Let me try using len(df) first because it's straightforward.

Wait, but I should make sure. The user provided the head of the dataframe, which shows 5 rows, but the actual dataset might have more. So executing len(df) should return the total count. Alternatively, df.shape would give the number of rows and columns. Either way, either method would work. Let me go with len(df) because it's simpler. 

I'll use the python_repl_ast tool to run the command. The action input would be "len(df)". Then, the output should be the number, which is the answer. Let me check if there's any possible mistake here. Oh, no, as long