# Sale Forecasting using Agent

In [2]:
# Importing Libraries
from langchain.chat_models import ChatOpenAI
import sqlite3
import pandas as pd
from dotenv import load_dotenv
import os
import openai

PydanticUserError: If you use `@root_validator` with pre=False (the default) you MUST specify `skip_on_failure=True`. Note that `@root_validator` is deprecated and should be replaced with `@model_validator`.

For further information visit https://errors.pydantic.dev/2.10/u/root-validator-pre-skip

In [2]:
load_dotenv()
OpenAI_API_KEY = os.getenv("OpenAI_API_KEY")

### Initializing Langchain LLM models

In [None]:
model = ChatOpenAI(openai_api_key=OpenAI_API_KEY,model_name="gpt-4o-mini")

In [4]:
import chardet

# Detect the encoding
with open(r"C:\Users\eDominer\Python Project\Sales Prediction\sales_data_sample.csv", 'rb') as f:
    result = chardet.detect(f.read())

# Use the detected encoding to read the file
encoding = result['encoding']

In [5]:
df = pd.read_csv('sales_data_sample.csv',encoding=encoding)

In [None]:
df.head()

### Creating SQLite Database

In [7]:
# Create a connection object
sqliteConnection = sqlite3.connect('sales.db')

In [None]:
# Convert the dataframe to an SQL table
df.to_sql('Sales', sqliteConnection, if_exists='replace')

### Importing Langchain modules

In [9]:
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.agents.agent_types import AgentType
from langchain.sql_database import SQLDatabase

In [10]:
# Create an instance of SQLDatabase
database = SQLDatabase.from_uri('sqlite:///sales.db')

### Creating Agent

In [11]:

agent_excuter = create_sql_agent(
    llm=model,
    toolkit=SQLDatabaseToolkit(db=database,llm=model),
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors = True,
    verbose=True,
    return_intermediate_steps=True
)

In [None]:
query = "What will be the total sales for January 2003?"

agent_excuter.run(query)

# New ChatBot

### Hybird Search Langchain

In [None]:
%pip install pinecone-client pinecone-text pinecone-notebooks

In [3]:
pinecone_API_KEY = "pcsk_2QZwDt_EsA9kwmK4azJGavUqToMCibU1LCpsgwAb3ebvvMhmoEFrcd7d9wZ9EELgw9CJDZ"

In [4]:
from langchain_community.retrievers import PineconeHybridSearchRetriever

In [5]:
import os
from pinecone import Pinecone,ServerlessSpec

index_name = "hybrid-search"
pc = Pinecone(api_key=pinecone_API_KEY)

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384, #Dimension of Dense Vector
        metric="dotproduct",  # sparse value supported only for DotProduct
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
        )

In [6]:
index = pc.Index(index_name)
index

<pinecone.data.index.Index at 0x28b9622eea0>

In [7]:
## Vector Embedding and Sparse Matrix
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings

HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [8]:
# Sparse Matrix
from pinecone_text.sparse import BM25Encoder
bm25encoder = BM25Encoder().default()
bm25encoder

<pinecone_text.sparse.bm25_encoder.BM25Encoder at 0x28ba7ea8440>

In [9]:
retriever = PineconeHybridSearchRetriever(embeddings=embeddings, index=index, sparse_encoder=bm25encoder)
retriever

PineconeHybridSearchRetriever(embeddings=HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False), sparse_encoder=<pinecone_text.sparse.bm25_encoder.BM25Encoder object at 0x0000028BA7EA8440>, index=<pinecone.data.index.Index object at 0x0000028B9622EEA0>)

In [12]:
import nltk
nltk.download('punkt')

retriever.add_texts(
    [
        "In 2023, the total sales for January was 1000",
        "In 2022, the total sales for February was 700",
        "In 2021, the total sales for March was 500",
    ]
)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\eDominer\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


  0%|          | 0/1 [00:00<?, ?it/s]

LookupError: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - 'C:\\Users\\eDominer/nltk_data'
    - 'c:\\Users\\eDominer\\anaconda3\\nltk_data'
    - 'c:\\Users\\eDominer\\anaconda3\\share\\nltk_data'
    - 'c:\\Users\\eDominer\\anaconda3\\lib\\nltk_data'
    - 'C:\\Users\\eDominer\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
**********************************************************************


In [15]:
# Import necessary libraries
from langchain_community.retrievers import PineconeHybridSearchRetriever
import os
from pinecone import Pinecone, ServerlessSpec
from langchain_huggingface import HuggingFaceEmbeddings
from pinecone_text.sparse import BM25Encoder
import nltk

# Download the punkt tokenizer
nltk.download('punkt')


# Index name
index_name = "hybrid-search"

# Check if index exists and create if it doesn't
if index_name not in pc.list_indexes().names():
    try:
        pc.create_index(
            name=index_name,
            dimension=384,  # Dimension of Dense Vector
            metric="dotproduct",  # sparse value supported only for DotProduct
            spec=ServerlessSpec(cloud="aws", region="us-east-1")
        )
        print(f"Index {index_name} created successfully.")
    except Exception as e:
        print(f"Error creating index: {e}")

# Connect to the index
try:
    index = pc.Index(index_name)
    print(f"Connected to index {index_name} successfully.")
except Exception as e:
    print(f"Error connecting to index: {e}")

# Initialize vector embeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Initialize sparse matrix encoder
bm25encoder = BM25Encoder().default()

# Create hybrid search retriever
retriever = PineconeHybridSearchRetriever(embeddings=embeddings, index=index, sparse_encoder=bm25encoder)

# Add texts to the retriever
try:
    retriever.add_texts(
        [
            "In 2023, the total sales for January was 1000",
            "In 2022, the total sales for February was 700",
            "In 2021, the total sales for March was 500",
        ]
    )
    print("Texts added to the retriever successfully.")
except Exception as e:
    print(f"Error adding texts: {e}")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\eDominer\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Connected to index hybrid-search successfully.


  0%|          | 0/1 [00:00<?, ?it/s]

Error adding texts: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - 'C:\\Users\\eDominer/nltk_data'
    - 'c:\\Users\\eDominer\\anaconda3\\nltk_data'
    - 'c:\\Users\\eDominer\\anaconda3\\share\\nltk_data'
    - 'c:\\Users\\eDominer\\anaconda3\\lib\\nltk_data'
    - 'C:\\Users\\eDominer\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
**********************************************************************

