In [None]:
! pip install chromadb


In [None]:
! pip install tiktoken


In [None]:
! pip install python-dotenv


In [4]:
import os
import pandas as pd
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from dotenv import load_dotenv
import shutil

In [5]:
load_dotenv()

True

In [None]:
# Set OpenAI API key
os.environ["OPENAI_API_KEY"] = "OPEN_KEY"

In [7]:
def format_sales_content(row):
    return (
        f"On {row['sale_date']}, a {row['fuel_variant']} {row['model']} ({row['car_type']} - {row['model_variant']} variant) was sold in {row['city']} ({row['region']} region) for ₹{row['sale_amount']}."
    )

def format_marketing_content(row):
    return (
        f"{row['marketing_channel']} campaign '{row['campaign_name']}' targeting {row['target_cartype']}s for {row['target_audience']} offered a {row['discount_percent']}% discount and had {row['actual_sales']} actual sales."
    )

def format_customer_content(row):
    return (
        f"Customer {row['customer_id']} is a {row['age']}-year-old {row['gender']} who prefers {row['preferred_fuel_variant']} {row['preferred_model']} ({row['preferred_cartype']} - {row['preferred_model_variant']} variant). Purchase type: {row['purchase_type']}."
    )
#CSV Loader with Format Routing
def load_csvs_to_documents(file_paths):
    all_docs = []
    for path in file_paths:
        df = pd.read_csv(path)
        source_name = os.path.basename(path).replace(".csv", "")
        for _, row in df.iterrows():
            if source_name == "sales":
                content = format_sales_content(row)
            elif source_name == "marketing_campaign":
                content = format_marketing_content(row)
            elif source_name == "customer":
                content = format_customer_content(row)
            else:
                content = "\n".join(f"{col}: {row[col]}" for col in df.columns)
            all_docs.append(Document(page_content=content, metadata={"source": source_name}))
    return all_docs

In [8]:
#Chunk documents
def chunk_documents(docs, chunk_size=500, chunk_overlap=50):
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_documents(docs)

In [9]:
def create_chroma_index(chunks, persist_dir="chroma_db"):
    # Clean old DB if needed
    import shutil
    shutil.rmtree(persist_dir, ignore_errors=True)

    embedding_model = OpenAIEmbeddings()

    vectordb = Chroma.from_documents(
        documents=chunks,
        embedding=embedding_model,
        persist_directory=persist_dir
    )
    vectordb.persist()
    return vectordb


In [10]:
# Query the ChromaDB
def query_chroma(vectordb, query_text, k=5, filters=None):
    results = vectordb.similarity_search(query_text, k=k, filter={"source": "marketing_campaign"})
    for i, result in enumerate(results):
        print(f"\n Result {i+1}:\n{result.page_content}")
        

In [11]:
# ----------- Main Execution-----------
csv_files = [
        r"C:\Users\Dell\Downloads\customer.csv",
        r"C:\Users\Dell\Downloads\sales.csv",
        r"C:\Users\Dell\Downloads\marketing_campaign.csv"
    ]



    
    

In [None]:
# Load → Chunk → Store
all_documents = []
all_documents = load_csvs_to_documents(csv_files)
chunks = chunk_documents(all_documents)
chroma_db = create_chroma_index(chunks)

In [13]:
# Sample query
query_chroma(chroma_db, "SUV campaigns in West region for End of Year Bonanza")


 Result 1:
Social Media campaign 'End of Year Bonanza' targeting SUV s for Young Professionals offered a 20%% discount and had 356 actual sales.

 Result 2:
TV campaign 'End of Year Bonanza' targeting SUV s for Luxury Car Owners offered a 30%% discount and had 108 actual sales.

 Result 3:
SMS campaign 'End of Year Bonanza' targeting SUV s for Luxury Car Owners offered a 25%% discount and had 409 actual sales.

 Result 4:
Social Media campaign 'End of Year Bonanza' targeting SUV s for Budget Buyers offered a 10%% discount and had 58 actual sales.

 Result 5:
Email campaign 'End of Year Bonanza' targeting SUV s for Young Professionals offered a 30%% discount and had 244 actual sales.
