In [1]:
# Smoke-test cell: prints a fixed greeting.
print("Hello World !")

Hello World !


## Load Libraries

In [None]:
import os 
import torch
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts.chat import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from huggingface_hub import login
from langchain.chains import RetrievalQA
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Chroma
from langchain_groq import ChatGroq
from langchain.document_loaders import DirectoryLoader, CSVLoader

from dotenv import load_dotenv

## Load the Groq Model via API

In [None]:
# Read the local .env file, then fetch the API key from the environment.
load_dotenv(".env")
api_key = os.environ.get("API_KEY")

# Chat model served by Groq; shown as the cell output for a quick visual check.
model_llm = ChatGroq(
    model_name="llama-3.1-8b-instant",
    groq_api_key=api_key,
)
model_llm

## Load Embedding Model from Hugging Face

In [None]:
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Prefer the GPU, but fall back to the CPU so this cell also runs on machines
# without CUDA instead of failing while the embedding model is loaded.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_kwargs = {"device": device}
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name, model_kwargs=model_kwargs)

## Insert Data into the Vector DB

In [None]:
# Load the used-car CSV and split it into documents.
# NOTE: despite its name, `folder_path` points at a single CSV file, not a directory.
folder_path = 'data/used_car_dataset.csv'
loader = CSVLoader(file_path=folder_path)
documents = loader.load_and_split()

# Report how many documents came out of the load-and-split step.
print(f"Loaded {len(documents)} documents from CSV files.")

In [None]:
# Chroma collection "car_dataset", embedded with the `embeddings` model defined earlier.
# persist_directory is where the data is saved locally; remove it if persistence is not needed.
vector_store = Chroma(
    persist_directory="./chroma_langchain_db",
    embedding_function=embeddings,
    collection_name="car_dataset",
)

In [None]:
# Give every document a deterministic id. The collection is persisted on disk,
# so with auto-generated ids each re-run of this cell (or of the whole notebook)
# would store the full dataset again and retrieval would return duplicates.
# With stable ids, re-adding a document replaces the stored entry instead.
document_ids = [f"car_dataset-{index}" for index in range(len(documents))]
vector_store.add_documents(documents=documents, ids=document_ids)

## Prompt Template

In [None]:
# Raw prompt template asking the model to generate Matplotlib plotting code.
# Placeholders to fill in: {df_path}, {df_columns} and {input}.
# NOTE(review): the persona mentions tax data for Indonesia, while this notebook
# loads 'data/used_car_dataset.csv' — confirm the wording is intended.
# NOTE(review): the template has no {context} or {question} placeholder, yet
# generate_teks passes it to RetrievalQA as the chain prompt — confirm.
prompt = """
        You are an expert data analyst and Python programmer specializing in tax data visualization for Indonesia. Your task is to generate Python code that creates insightful plots using the Matplotlib library.

        Given:
        1. CSV file path: {df_path}
        2. DataFrame columns: {df_columns}
        3. User's request: "{input}"
        
        Instructions:
        1. Analyze the user's request and the provided data structure.
        2. Generate Python code that accomplishes the following:
           a. Imports necessary libraries (pandas, matplotlib.pyplot).
           b. Reads the CSV file using the provided path.
           c. Adds appropriate labels, title, and legend to the plot.
           d. Improves the plot's readability and aesthetics (e.g., adjusting colors, font sizes, or layout).
        3. Ensure the code is efficient, well-commented, and follows Python best practices.
        4. Ensure the code must simple
        
        Please provide only Python code.
"""

In [None]:
# Wrap the raw template string in a PromptTemplate object.
prompt_chart = PromptTemplate.from_template(template=prompt)

## Chain

In [None]:
# Retriever view over the vector store; no arguments are passed, so the
# library's default retrieval settings apply.
retriever = vector_store.as_retriever()

In [None]:
# Text-generation helper (retrieval-augmented question answering)
def generate_teks(query:str, model = model_llm, retriever = retriever, prompt = prompt):
    """Answer ``query`` with a RetrievalQA chain and return the answer text.

    Args:
        query: The user's question; sent to the chain under the "query" key.
        model: Language model used by the chain (defaults to ``model_llm``).
        retriever: Retriever that supplies the documents (defaults to ``retriever``).
        prompt: Prompt for the chain, given either as a prompt-template object
            or as a raw template string. Strings are converted with
            ``PromptTemplate.from_template``.

    Returns:
        The value stored under the "result" key of the chain's response.

    Raises:
        ValueError: If the prompt exposes ``input_variables`` and "context"
            is not among them.
    """
    # The chain expects a prompt-template object, but the notebook's default
    # `prompt` is a plain string, so accept both forms.
    if isinstance(prompt, str):
        prompt = PromptTemplate.from_template(prompt)

    # The retrieved documents are injected through the `context` variable.
    # Fail early with an actionable message instead of an opaque chain
    # validation error.
    variables = getattr(prompt, "input_variables", None)
    if variables is not None and "context" not in variables:
        raise ValueError(
            "The RetrievalQA prompt must contain a {context} placeholder "
            "(and normally {question}); got input variables: "
            f"{sorted(variables)}"
        )

    qa_chain = RetrievalQA.from_chain_type(
        model,
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt},
    )
    response = qa_chain.invoke({"query": query})
    return response["result"]