In [22]:
#Author: Rishab Ohri
import os 
from dotenv import load_dotenv
load_dotenv()

OPENAI_API_KEY = os.getenv("OPEN_API_KEY")
MODEL = "gpt-3.5-turbo"
MODEL = "llama3"

In [23]:
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.llms import Ollama 
from langchain_community.embeddings import OllamaEmbeddings

if MODEL.startswith("gpt"):
    model = ChatOpenAI(api_key=OPENAI_API_KEY, model=MODEL)
else:
    model = Ollama(model=MODEL)
    embeddings = OllamaEmbeddings(model="llama3")


In [24]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

chain = model | parser


In [25]:
from langchain_community.document_loaders import CSVLoader
# IMPORT PDF LOADER IF YOU WANT TO LOAD PDF FILES
# IMPORT DOCX LOADER IF YOU WANT TO LOAD DOCX FILES

# loader = CSVLoader("Car_sales.csv") ENTER YOUR FILE PATH
pages = loader.load_and_split()
pages

[Document(page_content='Manufacturer: Acura\nModel: Integra\nSales_in_thousands: 16.919\n__year_resale_value: 16.36\nVehicle_type: Passenger\nPrice_in_thousands: 21.5\nEngine_size: 1.8\nHorsepower: 140\nWheelbase: 101.2\nWidth: 67.3\nLength: 172.4\nCurb_weight: 2.639\nFuel_capacity: 13.2\nFuel_efficiency: 28\nLatest_Launch: 2/2/2012\nPower_perf_factor: 58.28014952', metadata={'source': 'Car_sales.csv', 'row': 0}),
 Document(page_content='Manufacturer: Acura\nModel: TL\nSales_in_thousands: 39.384\n__year_resale_value: 19.875\nVehicle_type: Passenger\nPrice_in_thousands: 28.4\nEngine_size: 3.2\nHorsepower: 225\nWheelbase: 108.1\nWidth: 70.3\nLength: 192.9\nCurb_weight: 3.517\nFuel_capacity: 17.2\nFuel_efficiency: 25\nLatest_Launch: 6/3/2011\nPower_perf_factor: 91.37077766', metadata={'source': 'Car_sales.csv', 'row': 1}),
 Document(page_content='Manufacturer: Acura\nModel: CL\nSales_in_thousands: 14.114\n__year_resale_value: 18.225\nVehicle_type: Passenger\nPrice_in_thousands: \nEngine_s

In [26]:
from langchain.prompts import PromptTemplate

template = """
Answer the question based on the context below. If you can't
answer the question, reply "I don't know".
Context: {context}
Question: {question}
"""

prompt = PromptTemplate.from_template(template)
print(prompt.format(context="Here is some context", question="Here is a question"))


Answer the question based on the context below. If you can't
answer the question, reply "I don't know".
Context: Here is some context
Question: Here is a question



In [27]:
chain = prompt | model | parser 


In [28]:
chain.input_schema.schema()

{'title': 'PromptInput',
 'type': 'object',
 'properties': {'context': {'title': 'Context', 'type': 'string'},
  'question': {'title': 'Question', 'type': 'string'}}}

In [29]:
chain.invoke({
    "context": "The name I was given was Santiago",
    "question": "What's my name?"
})


'Your name is Santiago.'

In [30]:
from langchain_community.vectorstores import DocArrayInMemorySearch

vectorstore = DocArrayInMemorySearch.from_documents(
    pages, 
    embedding=embeddings
    )

In [31]:
retriever = vectorstore.as_retriever()

retriever.invoke("Machine Learning")

[Document(page_content='Manufacturer: Volkswagen\nModel: Cabrio\nSales_in_thousands: 9.569\n__year_resale_value: 16.575\nVehicle_type: Passenger\nPrice_in_thousands: 19.99\nEngine_size: 2\nHorsepower: 115\nWheelbase: 97.4\nWidth: 66.7\nLength: 160.4\nCurb_weight: 3.079\nFuel_capacity: 13.7\nFuel_efficiency: 26\nLatest_Launch: 5/31/2011\nPower_perf_factor: 48.90737225', metadata={'source': 'Car_sales.csv', 'row': 148}),
 Document(page_content='Manufacturer: Pontiac\nModel: Firebird\nSales_in_thousands: 19.911\n__year_resale_value: 17.805\nVehicle_type: Passenger\nPrice_in_thousands: 25.31\nEngine_size: 3.8\nHorsepower: 200\nWheelbase: 101.1\nWidth: 74.5\nLength: 193.4\nCurb_weight: 3.492\nFuel_capacity: 16.8\nFuel_efficiency: 25\nLatest_Launch: 6/16/2012\nPower_perf_factor: 81.49272616', metadata={'source': 'Car_sales.csv', 'row': 120}),
 Document(page_content='Manufacturer: Toyota\nModel: 4Runner\nSales_in_thousands: 68.411\n__year_resale_value: 19.425\nVehicle_type: Car\nPrice_in_thou

In [32]:
from operator import itemgetter

chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question")
    }
 | prompt 
 | model
 | parser

)

chain.invoke({"question": "What is machine learning?"})

"I don't know. The context appears to be a collection of car sales data, and the question is about machine learning, which is not related to the provided context."

In [34]:
# ENTER YOUR QUESTIONS 
questions = [
   # "What is the most expensive car?",
  #  "What is the least expensive car?",
   # "What is the most effienct car?"
   # "what is the best looking car?",
]
for question in questions:
    print(f"Question:, {question}")
    print(f"Answer: {chain.invoke({'question': question})}\n\n")

Question:, What is the most expensive car?
Answer: Based on the context, I can answer your question.

The Corvette (Manufacturer: Chevrolet, Model: Corvette) has a Price in thousands of $45.705. The Carrera Coupe (Manufacturer: Porsche, Model: Carrera Coupe) has a Price in thousands of $71.02. Since $71.02 is greater than $45.705, the most expensive car is the Carrera Coupe.

So, the answer is: Carrera Coupe.


Question:, What is the least expensive car?
Answer: Based on the context, I can answer your question.

The prices in thousands are:

* Lexus LS400: 54.005
* Volkswagen Cabrio: 19.99
* Chevrolet Corvette: 45.705
* Chrysler LHS: 28.34

So, the least expensive car is the Volkswagen Cabrio with a price of $19.99 thousand.


Question:, What is the most effienct car?what is the best looking car
Answer: I don't know.

The given documents do not provide information on how to compare cars based on efficiency or aesthetics. The data provided only includes details such as sales figures, re