In [1]:
import os
import getpass

from dotenv import load_dotenv, find_dotenv
# Locate the nearest .env file and load its variables into the environment.
_ = load_dotenv(dotenv_path=find_dotenv())

In [2]:
from openpyxl import load_workbook

In [3]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

In [4]:
# Path to the source PDF. It can be overridden through the H2_SCALE_PDF
# environment variable (for example from the .env file loaded at the top of the
# notebook) so the notebook is not tied to one machine; the original location
# remains the default.
file = os.environ.get('H2_SCALE_PDF', 'C:/Users/markc/H2_Scale_Ruth.pdf')
# PyPDFLoader requires the `pypdf` package (`pip install pypdf`).
loader = PyPDFLoader(file_path=file)
# Load the PDF and split it into documents.
pages = loader.load_and_split()

ImportError: pypdf package not found, please install it with `pip install pypdf`

In [32]:
from langchain.indexes import VectorstoreIndexCreator

In [33]:
# Prerequisites (install once in this environment): %pip install docarray pypdf

In [34]:
# Configure an index creator backed by the in-memory DocArray vector store,
# then build the index from the current loader.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
index = index_creator.from_loaders([loader])

In [35]:
# Prompt for the index. Built from adjacent string literals so every fragment
# carries its own separating space (the earlier backslash-continued form
# produced "Washingtonby make").
query = (
    "Please summarize all electric vehicles in Washington "
    "by make, model, and type "
    "in a table in markdown and summarize each category."
)

In [36]:
# Ask the index to answer the query; the answer is kept in `response`.
response = index.query(query)

In [37]:
# Render the answer text as formatted markdown.
display(Markdown(response))



| Make | Model | Type |
|------|-------|------|
| TESLA | MODEL Y | Battery Electric Vehicle (BEV) |
| FORD | MUSTANG MACH-E | Battery Electric Vehicle (BEV) |
| VOLKSWAGEN | ID.4 | Battery Electric Vehicle (BEV) |
| VOLKSWAGEN | E-GOLF | Battery Electric Vehicle (BEV) |

There are four electric vehicles in Washington: TESLA MODEL Y, FORD MUSTANG MACH-E, VOLKSWAGEN ID.4, and VOLKSWAGEN E-GOLF. All of them are Battery Electric Vehicles (BEVs).

In [38]:
# Point the loader at the CSV named in the loaded documents' metadata
# ('source': 'Electric_Vehicle_Population_Data-Short.csv'). `file` still holds
# the PDF path assigned earlier in the notebook, which is not CSV data.
csv_file = 'Electric_Vehicle_Population_Data-Short.csv'
loader = CSVLoader(file_path=csv_file)

In [39]:
# Read the source through the loader into a sequence of documents.
docs = loader.load()

In [40]:
# Inspect the first loaded document (page content plus metadata).
docs[0]

Document(page_content='VIN (1-10): 5YJXCAE26J\nCounty: Yakima\nCity: Yakima\nState: WA\nPostal Code: 98908\nModel Year: 2018\nMake: TESLA\nModel: MODEL X\nElectric Vehicle Type: Battery Electric Vehicle (BEV)\nClean Alternative Fuel Vehicle (CAFV) Eligibility: Clean Alternative Fuel Vehicle Eligible\nElectric Range: 238\nBase MSRP: 0\nLegislative District: 14\nDOL Vehicle ID: 141151601\nVehicle Location: POINT (-120.56916 46.58514)\nElectric Utility: PACIFICORP\n2020 Census Tract: 53077001100', metadata={'source': 'Electric_Vehicle_Population_Data-Short.csv', 'row': 0})

In [41]:
# The documents are used as loaded, without chunking, because the data is small.

# Embedding model used to vectorize the documents and the queries below.
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [42]:
# Embed a sample sentence to inspect what the embedding model returns.
embed = embeddings.embed_query("Hi my name is Iron Man")

In [43]:
# Number of values in the sample embedding.
print(len(embed))

1536


In [44]:
# Peek at the first five values of the sample embedding.
print(embed[:5])

[-0.03388969227671623, -0.03518833592534065, -0.022326737642288208, -0.030867839232087135, 0.0024131108075380325]


In [45]:
# Build the in-memory vector store from the loaded documents, embedding each
# one with the configured embedding model.
db = DocArrayInMemorySearch.from_documents(documents=docs, embedding=embeddings)

In [47]:
# Natural-language question used for the similarity search.
query = (
    "Please suggest a vehicle "
    "with a range over 200 miles"
)

In [48]:
# Retrieve the documents most similar to the query.
# NOTE(review): this rebinds `docs` from the loaded documents to the search
# hits; re-running the cell that builds `db` from `docs` after this point would
# index only these hits.
docs = db.similarity_search(query)

In [49]:
# Number of documents returned by the similarity search.
len(docs)

4

In [50]:
# Inspect the first returned hit.
docs[0]

Document(page_content='VIN (1-10): 5YJ3E1EA5K\nCounty: \nCity: \nState: BC\nPostal Code: \nModel Year: 2019\nMake: TESLA\nModel: MODEL 3\nElectric Vehicle Type: Battery Electric Vehicle (BEV)\nClean Alternative Fuel Vehicle (CAFV) Eligibility: Clean Alternative Fuel Vehicle Eligible\nElectric Range: 220\nBase MSRP: 0\nLegislative District: \nDOL Vehicle ID: 475254825\nVehicle Location: \nElectric Utility: \n2020 Census Tract: ', metadata={'source': 'Electric_Vehicle_Population_Data-Short.csv', 'row': 103})

In [51]:
# Expose the vector store as a retriever (passed to RetrievalQA further down).
retriever = db.as_retriever()

In [52]:
# Chat model with temperature fixed at zero.
llm = ChatOpenAI(temperature=0.0)

In [53]:
# Concatenate the retrieved records into one context string. A blank line
# separates records: joining with "" fused the last field of one record with
# the first field of the next ("2020 Census Tract: ...VIN (1-10): ...").
qdocs = "\n\n".join(doc.page_content for doc in docs)


In [55]:
# Send the retrieved records followed by the question straight to the chat model.
question = (
    "Please list all your vehicles with range greater than 150 miles "
    "in a table in markdown and summarize each one."
)
response = llm.call_as_llm(f"{qdocs} Question: {question}")


In [56]:
# Render the model's answer as formatted markdown.
display(Markdown(response))

| VIN (1-10) | County | City | State | Model Year | Make | Model | Electric Vehicle Type | CAFV Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract |
|------------|--------|------|-------|------------|------|-------|----------------------|-----------------|---------------|-----------|---------------------|---------------|----------------|-----------------|-------------------|
| 5YJ3E1EA5K |        |      | BC    | 2019       | TESLA | MODEL 3 | BEV                  | CAFV Eligible   | 220           | 0         |                     | 475254825     |                |                 |                   |
| 1N4AZ0CP1D |        |      | AP    | 2013       | NISSAN | LEAF    | BEV                  | CAFV Eligible   | 75            | 0         |                     | 112277063     |                |                 |                   |
| YV4BR0CM9N | Yakima | Yakima | WA    | 2022       | VOLVO | XC90    | PHEV                 | Not eligible    | 18            | 0         | 14                  | 196640419     | POINT (-120.56916 46.58514) | PACIFICORP       | 53077000401       |
| 1G1FW6S06H | Island | Clinton | WA    | 2017       | CHEVROLET | BOLT EV | BEV                  | CAFV Eligible   | 238           | 0         | 10                  | 132227280     | POINT (-122.35803 47.9796) | PUGET SOUND ENERGY INC | 53029972000       |

The table above lists all the vehicles with an electric range greater than 150 miles. 

- The first vehicle is a 2019 Tesla Model 3 with an electric range of 220 miles. It is eligible for Clean Alternative Fuel Vehicle (CAFV) incentives. 
- The second vehicle is a 2013 Nissan Leaf with an electric range of 75 miles. It is also eligible for CAFV incentives. 
- The third vehicle is a 2022 Volvo XC90, which is a plug-in hybrid electric vehicle (PHEV) with an electric range of 18 miles. It is not eligible for CAFV incentives due to its low battery range. 
- The fourth vehicle is a 2017 Chevrolet Bolt EV with an electric range of 238 miles. It is eligible for CAFV incentives. 

Overall, these vehicles are all electric or plug-in hybrid electric vehicles with a range greater than 150 miles, making them suitable for longer trips without needing to recharge frequently.

In [57]:
# Retrieval QA chain of type "stuff" over the retriever; verbose=True makes the
# chain print its progress when run.
qa_chain_options = dict(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
)
qa_stuff = RetrievalQA.from_chain_type(**qa_chain_options)

In [58]:
# Question passed to the retrieval QA chain.
query = (
    "Please list all your electric vehicles in a table "
    "in markdown and summarize each one."
)

In [59]:
# Run the retrieval QA chain on the query; the chain was created with
# verbose=True, so it prints its enter/finish trace.
response = qa_stuff.run(query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [60]:
# Render the chain's answer as formatted markdown.
display(Markdown(response))

| VIN | County | City | State | Postal Code | Model Year | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 5YJ3E1EA5K | N/A | N/A | BC | N/A | 2019 | TESLA | MODEL 3 | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 220 | 0 | N/A | 475254825 | N/A | N/A | N/A |
| 1N4AZ0CP1D | N/A | N/A | AP | N/A | 2013 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 75 | 0 | N/A | 112277063 | N/A | N/A | N/A |
| 1G1FW6S06H | Island | Clinton | WA | 98236 | 2017 | CHEVROLET | BOLT EV | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 238 | 0 | 10 | 132227280 | POINT (-122.35803 47.9796) | PUGET SOUND ENERGY INC | 53029972000 |
| 1G1FX6S04H | Snohomish | Edmonds | WA | 98020 | 2017 | CHEVROLET | BOLT EV | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 238 | 0 | 32 | 104156608 | POINT (-122.37689 47.81116) | PUGET SOUND ENERGY INC | 53061050800 |

- The first vehicle is a 2019 Tesla Model 3 with an electric range of 220 miles. It is located in British Columbia, Canada, and no other information is available.
- The second vehicle is a 2013 Nissan Leaf with an electric range of 75 miles. It is located in an unknown location in the state of AP, and no other information is available.
- The third vehicle is a 2017 Chevrolet Bolt EV with an electric range of 238 miles. It is located in Clinton, WA, in Island County, and is served by Puget Sound Energy Inc. It is eligible for Clean Alternative Fuel Vehicle (CAFV) incentives and is located in legislative district 10.
- The fourth vehicle is a 2017 Chevrolet Bolt EV with an electric range of 238 miles. It is located in Edmonds, WA, in Snohomish County, and is served by Puget Sound Energy Inc. It is eligible for Clean Alternative Fuel Vehicle (CAFV) incentives and is located in legislative district 32.

In [61]:
# Query the index again, this time passing the chat model explicitly.
# NOTE(review): on a top-to-bottom run `index` here is still the one built
# earlier in the notebook; the index that uses `embeddings` is only created in
# the following cell, and this response is never displayed. Confirm the
# intended cell order.
response = index.query(query, llm=llm)

In [62]:
# Rebuild the index on the in-memory DocArray store, this time passing the
# embedding model explicitly, from the current loader.
index_creator = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
)
index = index_creator.from_loaders([loader])