# Find torch installation command for your machine at https://pytorch.org/get-started/locally/

In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 --upgrade

In [None]:
!pip install langchain einops accelerate transformers bitsandbytes scipy

In [None]:
!pip install xformers sentencepiece 

In [None]:
!pip install llama-index==0.7.21 llama_hub==0.0.19

In [None]:
pip install llama-index --upgrade --no-cache-dir --force-reinstall

In [None]:
%pip install llama-index-llms-huggingface

In [None]:
%pip install llama-index-embeddings-langchain

In [None]:
pip install sentence-transformers

In [None]:
pip install -U llama-index-readers-file

In [None]:
pip install langchain_experimental

In [None]:
pip install tabulate

In [None]:
pip install text_generation

In [None]:
pip install chromadb

In [None]:
pip install lark

In [None]:
%pip install llama-index-vector-stores-chroma

In [None]:
pip install spacy

In [1]:
# Import transformer classes for generaiton
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
# Import torch for datatype attributes 
import torch

In [2]:
import gc

torch.cuda.empty_cache()
gc.collect()

100

In [3]:
# Define variable to hold llama2 weights naming 
name = "meta-llama/Llama-2-7b-chat-hf"
# Set auth token variable from hugging face 
auth_token = "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"

In [4]:
# Create tokenizer
tokenizer = AutoTokenizer.from_pretrained(name, 
    cache_dir='./model/', use_auth_token=auth_token)



In [5]:
# Create model
model = AutoModelForCausalLM.from_pretrained(name, 
    cache_dir='./model/', use_auth_token=auth_token, torch_dtype=torch.float16, 
    rope_scaling={"type": "dynamic", "factor": 2}, load_in_8bit=True) 



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Import the prompt wrapper...but for llama index
from llama_index.core.prompts.prompts import SimpleInputPrompt
# Create a system prompt 
system_prompt = """<s>[INST] <<SYS>>

 You are now a carbon footprint analyst. Your job is to reason about the carbon footprint of "{product_name}" 
 based off its components. For each part of the calculation, explain how you came to that conclusion. Only use 
 factual data for your computations and do not make assumptions. Do not use information about any other product
 other than "{product_name}" to perform your computations. If you cannot compute the carbon footprint 
 from known information, return "None".<</SYS>>
"""
# Throw together the query wrapper
query_wrapper_prompt = SimpleInputPrompt("{query_str} [/INST]")

In [7]:
# Complete the query prompt
query_wrapper_prompt.format(query_str='hello')

'hello [/INST]'

In [8]:
# Import the llama index HF Wrapper
from llama_index.llms.huggingface import HuggingFaceLLM
# Create a HF LLM using the llama index wrapper 
llm = HuggingFaceLLM(context_window=4096,
                    max_new_tokens=1024,
                    system_prompt=system_prompt,
                    query_wrapper_prompt=query_wrapper_prompt,
                    model=model,
                    tokenizer=tokenizer)

In [9]:
# Bring in embeddings wrapper
from llama_index.embeddings.langchain import LangchainEmbedding
# Bring in HF embeddings - need these to represent document chunks
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

In [10]:
# Create and dl embeddings instance  
embeddings=LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
)

In [11]:
# Bring in stuff to change settings
from llama_index.core import Settings

In [12]:
# Establish llama_index model settings
Settings.llm = llm
Settings.embed_model = embeddings
Settings.chunk_size=1024

In [13]:
# Import deps to load documents 
from llama_index.core import VectorStoreIndex, download_loader
from llama_index.core import SimpleDirectoryReader
from pathlib import Path

from langchain.document_loaders import CSVLoader
from langchain.vectorstores import Chroma
from llama_index.vector_stores.chroma import ChromaVectorStore

from sentence_transformers import SentenceTransformer
from langchain.chains import RetrievalQA
from llama_index.core import Document

from llama_index.core.schema import TextNode

import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from IPython.display import Markdown, display

from llama_index.core import StorageContext



In [14]:
# prompt user to enter question
user_question = input("User:")

User: Latitude 3470


# Store csv data into SQL database

https://docs.llamaindex.ai/en/stable/examples/index_structs/struct_indices/SQLIndexDemo/

#### Create Database Schema

In [15]:
from sqlalchemy import (
    create_engine,
    MetaData,
    Table,
    Column,
    String,
    Integer,
    select,
    Float,
)
from llama_index.core import SQLDatabase

engine = create_engine("sqlite:///:memory:")
metadata_obj = MetaData()

In [16]:
# create city SQL table
table_name = "device_specs"
device_specs_table = Table(
    table_name,
    metadata_obj,
    Column("Carbon Filename", String(16)),
    Column("Company", String(16)),
    Column("Device", String(16)),
    Column("Commercial_Name", String(16), primary_key=True),
    Column("PCF", Integer),
    Column("Manufacturing %", Float),
    Column("Chassis & Assembly %", Float),
    Column("Hard Drive %", Float),
    Column("SSD %", Float),
    Column("Power Supply %", Float),
    Column("Battery %", Float),
    Column("Mainboard and Other Boards %", Float),
    Column("Display %", Float),
    Column("Packaging %", Float),
    Column("Manufacturing Emissions", Float),
    Column("Chassis & Assembly Emissions", Float),
    Column("Hard Drive Emissions", Float),
    Column("SSD Emissions", Float),
    Column("Power Supply Emissions", Float),
    Column("Battery Emissions", Float),
    Column("Mainboard and Other Boards Emissions", Float),
    Column("Display Emissions", Float),
    Column("Packaging Emissions", Float),
    Column("Other Emissions", Float),
    Column("Specs Filename", String(16)),
    Column("Category", String(16)),
    Column("Processor Cores", Float),
    Column("RAM", Float),
    Column("SSD", Float),
    Column("HDD", Float),
    Column("Power", Float),
    Column("Display", Float),
    Column("Weight", Float)
)
metadata_obj.create_all(engine)

#### Add csv to database

In [21]:
from sqlalchemy import insert
import pandas as pd

sql_database = SQLDatabase(engine, include_tables=[table_name])

df = pd.read_csv('./data/Combined_Dataset_Final.csv')

# Convert the DataFrame into a list of dictionaries
rows = df.to_dict('records')

# Add csv to SQL database
for row in rows:
    stmt = insert(device_specs_table).values(**row)
    with engine.begin() as connection:
        cursor = connection.execute(stmt)


#### View database items

In [22]:
# view current table
stmt = select(
    device_specs_table.c.Company,
    device_specs_table.c.Device,
    device_specs_table.c["Commercial_Name"],
    device_specs_table.c.PCF,
).select_from(device_specs_table)

with engine.connect() as connection:
    results = connection.execute(stmt).fetchall()
    print(results)

[('Dell', 'Laptop', 'Latitude 3150', 216), ('Dell', 'Laptop', 'Latitude 3160', 244), ('Dell', 'Laptop', 'Latitude 3180', 243), ('Dell', 'Laptop', 'Latitude 3190', 226), ('Dell', 'Laptop', 'Latitude 3189', 257), ('Dell', 'Laptop', 'Latitude 3190 2-in-1', 230), ('Dell', 'Laptop', 'Latitude 3350', 258), ('Dell', 'Laptop', 'Latitude 3380', 252), ('Dell', 'Laptop', 'Latitude 3460', 279), ('Dell', 'Laptop', 'Latitude 3470', 286), ('Dell', 'Laptop', 'Latitude 3480', 324), ('Dell', 'Laptop', 'Latitude 3490', 243), ('Dell', 'Laptop', 'Latitude 3550', 263), ('Dell', 'Laptop', 'Latitude 3560', 298), ('Dell', 'Laptop', 'Latitude 3570', 302), ('Dell', 'Laptop', 'Latitude 3580', 311), ('Dell', 'Laptop', 'Latitude 3590', 254), ('Dell', 'Laptop', 'Latitude 5285 2-in-1', 258), ('Dell', 'Laptop', 'Latitude 5289 2-in-1', 259), ('Dell', 'Laptop', 'Latitude 5290', 218), ('Dell', 'Laptop', 'Latitude 5290 2-in-1', 268), ('Dell', 'Laptop', 'Latitude 5450', 247), ('Dell', 'Laptop', 'Latitude 5480', 250), ('Del

In [23]:
from sqlalchemy import text

# Surround user_question with quotes and add wildcard character % for partial matching
search_term = f"'%{user_question}%'"

# Use text() to create a SQL expression
sql_query = text(f"SELECT * FROM device_specs WHERE `Commercial_Name` LIKE {search_term}")

# Execute the query
with engine.connect() as con:
    rows = con.execute(sql_query)
    for row in rows:
        print(row)

('Dell_Carbon\\carbon-footprint-latitude-3470.pdf', 'Dell', 'Laptop', 'Latitude 3470', 286, 78.8, 4.9, 3.2, 0.0, 7.3, 2.8, 28.1, 33.1, 0.4, 225.368, 11.043032, 7.211776, 0.0, 16.451864, 6.310304, 63.328408, 74.596808, 0.901472, 45.524336, 'latitude-3470-laptop_owners-manual_en-us.pdf', 'Laptop', 4.0, 16.0, 2000.0, 2000.0, 90.0, 14.0, 1.81)


#### Set up Retriever

In [24]:
from llama_index.core.query_engine import NLSQLTableQueryEngine
from llama_index.core.retrievers import NLSQLRetriever

# default retrieval (return_raw=False)
nl_sql_retriever = NLSQLRetriever(
    sql_database, tables=[table_name], return_raw=False
)

In [25]:
#from llama_index.core.response.notebook_utils import display_source_node

#results = nl_sql_retriever.retrieve(
#    user_question
#)

# NOTE: all the content is in the metadata
#for n in results:
#    display_source_node(n, show_source_metadata=True)

#### Run pipeline

In [26]:
from llama_index.core.query_engine import RetrieverQueryEngine

query_engine = RetrieverQueryEngine.from_args(nl_sql_retriever, streaming=True)

response = query_engine.query(
    user_question
)
response.print_response_stream()

#print(str(response))

  attn_output = torch.nn.functional.scaled_dot_product_attention(


 Based on the provided information, I have calculated the carbon footprint of the Dell Latitude 3470 laptop as follows:

1. Manufacturing Emissions:
The carbon footprint of the manufacturing process of the Latitude 3470 is 225.368 tons of CO2 equivalent (tCO2e). This is calculated by multiplying the percentage of the manufacturing process (78.8%) by the total manufacturing emissions (225.368 tCO2e).
2. Chassis & Assembly Emissions:
The carbon footprint of the chassis and assembly process of the Latitude 3470 is 11.043032 tCO2e. This is calculated by multiplying the percentage of the chassis and assembly process (4.9%) by the total chassis and assembly emissions (225.368 tCO2e).
3. Hard Drive Emissions:
The carbon footprint of the hard drive of the Latitude 3470 is 7.211776 tCO2e. This is calculated by multiplying the percentage of the hard drive (3.2%) by the total hard drive emissions (2000.0 tCO2e).
4. SSD Emissions:
Since the Latitude 3470 does not use any SSDs, the carbon footprint