In [1]:
import os
from dotenv import load_dotenv
import os
from dotenv import load_dotenv, find_dotenv
import warnings
import requests
import json
import time
import pandas as pd
from utils import load_mistral_api_key
# Fetch the Mistral credentials through the project-local `utils` helper.
# NOTE(review): presumably `ret_key=True` makes the helper return the
# (key, endpoint) pair unpacked here -- confirm against utils.py.
api_key, dlai_endpoint = load_mistral_api_key(ret_key=True)
from mistralai.models.chat_completion import ChatMessage

> Content Scraping & Saving

In [2]:
import requests
from bs4 import BeautifulSoup
import re

# Download the article page. A timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() stops us from parsing an error page.
response = requests.get(
    "https://www.deeplearning.ai/the-batch/a-roadmap-explores-how-ai-can-detect-and-mitigate-greenhouse-gases/",
    timeout=30,
)
response.raise_for_status()
html_doc = response.text
soup = BeautifulSoup(html_doc, "html.parser")

# Select the <div> whose CSS class starts with "prose--styled"; the printed
# output shows this is the article body.
tag = soup.find("div", class_=re.compile("^prose--styled"))
if tag is None:
    # Without this guard a layout change surfaces as an opaque AttributeError.
    raise ValueError(
        "Could not find a <div> with a class starting with 'prose--styled'; "
        "the page layout may have changed."
    )
text = tag.text
print(text)

How can AI help to fight climate change? A new report evaluates progress so far and explores options for the future.What’s new: The Innovation for Cool Earth Forum, a conference of climate researchers hosted by Japan, published a roadmap for the use of data science, computer vision, and AI-driven simulation to reduce greenhouse gas emissions. The roadmap evaluates existing approaches and suggests ways to scale them up.How it works: The roadmap identifies 6 “high-potential opportunities”: activities in which AI systems can make a significant difference based on the size of the opportunity, real-world results, and validated research. The authors emphasize the need for data, technical and scientific talent, computing power, funding, and leadership to take advantage of these opportunities.Monitoring emissions. AI systems analyze data from satellites, drones, and ground sensors to measure greenhouse gas emissions. The European Union uses them to measure methane emissions, environmental orga

In [3]:
# Save the scraped article so later runs can work from the local copy.
file_name = "AI_greenhouse_gas.txt"
# The text contains non-ASCII characters (curly quotes, non-breaking spaces).
# Writing UTF-8 explicitly avoids depending on the platform's default encoding,
# which can raise UnicodeEncodeError or produce a file that reads back garbled.
with open(file_name, "w", encoding="utf-8") as file:
    file.write(text)

> Chunks and Embedding

In [4]:
# Cut the article into consecutive, non-overlapping windows of `chunk_size`
# characters. The split is purely positional, so a window may start or end in
# the middle of a word; the last window may be shorter than the rest.
chunk_size = 512
chunk_starts = range(0, len(text), chunk_size)
chunks = [text[start : start + chunk_size] for start in chunk_starts]

In [5]:
# Number of fixed-size chunks the article was split into.
len(chunks)

8

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, SentenceTransformersTokenTextSplitter

In [7]:
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

# Build the sentence-transformers embedding function with its default settings
# and try it on a single chunk. The function takes a list of strings and
# returns one vector per string.
text_embeddings = SentenceTransformerEmbeddingFunction()
sample_chunk = chunks[2]
print(text_embeddings([sample_chunk]))

  from tqdm.autonotebook import tqdm, trange


[[-0.03955903276801109, -0.00035378654138185084, 0.07877398282289505, 0.07863625138998032, 0.05832453444600105, -0.0009619758347980678, -0.009307589381933212, -0.04861386492848396, 0.020098600536584854, 0.0033103181049227715, -0.02965579554438591, -0.05278736352920532, -0.0510658361017704, -0.021487964317202568, -0.020765308290719986, 0.04218614101409912, -0.02143150568008423, -0.0012820939300581813, -0.004704076796770096, -0.06867031008005142, 0.044696297496557236, 0.07134471833705902, 0.007126348093152046, -0.06500551104545593, 0.1176089271903038, 0.060248155146837234, -0.04241504520177841, 0.031324539333581924, -0.03115120716392994, 0.06469538062810898, -0.01382162980735302, 0.009207168594002724, -0.0841534286737442, 0.005865441169589758, 0.0026744252536445856, 0.006445672363042831, -0.049459267407655716, 0.03081035241484642, 0.01990455947816372, 0.08477380871772766, -0.03489053621888161, -0.0862734243273735, 0.056319139897823334, -0.054586369544267654, -0.006946907378733158, 0.0084

In [8]:
# Embed the same sample chunk again; the printed vector matches the previous
# cell's output.
print(text_embeddings([chunks[2]]))

[[-0.03955903276801109, -0.00035378654138185084, 0.07877398282289505, 0.07863625138998032, 0.05832453444600105, -0.0009619758347980678, -0.009307589381933212, -0.04861386492848396, 0.020098600536584854, 0.0033103181049227715, -0.02965579554438591, -0.05278736352920532, -0.0510658361017704, -0.021487964317202568, -0.020765308290719986, 0.04218614101409912, -0.02143150568008423, -0.0012820939300581813, -0.004704076796770096, -0.06867031008005142, 0.044696297496557236, 0.07134471833705902, 0.007126348093152046, -0.06500551104545593, 0.1176089271903038, 0.060248155146837234, -0.04241504520177841, 0.031324539333581924, -0.03115120716392994, 0.06469538062810898, -0.01382162980735302, 0.009207168594002724, -0.0841534286737442, 0.005865441169589758, 0.0026744252536445856, 0.006445672363042831, -0.049459267407655716, 0.03081035241484642, 0.01990455947816372, 0.08477380871772766, -0.03489053621888161, -0.0862734243273735, 0.056319139897823334, -0.054586369544267654, -0.006946907378733158, 0.0084

In [9]:
# At this point `text_embeddings` is the embedding-function object itself,
# not a collection of vectors.
text_embeddings

<chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction at 0x1e2ac74afe0>

> Converting list of Embeddings into Array

In [10]:
import numpy as np

# Embed every chunk, one call per chunk. Each call receives a single-item list
# and returns a single-item list of vectors, so stacking the results yields a
# 3-D array of shape (n_chunks, 1, dim).
#
# A dedicated embedder instance is used here instead of calling
# `text_embeddings` directly: that name is rebound to the result array below,
# so calling it would make this cell fail with "'numpy.ndarray' object is not
# callable" the second time it is run.
chunk_embedder = SentenceTransformerEmbeddingFunction()
text_embeddings = np.array([chunk_embedder([chunk]) for chunk in chunks])

In [11]:
# `text_embeddings` now refers to the stacked array of per-chunk vectors.
text_embeddings

array([[[-0.01831476,  0.03175715,  0.07731699, ..., -0.00084819,
         -0.10899568, -0.02484642]],

       [[ 0.02106628,  0.01000826,  0.00710219, ...,  0.03524455,
         -0.07400183, -0.09941633]],

       [[-0.03955903, -0.00035379,  0.07877398, ...,  0.01687794,
         -0.06168801, -0.00540015]],

       ...,

       [[-0.0573559 ,  0.01041865,  0.04680414, ...,  0.00428574,
          0.00061479, -0.00822853]],

       [[-0.05093888,  0.07361198,  0.01079245, ..., -0.03200419,
         -0.01095883, -0.03453279]],

       [[ 0.0022917 ,  0.03432366,  0.09202603, ..., -0.02386329,
         -0.05846413, -0.05515086]]])

In [12]:
print(text_embeddings.shape)  # 3-D: (n_chunks, 1, dim) -- each chunk was embedded as a one-item list

(8, 1, 384)


In [13]:
# Drop the singleton middle axis so every chunk becomes one row:
# (n_chunks, 1, dim) -> (n_chunks, dim).
text_embeddings_flat = np.array(text_embeddings[:, 0])

# Sanity check: the length of one embedding vector (prints 384 for the
# embedding function used in this notebook).
print(text_embeddings_flat.shape[1])


384


In [14]:
# Inspect the flattened matrix: one row per chunk.
text_embeddings_flat

array([[-0.01831476,  0.03175715,  0.07731699, ..., -0.00084819,
        -0.10899568, -0.02484642],
       [ 0.02106628,  0.01000826,  0.00710219, ...,  0.03524455,
        -0.07400183, -0.09941633],
       [-0.03955903, -0.00035379,  0.07877398, ...,  0.01687794,
        -0.06168801, -0.00540015],
       ...,
       [-0.0573559 ,  0.01041865,  0.04680414, ...,  0.00428574,
         0.00061479, -0.00822853],
       [-0.05093888,  0.07361198,  0.01079245, ..., -0.03200419,
        -0.01095883, -0.03453279],
       [ 0.0022917 ,  0.03432366,  0.09202603, ..., -0.02386329,
        -0.05846413, -0.05515086]])

In [15]:
print(text_embeddings_flat.shape)  # 2-D: (n_chunks, dim)

(8, 384)


> Storing "Content" Embeddings into the Faiss DB

In [17]:
import faiss
import numpy as np

# FAISS operates on contiguous float32 matrices. NumPy builds float64 arrays
# from Python floats by default, so convert explicitly instead of relying on
# the wrapper to coerce (or reject) the input.
chunk_vectors = np.ascontiguousarray(text_embeddings_flat, dtype="float32")

# One row per chunk, one column per embedding dimension.
d = chunk_vectors.shape[1]
index = faiss.IndexFlatL2(d)  # flat index compared by L2 distance

# Add every chunk vector. Rows are added in chunk order, so the ids returned
# by a search can be used directly as positions in `chunks`.
index.add(chunk_vectors)


In [18]:
# Display the chunk-embedding matrix that was just added to the index.
text_embeddings_flat

array([[-0.01831476,  0.03175715,  0.07731699, ..., -0.00084819,
        -0.10899568, -0.02484642],
       [ 0.02106628,  0.01000826,  0.00710219, ...,  0.03524455,
        -0.07400183, -0.09941633],
       [-0.03955903, -0.00035379,  0.07877398, ...,  0.01687794,
        -0.06168801, -0.00540015],
       ...,
       [-0.0573559 ,  0.01041865,  0.04680414, ...,  0.00428574,
         0.00061479, -0.00822853],
       [-0.05093888,  0.07361198,  0.01079245, ..., -0.03200419,
        -0.01095883, -0.03453279],
       [ 0.0022917 ,  0.03432366,  0.09202603, ..., -0.02386329,
        -0.05846413, -0.05515086]])

> User Question + User Question Embedding

In [19]:
# Embed the user's question with a sentence-transformers embedding function
# built the same way as the one used for the chunks, so both live in the same
# vector space. The result is a one-item list holding the question's vector.
question = "What are the ways that AI can reduce emissions in transportation?"
embedder = SentenceTransformerEmbeddingFunction()
question_embedding = embedder([question])
print(question_embedding)


[[0.02988814376294613, 0.05839552357792854, 0.11218699812889099, 0.04920748993754387, 0.0446867011487484, 0.009189292788505554, 0.04188433289527893, -0.0051221707835793495, -0.037766244262456894, 0.010701172985136509, -0.025033077225089073, -0.0035883847158402205, 0.0010903169168159366, 0.0336310900747776, -0.024126743897795677, 0.07356739044189453, 0.049241356551647186, 0.07785377651453018, -0.054245442152023315, -0.10719998180866241, 0.026045842096209526, -0.015225091949105263, -0.019000673666596413, -0.004718645941466093, -0.008320369757711887, 0.05180411785840988, -0.020068587735295296, -0.05578949674963951, -0.001178371487185359, -0.014374682679772377, 0.046635158360004425, -0.01808410882949829, -0.040803659707307816, 0.014643152244389057, -0.04651839658617973, 0.00020199334539938718, -0.019327299669384956, 0.03648943826556206, 0.08085886389017105, -0.033133383840322495, -0.013701600022614002, -0.10175183415412903, -0.019602561369538307, -0.06853427737951279, 0.008785712532699108,

> Converting User Question Embeddings into Array

In [20]:
import numpy as np
# Turn the question embedding into a float32 row matrix of shape (1, dim),
# which is the form handed to `index.search`.
question_embedding_np = np.array(question_embedding, dtype=np.float32)
question_embedding_np = question_embedding_np.reshape((1, -1))

In [21]:
# Look up the stored chunk vectors closest to the question.
# D holds the distances and I the matching row ids, one row per query.
top_k = 2
D, I = index.search(question_embedding_np, top_k)
print(I)

[[7 5]]


> Search for the chunks that are most similar to the user query

In [22]:
# Map the ids returned for the (single) query back to their chunk text.
# FAISS pads the id row with -1 when fewer than k neighbours are available;
# skip those so a padding id is not silently read as chunks[-1].
retrieved_chunk = [chunks[i] for i in I[0].tolist() if i >= 0]
print(retrieved_chunk)

['eration, manufacturing, food production, and transportation — could make a significant dent in greenhouse gas emissions.We’re thinking:\xa0AI also has an important role to play in advancing the science of climate geoengineering, such as stratospheric aerosol injection (SAI), to cool down the planet. More research is needed to determine whether SAI is a good idea, but AI-enabled climate modeling will help answer this question.', 'arriers to taking full advantage of AI in the food industry.Transportation.\xa0AI systems can reduce greenhouse-gas emissions by improving traffic flow, ameliorating congestion, and optimizing public transportation. Moreover, reinforcement learning can reduce the impact of electric vehicles on the power grid by optimizing their charging. More data, uniform standards, and AI talent are needed to realize this potential.Materials.\xa0Materials scientists use AI models to study traits of existing materials and design']


> Creating the prompt from the user question and the retrieved chunks

In [23]:
# Join the retrieved chunks into plain text before building the prompt.
# Interpolating the list itself would insert its Python repr -- brackets,
# quotes and escape sequences such as \xa0 -- into the context the model reads.
context = "\n\n".join(retrieved_chunk)
prompt = f"""
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {question}
Answer:
"""

> Passing prompt to the Model

In [24]:
from utils import mistral
# Send the assembled prompt to the model through the project-local
# `utils.mistral` helper and show what it returns.
# NOTE(review): this rebinds `response`, which earlier held the HTTP response
# from requests.get -- consider a distinct name if that object is needed again.
response = mistral(prompt)
print(response)

AI can reduce greenhouse gas emissions in transportation in several ways:

1. **Improving Traffic Flow and Ameliorating Congestion**: AI systems can analyze real-time traffic data to optimize traffic signal timing and reroute vehicles to reduce congestion, thereby lowering emissions.

2. **Optimizing Public Transportation**: AI can help schedule and route public transportation more efficiently, reducing wait times and improving overall system efficiency, which in turn reduces emissions.

3. **Optimizing Electric Vehicle (EV) Charging**: Reinforcement learning algorithms can optimize the charging of electric vehicles to minimize their impact on the power grid, thereby reducing overall emissions.

However, to fully realize these potential benefits, more data, uniform standards, and AI talent are needed.
