In [41]:
import os
import streamlit as st
import pickle
import time

from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
# from langchain import OpenAI
# from langchain.embeddings import OpenAIEmbeddings
from langchain_google_genai import GoogleGenerativeAI
from langchain.embeddings import GooglePalmEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import SeleniumURLLoader

from langchain.chains import ConversationalRetrievalChain


In [35]:
# Read the Google API key from the environment rather than embedding it in the notebook.
# NOTE(review): a literal key was previously committed in this cell; treat it as leaked and revoke it.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )

# Gemini model handle, built with the key read above.
llm = GoogleGenerativeAI(model='gemini-pro',google_api_key=GOOGLE_API_KEY)

### LOAD DATA

In [3]:

# Pages to ingest; every URL listed here is handed to SeleniumURLLoader.
article_urls = [
    "https://www.zerocarbonshipping.com/news/explainer-pooling-and-the-business-case-for-green-shipping/",
]
loader = SeleniumURLLoader(urls=article_urls)

# Load the pages; `data` is the collection of loaded documents.
data = loader.load()

# Number of loaded documents.
len(data)


2024-05-21 23:03:28.116 INFO    unstructured: Reading document from string ...
2024-05-21 23:03:28.125 INFO    unstructured: Reading document ...


1

### SPLIT DATA

In [4]:
# Recursive splitter configured with chunk_size=1000 and chunk_overlap=200.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# `data` already holds documents, so split_documents is used rather than split_text.
docs = text_splitter.split_documents(data)

# Number of chunks produced.
len(docs)

16

In [10]:
from langchain.text_splitter import CharacterTextSplitter


def get_text_chunks(text):
    """Split loaded documents into chunks using a newline separator.

    Despite its name, ``text`` is handed to ``split_documents``, so it is
    expected to be a list of documents (such as the result of
    ``loader.load()``), not a plain string.

    Returns the list of chunks produced by the splitter.
    """
    newline_splitter = CharacterTextSplitter(
        separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
    )
    return newline_splitter.split_documents(text)


# Chunk the loaded article and show how many chunks were produced.
text_chunk = get_text_chunks(data)
len(text_chunk)



12

In [14]:
# Inspect the first chunk returned by get_text_chunks (newline-separated CharacterTextSplitter).
text_chunk[0]

Document(page_content='News\nFuelEU Explainer: Pooling and the Business Case for Green Shipping\nPublished — April 24, 2024\nThis is the second article of our series on FuelEU Maritime Regulation from the Mærsk Mc-Kinney Møller Center for Zero Carbon Shipping (MMMCZCS). We will share the latest analysis, strategic insights, and practical tools for organizations to leverage FuelEU for achieving decarbonization goals.Subscribe to our newsletter "Countdown to FuelEU" here.\nThe big picture\nFuelEU’s pooling mechanism can drive uptake of green shipping. It does this by creating a way for vessels that go beyond the annual GHG intensity targets (reducing more emissions than is required by the rules) to share their overcompliance with other vessels. For the mechanism to drive investment, companies need reliable estimates of what overcompliance is worth. Here, we lay out how pools can work, how companies can think about the value of pooling, and a calculator you can use right away to help quan

In [13]:
# Inspect the first chunk produced by the RecursiveCharacterTextSplitter.
docs[0]

Document(page_content='News\n\nFuelEU Explainer: Pooling and the Business Case for Green Shipping\n\nPublished — April 24, 2024\n\nThis is the second article of our series on FuelEU Maritime Regulation from the Mærsk Mc-Kinney Møller Center for Zero Carbon Shipping (MMMCZCS). We will share the latest analysis, strategic insights, and practical tools for organizations to leverage FuelEU for achieving decarbonization goals.Subscribe to our newsletter "Countdown to FuelEU" here.\n\nThe big picture', metadata={'source': 'https://www.zerocarbonshipping.com/news/explainer-pooling-and-the-business-case-for-green-shipping/', 'title': 'Explainer: Pooling and the business case for green shipping | Mærsk Mc-Kinney Møller Center for Zero Carbon Shipping', 'description': 'No description found.', 'language': 'en'})

### Create embeddings for the chunks and save them to a FAISS index

In [54]:
# Create the embedding model for the chunks using GooglePalmEmbeddings
embeddings = GooglePalmEmbeddings(google_api_key=GOOGLE_API_KEY)

# Pass the documents and the embedding model in order to create the FAISS vector index
vectorindex_palm = FAISS.from_documents(docs,embeddings)

In [55]:
# Save the vectorstore object locally, under the "vectorstore" path
vectorindex_palm.save_local("vectorstore")

In [56]:
# Load the vectorstore object back from the "vectorstore" path, using the same embeddings object.
# allow_dangerous_deserialization=True is passed explicitly.
# NOTE(review): presumably acceptable because the index was written by this notebook;
# do not load an index from an untrusted source this way.
x = FAISS.load_local("vectorstore", embeddings,allow_dangerous_deserialization=True)

In [57]:
# Expose the loaded FAISS index as a retriever.
retriever = x.as_retriever()

# Use invoke(), the Runnable entry point, instead of the deprecated get_relevant_documents().
rdocs = retriever.invoke("purpose of pooling")

# Display the retrieved documents.
rdocs

[Document(page_content='Pooling value calculator', metadata={'source': 'https://www.zerocarbonshipping.com/news/explainer-pooling-and-the-business-case-for-green-shipping/', 'title': 'Explainer: Pooling and the business case for green shipping | Mærsk Mc-Kinney Møller Center for Zero Carbon Shipping', 'description': 'No description found.', 'language': 'en'}),
 Document(page_content='A business case?\n\nThe goal of the pooling mechanism is to provide “incentives for investment in more advanced technologies” (FuelEU Recital (57)).FuelEU pooling allows vessels that overachieve on the intensity targets to generate value by monetizing the excess compliance, known as ‘surplus’, with other vessels. If companies can find buyers for the surplus, this can become another source of revenue that potentially closes the cost gap between alternative and fossil fuels. FuelEU pooling, thus, may create a business case for sailing green (read more about building a business case in our recent report ‘Tran

### Retrieve the most similar chunks for a given question and call the LLM to produce the final answer

In [49]:
from langchain.llms import GooglePalm
from langchain.chains import RetrievalQA

In [31]:
# Rebinds `llm` (assigned earlier to the GoogleGenerativeAI 'gemini-pro' model) to a GooglePalm model.
# NOTE(review): later cells use whichever `llm` was assigned last — confirm which model is intended.
llm = GooglePalm(google_api_key=GOOGLE_API_KEY)

In [58]:
# Build a RetrievalQA chain of type "stuff" over the retriever; the question is read from
# the "query" input key and the retrieved source documents are returned with the answer.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    input_key="query",
    return_source_documents=True,
)

# Calling the chain object directly is deprecated; invoke() takes the inputs keyed by input_key.
chain.invoke({"query": "What is the pooling concept?"})

{'query': 'What is the pooling concept?',
 'result': 'The pooling concept is a mechanism that allows vessels that go beyond the annual GHG intensity targets (reducing more emissions than is required by the rules) to share their overcompliance with other vessels.',
 'source_documents': [Document(page_content='Pooling value calculator', metadata={'source': 'https://www.zerocarbonshipping.com/news/explainer-pooling-and-the-business-case-for-green-shipping/', 'title': 'Explainer: Pooling and the business case for green shipping | Mærsk Mc-Kinney Møller Center for Zero Carbon Shipping', 'description': 'No description found.', 'language': 'en'}),
  Document(page_content='The big picture\n\nFuelEU’s pooling mechanism can drive uptake of green shipping. It does this by creating a way for vessels that go beyond the annual GHG intensity targets (reducing more emissions than is required by the rules) to share their overcompliance with other vessels. For the mechanism to drive investment, companie

In [42]:
# Rebind `chain` to a sources-aware QA chain; a fresh retriever is created from the loaded index.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    return_source_documents=True,
    retriever=x.as_retriever(),
)

# Show the chain's configuration.
chain

RetrievalQAWithSourcesChain(combine_documents_chain=MapReduceDocumentsChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['context', 'question'], template='Use the following portion of a long document to see if any of the text is relevant to answer the question. \nReturn any relevant text verbatim.\n{context}\nQuestion: {question}\nRelevant text, if any:'), llm=GoogleGenerativeAI(model='gemini-pro', google_api_key=SecretStr('**********'), client=genai.GenerativeModel(
    model_name='models/gemini-pro',
    generation_config={},
    safety_settings={},
    tools=None,
    system_instruction=None,
    model_name='models/gemini-pro',
    generation_config={},
    safety_settings={},
    tools=None,
    system_instruction=None,
))), document_prompt=PromptTemplate(input_variables=['page_content', 'source'], template='Content: {page_content}\nSource: {source}'), document_variable_name='summaries')), document_variable_name='context'), return_source_documents=True, retriever=Vecto

In [43]:
# Question that is passed to the chain under the "question" key.
query = "what is the pooling concept"

In [44]:
# langchain.debug=True

# Use invoke() instead of the deprecated direct call on the chain object.
# return_only_outputs=True is forwarded unchanged from the original call.
chain.invoke({"question": query}, return_only_outputs=True)

{'answer': "I don't know. The provided text does not contain the answer to your question.\n",
 'sources': 'https://www.zerocarbonshipping.com/news/explainer-pooling-and-the-business-case-for-green-shipping/',
 'source_documents': [Document(page_content='Pooling value calculator', metadata={'source': 'https://www.zerocarbonshipping.com/news/explainer-pooling-and-the-business-case-for-green-shipping/', 'title': 'Explainer: Pooling and the business case for green shipping | Mærsk Mc-Kinney Møller Center for Zero Carbon Shipping', 'description': 'No description found.', 'language': 'en'}),
  Document(page_content='Who are the potential market actors?\nPool lead (seller) is the vessel which overachieves on the compliance target and decides to share the surplus with other vessels.\nPool participant (buyer) is a vessel looking to achieve compliance through the surplus of the pool lead.\nPool broker is a role open to any entity, including third parties, capable of connecting sellers with buyer