# Document processing

In [11]:
import pandas as pd

def csv_to_string(filepath):
    """Load the CSV file at ``filepath`` and return it re-serialised as CSV text.

    The file is parsed into a DataFrame and written back out as a string;
    the DataFrame's row index is included as the leading column.
    """
    return pd.read_csv(filepath).to_csv(index=True)

# Load the table once as text; csv_string is later passed to get_groq_answer
# as its `table` argument.
csv_string = csv_to_string('data.csv')
print(csv_string)

,,,ID,Owner,Holder,Hydrogen Type,Asset State,Quantity
0,producer, consumer,producer, transporter, consumer,green,utilized,111.0
1,producer, consumer,producer, transporter,pink,H2,112,
2,producer, consumer,producer, transporter, consumer,yellow,H2,100.0



In [2]:
# Retrieval corpus: one answer string per entry. The commented-out line above
# each entry is the question that entry answers; the questions themselves are
# not part of the list.
corpus_of_documents = [
        # "What is green hydrogen?",
        "Green hydrogen is hydrogen gas produced through renewable energy sources, like wind or solar power, by using a process called electrolysis. This method splits water into hydrogen and oxygen without generating carbon emissions, making it environmentally friendly.",
        # "What is blockchain, and how does it work?",
        "Blockchain is a digital ledger technology that stores information across a distributed network of computers. It is secure, transparent, and cannot be easily altered. Each transaction on a blockchain is recorded in blocks and linked (or 'chained') to previous transactions, ensuring a clear and tamper-proof history.",
        # "Why is blockchain used to trace green hydrogen?",
        "Blockchain provides a transparent and tamper-proof way to track the production and distribution of green hydrogen. It ensures that every step in the supply chain, from production to delivery, is recorded, giving consumers and regulators confidence in the hydrogen’s green credentials.",
        # "How does this blockchain contract trace green hydrogen?",
        "The smart contract on the blockchain records key information about each stage of green hydrogen production and distribution, such as: the source of renewable energy used for production, the quantity of hydrogen produced, and the locations and companies involved in transportation and storage. This data is stored securely and can be verified by anyone, ensuring transparency in the hydrogen supply chain.",
        # "What is a smart contract?",
        "A smart contract is a self-executing program on the blockchain that automatically enforces agreements once the predetermined conditions are met. For tracing green hydrogen, the contract ensures that data about each production stage is securely recorded without needing a middleman.",
        # "How can I trust the data on the blockchain?",
        "Blockchain ensures data integrity by making it immutable (unchangeable). Once a transaction is recorded, it cannot be altered without the consensus of the entire network, making it nearly impossible to falsify information.",
        # "How can I access the blockchain to trace green hydrogen?",
        "You can access the blockchain using a blockchain explorer, which allows you to see all transactions related to green hydrogen. The blockchain explorer for Polygon, for instance, provides an interface to view the production and distribution history in real time.",
        # "What is Polygon, and why is it used for this contract?",
        "Polygon is a blockchain network designed to be faster and cheaper than other popular blockchains like Ethereum. It allows us to deploy smart contracts and track green hydrogen in a cost-efficient and scalable way, without compromising on security.",
        # "Do I need to know about blockchain to use this system?",
        "No, you don’t need to understand the technical details of blockchain to use the system. The contract’s functionality is designed to be user-friendly, providing an easy way to verify the sustainability of green hydrogen without requiring blockchain knowledge.",
        # "How does using blockchain benefit the environment?",
        "By using blockchain, we ensure that the production of green hydrogen is transparent, encouraging accountability and reducing fraud in the sustainability claims. This helps promote genuine green energy initiatives, contributing to the fight against climate change."
]

In [19]:
import json

def save_dict_to_json(dictionary, filename):
    """Serialise ``dictionary`` as JSON and write it to ``filename``.

    The object is serialised in memory *before* the file is opened, so a
    value that cannot be encoded raises ``TypeError`` without truncating or
    partially overwriting an existing file.

    Args:
        dictionary: JSON-serialisable object (typically a dict).
        filename: Path of the file to create or overwrite.

    Raises:
        TypeError: If ``dictionary`` contains a non-serialisable value.
        OSError: If the file cannot be opened for writing.
    """
    payload = json.dumps(dictionary)
    # Explicit encoding keeps the file independent of the platform locale.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(payload)

# Persist the corpus under the 'Info' key.
my_dict = {'Info': corpus_of_documents}
save_dict_to_json(my_dict, 'blockchain_information.json')

In [21]:
def load_dict_from_json(filename):
    """Read ``filename`` and return the decoded JSON content.

    The file is opened as UTF-8 (the encoding JSON text is specified to use)
    rather than the platform's locale encoding, so files containing
    non-ASCII characters load the same way on every system.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The Python object decoded from the file (a dict for this notebook's file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

# Round-trip check: reload the saved file and show the stored answers.
my_dict = load_dict_from_json('blockchain_information.json')
print(my_dict['Info'])

['Green hydrogen is hydrogen gas produced through renewable energy sources, like wind or solar power, by using a process called electrolysis. This method splits water into hydrogen and oxygen without generating carbon emissions, making it environmentally friendly.', "Blockchain is a digital ledger technology that stores information across a distributed network of computers. It is secure, transparent, and cannot be easily altered. Each transaction on a blockchain is recorded in blocks and linked (or 'chained') to previous transactions, ensuring a clear and tamper-proof history.", 'Blockchain provides a transparent and tamper-proof way to track the production and distribution of green hydrogen. It ensures that every step in the supply chain, from production to delivery, is recorded, giving consumers and regulators confidence in the hydrogen’s green credentials.', 'The smart contract on the blockchain records key information about each stage of green hydrogen production and distribution, 

# RAG without LLM

## Similarity measure

In [3]:
def jaccard_similarity(query, document):
    """Return the Jaccard similarity between the word sets of two texts.

    Both texts are lower-cased and split into alphanumeric word tokens, so
    punctuation attached to a word ("hydrogen?" vs "hydrogen") and any kind
    of whitespace (tabs, newlines, repeated spaces) no longer prevent a match.

    Args:
        query: The user's text.
        document: The candidate document text.

    Returns:
        ``len(intersection) / len(union)`` of the two word sets as a float in
        [0, 1]; ``0.0`` when neither text contains a word.
    """
    import re  # local import so the function does not rely on another cell

    query_words = set(re.findall(r"\w+", query.lower()))
    document_words = set(re.findall(r"\w+", document.lower()))
    union = query_words | document_words
    if not union:
        # Nothing to compare; avoid dividing by zero.
        return 0.0
    return len(query_words & document_words) / len(union)

def return_response(user_input, corpus_of_documents):
    """Return the corpus entry most similar to ``user_input``.

    Every document is scored with ``jaccard_similarity`` against the query;
    the document with the highest score is returned, and on ties the one
    that appears first in ``corpus_of_documents`` wins.
    """
    scores = [jaccard_similarity(user_input, candidate) for candidate in corpus_of_documents]
    # max() over the index range keeps the earliest position among equal scores.
    best_position = max(range(len(scores)), key=scores.__getitem__)
    return corpus_of_documents[best_position]

In [49]:
# Retrieval only: return the corpus entry that best matches this query.
user_input = "How to access the blockchain?"
return_response(user_input, corpus_of_documents)

'You can access the blockchain using a blockchain explorer, which allows you to see all transactions related to green hydrogen. The blockchain explorer for Polygon, for instance, provides an interface to view the production and distribution history in real time.'

In [50]:
# Query that closely paraphrases the wording of one corpus entry.
user_input = "Do I need to understand technical details of blockchain to use the system?"
return_response(user_input, corpus_of_documents)

'No, you don’t need to understand the technical details of blockchain to use the system. The contract’s functionality is designed to be user-friendly, providing an easy way to verify the sustainability of green hydrogen without requiring blockchain knowledge.'

In [51]:
# Variant of the previous query with different wording ("technology", "frontend").
user_input = "Do I need to understand technical details of blockchain technology to use the frontend?"
return_response(user_input, corpus_of_documents)

'No, you don’t need to understand the technical details of blockchain to use the system. The contract’s functionality is designed to be user-friendly, providing an easy way to verify the sustainability of green hydrogen without requiring blockchain knowledge.'

In [53]:
# NOTE: the recorded output for this query is the "No, you don’t need to
# understand…" entry, not the green-hydrogen definition, i.e. the word-overlap
# retrieval selects the wrong document here.
user_input = "what is green hydrogen?"
return_response(user_input, corpus_of_documents)

'No, you don’t need to understand the technical details of blockchain to use the system. The contract’s functionality is designed to be user-friendly, providing an easy way to verify the sustainability of green hydrogen without requiring blockchain knowledge.'

## RAG with local Llama (Ollama)

In [54]:
import requests
import json

In [56]:

# Ollama API reference: https://github.com/ollama/ollama/blob/main/docs/api.md
# Prompt template used by get_llama_answer; the {relevant_document} and
# {user_input} placeholders are filled with str.format.
prompt = """
You are a bot that explain how to use a blockchain traceability platform. You answer in very short sentences and do not include extra information.
This is the recommended answer to the user input: {relevant_document}
The user input is: {user_input}
Compile a recommendation to the user based on the recommended answer and the user input.
"""

In [70]:
def get_llama_answer(user_input,relevant_document, url = 'http://127.0.0.1:11434/api/generate'):
    """Stream a completion from an Ollama server and print the assembled answer.

    The module-level ``prompt`` template is filled with ``user_input`` and
    ``relevant_document`` and POSTed to ``url`` for the ``llama3.2`` model.
    The response is read line by line; each non-empty line is parsed as a
    JSON object and its ``response`` text fragment is collected. The joined
    fragments are printed once the stream ends.

    Args:
        user_input: The user's question.
        relevant_document: The retrieved text to ground the answer on.
        url: Ollama ``/api/generate`` endpoint.

    Returns:
        None. The answer is printed, not returned.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        RuntimeError: If a streamed JSON object carries an ``error`` field.
    """
    data = {
        "model": "llama3.2",
        "prompt": prompt.format(user_input=user_input, relevant_document=relevant_document)
    }
    headers = {'Content-Type': 'application/json'}
    full_response = []
    # The context manager closes the streamed connection even if parsing fails.
    with requests.post(url, data=json.dumps(data), headers=headers, stream=True) as response:
        # Fail loudly on 4xx/5xx instead of hitting KeyError('response') below.
        response.raise_for_status()
        for line in response.iter_lines():
            if not line:
                continue
            decoded_line = json.loads(line.decode('utf-8'))
            if 'error' in decoded_line:
                raise RuntimeError(f"Ollama returned an error: {decoded_line['error']}")
            full_response.append(decoded_line.get('response', ''))
    print(''.join(full_response))

In [73]:
# Full pipeline: retrieve the best-matching document, print it, then ask the
# local model to answer using that document as context.
user_input = "what is green hydrogen?"
relevant_document = return_response(user_input, corpus_of_documents)
print(relevant_document)
print('----------RAG ANSWER------------')
get_llama_answer(user_input,relevant_document)

No, you don’t need to understand the technical details of blockchain to use the system. The contract’s functionality is designed to be user-friendly, providing an easy way to verify the sustainability of green hydrogen without requiring blockchain knowledge.
----------RAG ANSWER------------
Green hydrogen is a clean source of energy produced by electrolyzing water using renewable electricity.

Recommendation:
To use the blockchain traceability platform, start by creating an account and registering your product or company. Then, follow the guided tour to learn how to upload your data and track its journey. Once you're familiar with the system, explore the dashboard to verify the sustainability of green hydrogen produced by your company.


In [74]:
# Same pipeline for a second query.
user_input = "How to access the blockchain?"
relevant_document = return_response(user_input, corpus_of_documents)
print(relevant_document)
print('----------RAG ANSWER------------')
get_llama_answer(user_input,relevant_document)

You can access the blockchain using a blockchain explorer, which allows you to see all transactions related to green hydrogen. The blockchain explorer for Polygon, for instance, provides an interface to view the production and distribution history in real time.
----------RAG ANSWER------------
You can use a blockchain explorer to access the blockchain.


# RAG with Groq

In [4]:
import yaml
# Read the API key from a local YAML file instead of hard-coding it in the
# notebook. The file must provide a top-level GROQ_API_KEY entry.
with open('LLM_API_KEY.yml','r') as file:
    credentials = yaml.safe_load(file)
    
GROQ_API_KEY = credentials['GROQ_API_KEY']

In [5]:
import os
from groq import Groq

# Shared Groq client used by get_groq_answer.
client = Groq(
    # Key loaded from LLM_API_KEY.yml above, so it is passed explicitly.
    # NOTE(review): the client presumably falls back to an environment
    # variable when api_key is omitted — confirm before removing this argument.
    api_key = GROQ_API_KEY,
)

In [13]:
def get_groq_answer(user_input, relevant_document, table, model="llama3-8b-8192"):
    """Ask a Groq-hosted chat model for an answer and print it.

    A single user message is built from a fixed prompt template filled with
    the contract ``table``, the retrieved ``relevant_document`` and the
    ``user_input``; it is sent through the module-level ``client`` and the
    content of the first returned choice is printed.

    Args:
        user_input: The user's question.
        relevant_document: The retrieved text to ground the answer on.
        table: Contract data as text, inserted into the prompt as-is.
        model: Model identifier passed to the chat-completions call. Defaults
            to the id this notebook was originally run with.
            NOTE(review): confirm this id is still served by the provider.

    Returns:
        None. The answer is printed, not returned.
    """
    prompt = """
    You are a bot that explain how to use a blockchain traceability platform. You answer in very short sentences and do not include extra information.
    This is the blockchain contract information: {table}
    Holder column indicates who holds the corresponding hydrogen lot, Owner column indicates who owns the hydrogen lot and the other columns indicate properties of the hydrogen lot produced by the company.
    This is the recommended answer to the user input: {relevant_document}
    The user input is: {user_input}
    Compile a recommendation to the user based on the recommended answer and the user input.
    """
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt.format(user_input = user_input, relevant_document = relevant_document, table = table),
            }
        ],
        model=model,
    )
    print(chat_completion.choices[0].message.content)

In [15]:

# Full pipeline with Groq: retrieve the best-matching document, then ask the
# model, additionally supplying the table text (csv_string) as context.
user_input = "How to access the blockchain?"
relevant_document = return_response(user_input, corpus_of_documents)


print(relevant_document)
print('----------RAG ANSWER------------')
get_groq_answer(user_input,relevant_document,csv_string)

You can access the blockchain using a blockchain explorer, which allows you to see all transactions related to green hydrogen. The blockchain explorer for Polygon, for instance, provides an interface to view the production and distribution history in real time.
----------RAG ANSWER------------
You can access the blockchain using a blockchain explorer.


In [14]:
# Question about the table data rather than the FAQ corpus: the retrieved
# document (printed first) is a general definition, so the answer has to come
# from the table text passed in the prompt.
# NOTE(review): verify the model's count against data.csv.
user_input = "How many contracts are being transported?"
relevant_document = return_response(user_input, corpus_of_documents)


print(relevant_document)
print('----------RAG ANSWER------------')
get_groq_answer(user_input,relevant_document,csv_string)

A smart contract is a self-executing program on the blockchain that automatically enforces agreements once the predetermined conditions are met. For tracing green hydrogen, the contract ensures that data about each production stage is securely recorded without needing a middleman.
----------RAG ANSWER------------
Based on the contract information, only 1 contract has "transporter" as the Holder, so only 1 contract is being transported.
