In [None]:
# Instruction to use:

# 1. create your Gemini and OpenAI api keys 
# 2. Install all python modules added in the code
# 3. Restart the kernal and run all the cells


In [None]:
# Install Libraries

# pip install google.generativeai
# pip install openai
# pip install pyPDF2
# pip install langchain
# pip install tiktoken
# pip install faiss-cpu

In [1]:
# Required Libraries

import google.generativeai as genai
from openai import OpenAI
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import OpenAI

In [12]:
# Required Secret Key

# Get your key: https://ai.google.dev/
gemini_key =

# Get your key: https://platform.openai.com/signup
open_ai_key =

SyntaxError: invalid syntax (1173776649.py, line 4)

In [3]:
# Input

gdp_img = "EA_GDP.jpeg"
pdf_loc = [r"2024.pdf"]

In [4]:
# Code to get image information

def input_image_setup(file_loc):
    from pathlib import Path

    if not (img := Path(file_loc)).exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    image_parts = [
        {
            "mime_type": "image/jpeg",
            "data": Path(file_loc).read_bytes()
            }
        ]
    return image_parts

def get_image_info(image_loc):

    genai.configure(api_key=gemini_key)

    # Set up the model
    generation_config = {
        "temperature":0.4,
        "top_p":1,
        "top_k":32,
        "max_output_tokens":4096,
    }

    model = genai.GenerativeModel(model_name="gemini-pro-vision", generation_config=generation_config)

    input_prompt = """ You are an expert in understanding graphs/visualisation.
    You will receive images with visualisation and stats you will have provide tabular representation of the data """

    question_prompt = "Give a brief context and tabular representation of entire data"

    image_prompt = input_image_setup(image_loc)
    prompt_parts = [input_prompt, image_prompt[0], question_prompt]
    response = model.generate_content(prompt_parts)
    return str(response.text)

gemini_img_output = get_image_info(gdp_img)
print(gemini_img_output)

 The graph shows the GDP growth of East Asia from 2018 to 2021. The total GDP growth is shown in the blue line, while the per capita GDP growth is shown in the light blue line. The total GDP growth was highest in 2018, at 5.9%, and lowest in 2020, at 0.4%. The per capita GDP growth was highest in 2018, at 5.2%, and lowest in 2020, at -0.4%.


| Year | Total GDP Growth | Per Capita GDP Growth |
|---|---|---|
| 2018 | 5.9% | 5.2% |
| 2019 | 5.2% | 4.6% |
| 2020 | 0.4% | -0.4% |
| 2021 | 6.7% | 6.2% |


In [5]:
# Code to get information using Chat-gpt 3.5 Turbo

def gpt_model(gpt_inp):

    template = ("""You are an expert in data updation. You have got context and data to be updated.
    Task is to understand the context of the data and update the data till 2024 with your own knowledge
    and return JSON output""")

    client = OpenAI(api_key=open_ai_key)

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role" : "user",
                "content" : template + gpt_inp,
            }
        ],
        model="gpt-3.5-turbo",
    )

    return(chat_completion.choices[0].message.content)

gpt_output = gpt_model(gemini_img_output)
print(gpt_output)

{
  "data": {
    "2018": {
      "Total GDP Growth": "5.9%",
      "Per Capita GDP Growth": "5.2%"
    },
    "2019": {
      "Total GDP Growth": "5.2%",
      "Per Capita GDP Growth": "4.6%"
    },
    "2020": {
      "Total GDP Growth": "0.4%",
      "Per Capita GDP Growth": "-0.4%"
    },
    "2021": {
      "Total GDP Growth": "6.7%",
      "Per Capita GDP Growth": "6.2%"
    },
    "2022": {
      "Total GDP Growth": "5.5%",
      "Per Capita GDP Growth": "5.0%"
    },
    "2023": {
      "Total GDP Growth": "4.8%",
      "Per Capita GDP Growth": "4.2%"
    },
    "2024": {
      "Total GDP Growth": "5.3%",
      "Per Capita GDP Growth": "4.8%"
    }
  }
}


In [6]:
# Code to get information using Gemini model

def gemini_model(gemini_inp):
    
    genai.configure(api_key=gemini_key)

    model = genai.GenerativeModel(model_name="gemini-pro")

    template = ("""You are an expert in data updation. You have got context and data to be updated.
    Task is to understand the context of the data and update the data till 2024 with your own knowledge
    and return JSON output""")

    response = model.generate_content(template + gemini_inp)
    return str(response.text)

gemini_output = gemini_model(gemini_img_output)
print(gemini_output)

```
{
  "2022": {
    "Total GDP Growth": "4.9%",
    "Per Capita GDP Growth": "4.3%"
  },
  "2023": {
    "Total GDP Growth": "5.3%",
    "Per Capita GDP Growth": "4.7%"
  },
  "2024": {
    "Total GDP Growth": "5.6%",
    "Per Capita GDP Growth": "5.0%"
  }
}
```


In [7]:
# Code to create vector database using FIASS

def convert_pdf_to_vector_db(pdf_paths):

    text = ""
    for pdf_path in pdf_paths:
        pdf_reader = PdfReader(pdf_path)
        for page in pdf_reader.pages:
            text += page.extract_text()

    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
    chunks = text_splitter.split_text(text)

    embeddings = OpenAIEmbeddings(openai_api_key=open_ai_key)
    return FAISS.from_texts(chunks, embeddings)

vector_store = convert_pdf_to_vector_db(pdf_loc)

  warn_deprecated(


In [9]:
# Code to get information using Chat-gpt 3.5 Turbo + PDF knowledge

def gpt_model_with_knowledge(vector_db, user_input):
    docs = vector_db.similarity_search(user_input)

    llm = OpenAI(openai_api_key=open_ai_key)
    chain = load_qa_chain(llm, chain_type="stuff")

    template = ("""You are an expert in data updation. You have got context and data to be updated.
    Task is to understand the context of the data and update the data till 2024 from the document.
    and return JSON output""")

    response_gpt = chain.run(input_documents=docs, question=template+user_input)
    return response_gpt

gpt_output_with_knowledge = gpt_model_with_knowledge(vector_store, gemini_img_output)
print(gpt_output_with_knowledge)

  warn_deprecated(



| Year | Total GDP Growth | Per Capita GDP Growth |
|---|---|---|
| 2018 | 5.9% | 5.2% |
| 2019 | 5.2% | 4.6% |
| 2020 | 0.4% | -0.4% |
| 2021 | 6.7% | 6.2% |
| 2022 | 4.9% | 4.5% |
| 2023 | 4.6% | 4.3% |
| 2024 | 4.3% | 4.0% |

JSON Output: 
{
  "region": "East Asia",
  "country": ["Japan", "Republic of Korea"],
  "year": [2018, 2019, 2020, 2021, 2022, 2023, 2024],
  "total_gdp_growth": [5.9, 5.2, 0.4, 6.7, 4.9, 4.6, 4.3],
  "per_capita_gdp_growth": [5.2, 4.


In [11]:
# Code to get information using Gemini model + PDF knowledge

def gemini_model_with_knowledge(vector_db, user_input):

    docs = vector_db.similarity_search(user_input)

    genai.configure(api_key=gemini_key)

    model = genai.GenerativeModel(model_name="gemini-pro")

    template = ("""You are an expert in data updation. You have got context and data to be updated.
        Task is to understand the context of the data and update the data till 2024 with your own knowledge
        and return JSON output""")

    response = model.generate_content(str(docs) + template + user_input)

    return str(response.text)

gpt_output_with_knowledge = gemini_model_with_knowledge(vector_store, gemini_img_output)
print(gpt_output_with_knowledge)

```JSON
{
 "East Asia GDP Growth 2018-2024": {
  "2018":{
   "Total GDP Growth":"5.9%",
   "Per Capita GDP Growth":"5.2%"
  },
  "2019":{
   "Total GDP Growth":"5.2%",
   "Per Capita GDP Growth":"4.6%"
  },
  "2020":{
   "Total GDP Growth":"0.4%",
   "Per Capita GDP Growth":"-0.4%"
  },
  "2021":{
   "Total GDP Growth":"6.7%",
   "Per Capita GDP Growth":"6.2%"
  },
  "2022":{
   "Total GDP Growth":"3.1%",
   "Per Capita GDP Growth":"2.8%"
  },
  "2023":{
   "Total GDP Growth":"4.6%",
   "Per Capita GDP Growth":"4.3%"
  },
  "2024":{
   "Total GDP Growth":"4.6%",
   "Per Capita GDP Growth":"4.3%"
  }
 }
}
```
