# Gemini
No fine-tuning, but taking advantage of the large context window instead.
* https://ai.google.dev/tutorials/tuning_quickstart_python
* https://github.com/google/generative-ai-python/tree/v0.3.0/google/generativeai
* https://ai.google.dev/gemini-api/docs/get-started/python
* https://aistudio.google.com/app/prompts/new_chat?utm_source=onboarding&utm_medium=email&utm_campaign=welcome&utm_content=
* `gemini-pro`: optimized for text-only prompts.
* `gemini-pro-vision`: optimized for text-and-images prompts.

In [8]:
with open('gemini_api_key.txt','r') as f:
    my_gemini_api_key = f.read()

In [9]:
from pathlib import Path
import hashlib
import google.generativeai as genai

genai.configure(api_key=my_gemini_api_key)

# Set up the model
generation_config = {
  "temperature": 1,
  "top_p": 0.95,
  "top_k": 0,
  "max_output_tokens": 8192,
}

safety_settings = [
  {
    "category": "HARM_CATEGORY_HARASSMENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_HATE_SPEECH",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
]

model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest",
                              generation_config=generation_config,
                              safety_settings=safety_settings)

  from .autonotebook import tqdm as notebook_tqdm


## Adding extracted paragraphs in contex window

In [5]:
import os
import glob
import pandas as pd

datasets = sorted(glob.glob(os.path.join(os.getcwd(), 
                                            'data', 
                                            'DRS', 
                                            'dataset', 
                                            'all_drs', 
                                            'dataset_gemini raw data_*.parquet')), 
				key=os.path.getctime, 
				reverse=True)

df_dataset = pd.read_parquet(datasets[0])
df_dataset = df_dataset.reset_index()

In [12]:
history=[]
for index, row in df_dataset.iterrows():
    history.append({"role": "user",
                    "parts": [row["0"]]})

## Adding whole document in contex window

In [6]:
import os
import glob
import pandas as pd
import PyPDF2
import datetime

# drs_doc_types = ['AC',
#                  'ORDER_8300.10',
#                  'ORDER_8400.10',
#                  'ORDER_8700.1',
#                  'ORDER_8740.1',
#                  'ORDER_8900.1',
#                  'ORDERS']
drs_doc_types = ['AC']

history=[]

df_all_content = None

for drs_doc_type in drs_doc_types:
    df_index = glob.glob(os.path.join(os.getcwd(), 'data', 'DRS', 'index', drs_doc_type+'*.parquet'))[-1]
    df_index = pd.read_parquet(df_index)

    for index, row in df_index.iterrows():
        matching_file = glob.glob(os.path.join(os.getcwd(), 'data', 'DRS', 'raw data', drs_doc_type, 'pdf','*'+row['documentGuid']+'*.pdf'))
        if len(matching_file)>0:
            if row['drs:status']=='Current':
                pdf = PyPDF2.PdfReader(matching_file[0])
                '\n\n====== NEW PAGE ======\n\n'.join([page.extract_text() for page in pdf.pages])
                history.append({"role": "user",
                                "parts": ['\n\n====== NEW PAGE ======\n\n'.join([page.extract_text() for page in pdf.pages])]})

unknown widths : 
[0, IndirectObject(676, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(680, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(684, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(688, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(701, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(705, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(709, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(713, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(717, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(721, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(725, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(729, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(138, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(141, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(144, 0, 138538041830576)]
unknown widths : 
[0, IndirectObject(147, 0, 1385380418

## Filtering history

In [10]:
convo = model.start_chat(history=history[0:10])

## Prompt

In [11]:
template = """
Role: you are an expert in all aspects of operations, engineering, design, maintenance, airworthiness and certification for rotorcraft and airplanes.

Context: use the previous document provided in the chat.

Question: """

In [12]:
response = convo.send_message(template+"Is there any restriction when installing a lithium battery?")

In [None]:
response

response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=glm.GenerateContentResponse({'candidates': [{'content': {'parts': [{'text': '## Lithium Battery Installation Restrictions in Aircraft:\n\nWhile the provided document, "Approval of Propulsion Fuels, Additives, and Lubricating Oils," doesn\'t directly address lithium battery installation, my knowledge as a rotorcraft and airplane expert allows me to provide information regarding restrictions:\n\n**General Restrictions and Considerations:**\n\n* **Type of Lithium Battery:** \n    * **Lithium Metal Batteries:** Generally prohibited as cargo on passenger aircraft due to fire hazards.\n    * **Lithium-ion Batteries:** Allowed with restrictions and regulations depending on size, Watt-hour rating, and use (e.g., installed equipment, carry-on, or checked baggage).\n* **Regulations:**\n    * **FAA:** Strict guidelines for carrying lithium batteries on board, particularly for large batteries or those exceeding specifi

In [None]:
convo.last.text

AttributeError: 'NoneType' object has no attribute 'text'