In [14]:
import os
import time
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

In [15]:
# Hand the API key to the Gemini client. The key is read from the
# GEMINI_API_KEY environment variable so it never appears in the notebook;
# if the variable is not set, the lookup raises KeyError right here.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

In [16]:
# Generation settings shared by every request made through `model`.
generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=64,
    max_output_tokens=8192,
    response_mime_type="text/plain",
)

# Build the summarization model: the settings above, every listed harm
# category set to BLOCK_NONE, and a system instruction that fixes the
# output format (Markdown, 5-10 paragraphs, no lists).
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro",
    generation_config=generation_config,
    safety_settings={
        category: HarmBlockThreshold.BLOCK_NONE
        for category in (
            HarmCategory.HARM_CATEGORY_HARASSMENT,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        )
    },
    system_instruction=(
        "You are a professional summarizer specialized in Markdown and JSON formats. "
        "Summarize the given Markdown text and JSON object and output in the Markdown format. "
        "Do not use lists or very short sentences. "
        "Write 5-10 paragraphs."
    ),
)

In [17]:
def summarize(md_path: str, json_path: str, summarization_path: str) -> None:
    """Summarize one Markdown/JSON pair with the model and save the reply.

    The Markdown file's text, followed by the JSON file's text wrapped in a
    fenced ```json block, is sent as a single message in a fresh chat session
    on the module-level ``model``. The reply text is written to
    ``summarization_path`` (overwriting any existing file).

    Args:
        md_path: Path of the UTF-8 Markdown file to summarize.
        json_path: Path of the UTF-8 JSON file appended to the prompt. Its
            content is passed through as text; it is not parsed or validated.
        summarization_path: Path of the file that receives the summary. Its
            parent directory is created if it does not exist yet.

    Raises:
        OSError: If an input file cannot be read or the output cannot be
            written.
        Exception: Whatever ``send_message`` / ``response.text`` raise, e.g.
            when the request fails or the reply has no text
            (NOTE(review): exact SDK exception types not visible here).
    """
    with open(md_path, "r", encoding="UTF-8") as f:
        md_text = f.read()

    with open(json_path, "r", encoding="UTF-8") as f:
        json_text = f.read()

    # The closing fence must sit on its own line; without this, a JSON file
    # lacking a trailing newline would yield `...}```` and a broken block.
    if json_text and not json_text.endswith("\n"):
        json_text += "\n"

    content = f"{md_text}\n\n```json\n{json_text}```"

    # Make sure the destination exists *before* calling the API, so a missing
    # output directory cannot throw away a completed response.
    out_dir = os.path.dirname(summarization_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    chat_session = model.start_chat(history=[])
    response = chat_session.send_message(content)

    with open(summarization_path, 'w', encoding='UTF-8') as f:
        f.write(response.text)

In [21]:
# One (markdown, json, summary) path triple per Markdown file in ./MDs/, in
# sorted filename order. Only entries ending in ".md" are used, so stray
# directory contents (e.g. .ipynb_checkpoints, .DS_Store) are skipped, and
# the stem comes from os.path.splitext so that a name with several dots such
# as "a.b.md" maps to "a.b.json" / "a.b.md" instead of "b.json" / "b.md".
paths = [
    (f'./MDs/{md}', f'./JSONs/{stem}.json', f'./Summarizations/{stem}.md')
    for md in sorted(os.listdir('./MDs/'))
    for stem, ext in [os.path.splitext(md)]
    if ext.lower() == '.md'
]

In [None]:
# Seconds to wait between two consecutive requests
# (presumably to stay under the API rate limit — confirm against your quota).
REQUEST_INTERVAL_SECONDS = 60

for index, (md_path, json_path, summ_path) in enumerate(paths):
    # Throttle before every request except the first, so the cell does not
    # idle for a full interval after the last document has been written.
    if index > 0:
        time.sleep(REQUEST_INTERVAL_SECONDS)

    # Progress trace: which files are being processed right now.
    print(md_path, json_path, summ_path)

    summarize(md_path, json_path, summ_path)