# Chicago Flashback

In [10]:
import env
import utils

# We call Gemini directly instead of going through langchain
# because of the safety settings:
# langchain is still developing support for them and hasn't released it yet.
import google.generativeai as genai
import os

# Authenticate the Gemini SDK with the key read from the GOOGLE_API_KEY
# environment variable (None is passed through if the variable is unset).
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Model handle used by the translation cell; the safety settings come from
# the project-local `utils` module.
gemini_options = {
    "model_name": "gemini-pro",
    "safety_settings": utils.safety_config,
}
llm_gemini = genai.GenerativeModel(**gemini_options)


In [11]:
# Source EPUB to read, and the text file the translated chunks are appended to.
# Plain string literals: the previous f-prefix had no placeholders to fill.
input_file = "/Users/bell/Books/ChicagoFlashback.epub"
output_file = "Chicago/ChicagoFlashback-cn.txt"

In [13]:
# Prerequisites for loading the EPUB (install once):
#   brew install pandoc
#   pip install pandoc pypandoc
from langchain_community.document_loaders import UnstructuredEPubLoader

# Load the EPUB at `input_file` in "elements" mode; `pages` holds whatever
# list loader.load() returns.
# NOTE(review): presumably one Document per structural element of the book
# (short fragments rather than real pages) -- confirm against the loader docs.
loader = UnstructuredEPubLoader(input_file, mode="elements")
pages = loader.load()

In [18]:
# Merge the many small loaded documents into larger text chunks so that each
# translation request carries a reasonable amount of text.
print(f"pages: {len(pages)}")
# default=0 keeps this diagnostic from raising when nothing was loaded.
print(f"max size: {max((len(p.page_content) for p in pages), default=0)}")

# A chunk is emitted as soon as its accumulated length exceeds this many
# characters, so a chunk can overshoot by at most one document's length.
chunk_min_chars = 5000

updated_pages = []
pending_parts = []
size = 0
for page in pages:
    text = page.page_content
    pending_parts.append(text)
    size += len(text)
    if size > chunk_min_chars:
        updated_pages.append("".join(pending_parts))
        pending_parts = []
        size = 0

# Keep the tail: text accumulated after the last full chunk was previously
# discarded, which silently dropped the end of the book.
if size:
    updated_pages.append("".join(pending_parts))

print(len(updated_pages))

pages: 4206
max size: 852
33


In [19]:
# Preview the first merged chunk: its length, then its text.
first_chunk = updated_pages[0]
print(len(first_chunk), first_chunk, sep="\n")

# From here on `pages` is the list of merged chunk strings rather than the
# loaded documents, so the merging cell (which reads `.page_content`) cannot
# be re-run without loading the EPUB again.
pages = updated_pages

20035
Copyright © 2017 by the
Chicago TribuneAll rights reserved.
No part of this book may be
reproduced or transmitted in any form or by any means, electronic or
mechanical, including photocopying, recording, or by any information
storage and retrieval system, without express written permission from
the publisher.Chicago
Tribune: R.
Bruce Dold, Publisher &
Editor-in-Chief; Peter Kendall, Managing Editor; Colin McMahon,
Associate Editor; Amy Carr, Associate Managing Editor/Features; Marianne
Mather, Photo Editor; Kathleen O’Malley, Copy Editor.Chicago
Flashbackebook ISBN 13:
978-1-57284-807-8First printing: November
2017101718192022Midway Books is an imprint of
Agate Publishing. Agate books
are available in bulk at discount prices. For more information, visit
agatepublishing.com.ContentsINTRODUCTIONHappy
birthday, ChicagoCHAPTER
ONE:Pageantry and
ProgressFerris’ wheel
dealThe Second
City’s second world’s fairThe dingy
cityRaising
Chicago out of the mudIn a rush,
the river is reversedTh

In [20]:
# Translate every chunk in `pages` to simplified Chinese with Gemini and append
# the results to `output_file`, one "---- part: N ----" section per chunk.

# The prompt text is kept exactly as before, including its line breaks,
# indentation and the literal backslash before the fenced text. The backslash
# is now written as "\\" because "\ " is an invalid escape sequence
# (SyntaxWarning on Python 3.12+).
# NOTE(review): the backslash looks unintentional -- confirm before removing
# it, since any prompt change can alter the model output.
prompt_template = (
    " Suppose you are a translator, and your target language is simplified Chinese, \n"
    "    Your audiences are Chinese native young students in Junior school about 13 years old. \n"
    "    Translate the following english text to simplified Chinese: \\ ```{page}``` "
)

# utf-8 is explicit so the Chinese output does not depend on the platform's
# default encoding; `with` closes the file even if a request raises midway.
with open(output_file, "a", encoding="utf-8") as file_output:
    for index, page in enumerate(pages):
        prompt = prompt_template.format(page=page)

        response = llm_gemini.generate_content(
            prompt,
            safety_settings=utils.safety_config,
        )

        file_output.write(f"\n---- part: {index} ---- \n")
        if len(response.candidates) > 0:
            # Only the first candidate is written; a response without
            # candidates leaves just the part header in the file.
            for p in response.candidates[0].content.parts:
                file_output.write(p.text.strip())
        # Flush after every part so finished work is on disk if a later
        # request fails.
        file_output.flush()