# Chicago flagshback

In [26]:
import env
import utils

# We use Gemini directly instead through langchain
# because of safety settings,
# langchain is developing this and not hasn't released yet
import google.generativeai as genai
import os

genai.configure(
    api_key=os.environ.get("GOOGLE_API_KEY")
)
llm_gemini = genai.GenerativeModel(
    model_name = "gemini-pro",
    safety_settings=utils.safety_config
)


In [27]:
input_file = f"/Users/bell/Books/ChicagoFlashback.epub"
output_file = f"Chicago/ChicagoFlashback-cn.txt"

In [28]:
# need to install:
#  brew install pandoc
#  pip install pandoc, pypandoc
from langchain_community.document_loaders import UnstructuredEPubLoader

loader = UnstructuredEPubLoader(input_file, mode="elements")
pages = loader.load()

In [29]:
print(f"pages: {len(pages)}")
print(f"max size: {max([len(p.page_content) for p in pages])}")

updated_pages = []
size = 0
index = 0
content = ""
while index < len(pages):
    page = pages[index]
    size += len(page.page_content)
    content += page.page_content
    if size > 4000:
        updated_pages.append(content)
        content = ""
        size = 0
    index += 1
print(len(updated_pages))

pages: 4206
max size: 852
162


In [35]:
print(len(updated_pages[0]))
print(updated_pages[2])
pages = updated_pages

4302
1845: Chicago passes first blue
law, closing “tippling houses” on Sundays.1846: Chicago claims it has one of
the “best and safest harbors on the lake.”1847: Chicago Tribune begins
publishing.1848: Chicago welcomes business
with opening of the I&M Canal and the Chicago Board of
Trade.1849: Spring storm sweeps away all
Chicago River bridges.1850: City planks 6.7 miles of
streets, including 12,000 feet of State Street.1851: Public Water Board organized
to handle recurring cholera epidemics.1852: First public transportation
(a large horse-drawn carriage).1853: YMCA expands to
Chicago.1854: Lakeview is promoted as a
pleasant summer retreat away from city’s disease and heat.1855: Lager Beer Riots in April
protest higher saloon taxes and anti-beer laws.1856: City raises streets out of
the swamp.1857: Allan Pinkerton’s men thwart
a grave-robbing scheme by a city official.1858: Police force gets uniforms
and fire department switches from volunteer to paid.1859: First horse-drawn street
rai

In [40]:
file_output = open(output_file, "a")
start = 141
for index, page in enumerate(pages[start:]):
    prompt = f""" Suppose you are a translator, and your target language is simplified Chinese, 
    Your audiences are Chinese native young students in Junior school about 13 years old. 
    Translate the following english text to simplified Chinese: \ ```{page}``` """ 
    
    response = llm_gemini.generate_content(
        prompt,
        safety_settings=utils.safety_config
    )

    file_output.write(f"\n---- part: {index+start} ---- \n")
    if len(response.candidates) > 0:
        # we only get the first candidate
        for p in response.candidates[0].content.parts:
            file_output.write(p.text.strip())
        file_output.flush()
    
file_output.close()