# Generate Podcast Synopsis

In [1]:
# Reload edited modules automatically so changes to imported .py files are picked up.
%load_ext autoreload
%autoreload 2

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

## Load Data

In [2]:
# Path of the interview transcript, relative to the notebook's working directory.
fname = "../data/ft-interview-transcription.txt"

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
# NOTE(review): assumes the transcript file is UTF-8 encoded - confirm.
with open(fname, 'r', encoding='utf-8') as f:
    # Join the lines with a single space. Each line keeps its trailing newline,
    # so the separator is added on top of the original line breaks.
    content = ' '.join(f.readlines())

## Set Up Azure OpenAI

In [3]:
import os
import openai
from dotenv import load_dotenv

# Set up Azure OpenAI
# Load variables from a .env file (if present) into the process environment;
# this has to run before the os.getenv() calls below.
load_dotenv()
# Use the Azure flavour of the API instead of the public OpenAI endpoint.
openai.api_type = "azure"
# Endpoint and key are read from the environment so no secret is stored in the
# notebook. os.getenv() returns None when a variable is not set.
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = "2022-12-01"
openai.api_key = os.getenv("OPENAI_API_KEY")

True

## Deploy a Model

In [4]:
# Model to look for among the existing deployments, and the capability it must expose.
desired_model = 'text-davinci-003' # suitable for text generation
desired_capability = 'completion'

# Look through the existing deployments for a usable one.
deployment_id = None
result = openai.Deployment.list()

for deployment in result.data:
    # Only deployments that finished provisioning can serve requests.
    if deployment["status"] != "succeeded":
        continue

    model = openai.Model.retrieve(deployment["model"])

    # check if desired_model is deployed, and if it has 'completion' capability
    if model["id"] == desired_model and model['capabilities'][desired_capability]:
        deployment_id = deployment["id"]
        # One usable deployment is enough: stop here instead of issuing a
        # Model.retrieve call for every remaining deployment.
        break

# if no model deployed, deploy one
if not deployment_id:
    print('No deployment with status: succeeded found.')

    # Deploy the model
    print(f'Creating a new deployment with model: {desired_model}')
    result = openai.Deployment.create(model=desired_model, scale_settings={"scale_type":"standard"})
    deployment_id = result["id"]
    print(f'Successfully created {desired_model} that supports text {desired_capability} with id: {deployment_id}.')
else:
    print(f'Found a succeeded deployment of "{desired_model}" that supports text {desired_capability} with id: {deployment_id}.')

Found a succeeded deployment of "text-davinci-003" that supports text completion with id: text-davinci-003.


## Text chunks generator

In [5]:
# A generator that splits a text into chunks of roughly n tokens, preferably ending at the end of a sentence
def chunk_generator(text, n, tokenizer):
    """Yield consecutive token chunks of ``text``, preferring sentence boundaries.

    From the current position the window starts at ``int(1.5 * n)`` tokens (or
    whatever is left) and is shrunk one token at a time until its decoded text
    ends with "." or a newline. If the window reaches ``int(0.5 * n)`` tokens
    without finding such an ending, the chunk falls back to ``n`` tokens.

    Args:
        text: The string to split.
        n: Target chunk size in tokens; must be at least 1.
        tokenizer: Object with ``encode(str)`` returning a sliceable sequence of
            tokens and ``decode(tokens)`` returning a ``str``.

    Yields:
        Consecutive, non-overlapping slices of ``tokenizer.encode(text)``.
        Nothing is yielded when the text encodes to no tokens.

    Raises:
        ValueError: If ``n`` is smaller than 1. Because this is a generator, the
            error surfaces when iteration starts, not when the function is called.
    """
    # With n < 1 the fallback chunk is empty and the position never advances,
    # which would yield empty chunks forever.
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n!r}")

    tokens = tokenizer.encode(text)
    total = len(tokens)
    # Window bounds relative to the chunk start; they do not change per chunk.
    shortest = int(0.5 * n)
    longest = int(1.5 * n)

    start = 0
    while start < total:
        floor = start + shortest
        end = min(start + longest, total)
        # Shrink the window from the right until it ends on a sentence boundary.
        while end > floor:
            piece = tokenizer.decode(tokens[start:end])
            if piece.endswith((".", "\n")):
                break
            end -= 1
        # If no end of sentence found, use n tokens as the chunk size
        if end == floor:
            end = min(start + n, total)
        yield tokens[start:end]
        start = end


## Request API

In [6]:
def request_api(document, prompt_postfix, max_tokens):
    """Fill the prompt template with ``document`` and return the completion text.

    Every ``<document>`` marker in ``prompt_postfix`` is replaced by
    ``document``. The prompt is sent to the deployment named by the
    module-level ``deployment_id``, and the ``text`` of the first returned
    choice is handed back. ``max_tokens`` is forwarded to the API unchanged.
    """
    filled_prompt = prompt_postfix.replace('<document>', document)

    # Request settings: temperature 0, both penalties at 1, and generation
    # stops at '###'.
    completion_args = dict(
        deployment_id=deployment_id,
        prompt=filled_prompt,
        temperature=0,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1,
        stop='###',
    )
    response = openai.Completion.create(**completion_args)

    first_choice = response['choices'][0]
    return first_choice['text']

## Generate Synopsis

In [7]:
def get_synopsis(content, prompt_postfix, n=2000, max_tokens=1000):
    """Run the prompt over ``content`` chunk by chunk and join the answers.

    ``content`` is tokenised with tiktoken's ``p50k_base`` encoding and split
    by ``chunk_generator``. Each chunk is decoded back to text and sent to
    ``request_api`` together with ``prompt_postfix``.

    Args:
        content: Full text to summarise.
        prompt_postfix: Prompt template containing a ``<document>`` marker.
        n: Target chunk size in tokens passed to ``chunk_generator``. A chunk
            can be up to ``int(1.5 * n)`` tokens long.
        max_tokens: Response budget per request, passed to ``request_api``.

    Returns:
        The per-chunk responses joined with a single space, in chunk order.
        An empty string when ``content`` produces no chunks.
    """
    import tiktoken

    tokenizer = tiktoken.get_encoding('p50k_base')

    # One request per chunk; chunks are decoded lazily as they are produced.
    synopsis_chunks = [
        request_api(tokenizer.decode(chunk), prompt_postfix, max_tokens)
        for chunk in chunk_generator(content, n, tokenizer)
    ]

    return ' '.join(synopsis_chunks)

In [8]:
# Prompt template for summarisation. request_api substitutes the text to process
# for the <document> marker; the instruction follows the '###' separator.
# This rebinds the module-level name `prompt_postfix`, which later cells reuse
# for other prompts.
prompt_postfix = """ <document>
  \n###
  \nSummarise the transcript of a podcast above into a synopsis. 
  \nSynopsis : 
"""
#print(prompt_postfix)

# Summarise the transcript chunk by chunk; the result is reused by the
# translation, tag-line and keyword cells.
synopsis = get_synopsis(content, prompt_postfix)

print(synopsis)

The Financial Times' US financial commentator Robert Armstrong discusses the collapse of Silicon Valley Bank and why it is not a repeat of the 2008 financial crisis. He explains that two factors led to SVB's collapse: bad decisions at the bank and a rapid increase in interest rates. Rob also outlines how banks operate, what went wrong with SVB specifically, and whether there are larger systemic reasons for this. He further explains why we have a two-tiered system when it comes to banking regulations, as well as how Dodd-Frank Act rollback may or may not have played a role in this situation. Finally, he reassures listeners that so long as people don't panic, everything should be fine and that deposits under $250k are covered by the US government. Robert Armstrong, a financial expert, discusses the recent collapse of SVB and its implications for banking regulation. He explains that banks are now better capitalised than before 2008 due to regulations put in place after the crisis. He also

## Translate Synopsis

In [9]:
# Prompt template for translation. request_api substitutes the text to translate
# for the <document> marker; the instruction follows the '###' separator.
# This overwrites the `prompt_postfix` used for the synopsis.
prompt_postfix = """ <document>
  \n###
  \nTranslate synopsis into Mandarin.  
  \nTranslation : 
"""
#print(prompt_postfix)

In [10]:
# Translate the whole synopsis in a single request (no chunking), allowing up
# to 1000 tokens for the response.
max_tokens = 1000
translation = request_api(synopsis, prompt_postfix, max_tokens)
print(translation)

美国金融评论家罗伯特·阿姆斯特朗（Robert Armstrong）在《金融时报》中讨论了硅谷银行的崩溃，以及为什么这不是2008年金融危机的重演。 他解释说，导致SVB倒闭的两个因素是银行内部的不当决定和利率急剧上升。 罗布还概述了银行如何运作、SVB具体出了什么问题以及是否存在更大的系统原因。 此外，他还说明了我们在银行监管方面存在一套分层体制，并提出道德弗兰克法案(Dodd-Frank Act) 回退可能或可能不会对此情况产生影响。 最后，他保证人民只要不惊慌就应该一切安好；而250,000美元之下的储备将由美国政府承保。 金融专家 Robert Armstrong 讨论了 SVB 最近的崩盘及其对银行监管意味.  他 解释 说 ： 由 于 2008 年 后 所 颁 布 的 相 关 法 规 ； 银 行 的 资 本 水 平 比 2008年之前已有所加强。 
  
此外, 也建议投���者在存入钱时注意看看它们的杠杆情况, 最后, 预测随之而来, 高风标准将使得 bank equity capital (bank equity capital) 成本上升 ,并放緩效能.


美国金融评论家罗伯特·阿姆斯特朗（Robert Armstrong）在《金融时报》上就硅谷银行的崩溃进行了讨论，并且说明这不是2008年金融危机的复制。他解释说，导致SVB倒闭的原因有两方面：一是银行内部出现了不当决定; 二是利率急剧上升。此外，Rob 还概述了银行如何运作、SVB具体出现什么问题以及是否存在更大的体系性原因。此外，他还详细说明了为什么我们在监管方面存在双标准制度, 以及道德弗兰克法案(Dodd-Frank Act) 的退减对此情况会不会造成影响。最后,  Robert Armstrong 向听众保证, 只要人民不惊慌失措, 情况应当一切安好, 250k 美元之下的储户由美国政府承保.
Robert Armstrong 金融专家提出了 SVB 最新崩盘及其对相关监管带来的影响。 他表明由于 2008 年危机之后所施加的法律法规使得如今的银行已然比 2008 年时更加强壮。 此外， 也告诫投者将存款存入需要留意看看能耐性情况。 最后 ， Robert Armstrong 预测随之考勒将使得 bank equity capital 更加昂���耗时曲緩效应将随之考勒使效能受到影响.

## Generate Tag Lines

In [11]:
# Prompt template for tag lines. request_api substitutes the synopsis for the
# <document> marker; the instruction follows the '###' separator.
# This overwrites the `prompt_postfix` used by the previous section.
prompt_postfix = """ <document>
  \n###
  \nGenerate 2 to 3 tag lines based on the podcast synopsis above.
"""
#print(prompt_postfix)

In [12]:
# Generate the tag lines from the synopsis in a single request, allowing up to
# 500 tokens for the response.
max_tokens = 500
tag_lines = request_api(synopsis, prompt_postfix, max_tokens)
print(tag_lines)


1. Get the facts on Silicon Valley Bank's collapse and its implications for banking regulations.
2. Don't panic: Robert Armstrong explains why deposits under $250k are safe.
3. Learn how Dodd-Frank Act rollback may have played a role in SVB's collapse with Robert Armstrong.


## Generate Search Engine Optimised (SEO) Keywords

In [13]:
# Prompt template for SEO keywords. request_api substitutes the text to analyse
# for the <document> marker; the instruction follows the '###' separator.
# This overwrites the `prompt_postfix` used by the previous section.
prompt_postfix = """ <document>
  \n###
  \nGenerate 5 search engine optimised keywords based on text above.  
"""
#print(prompt_postfix)

In [14]:
def get_keywords(content, prompt_postfix, n=2000, max_tokens=100):
    """Run the keyword prompt over ``content`` chunk by chunk and join the answers.

    ``content`` is tokenised with tiktoken's ``p50k_base`` encoding and split
    by ``chunk_generator``. Each chunk is decoded back to text and sent to
    ``request_api`` together with ``prompt_postfix``.

    Args:
        content: Text to extract keywords from.
        prompt_postfix: Prompt template containing a ``<document>`` marker.
        n: Target chunk size in tokens passed to ``chunk_generator``. A chunk
            can be up to ``int(1.5 * n)`` tokens long.
        max_tokens: Response budget per request, passed to ``request_api``.

    Returns:
        The per-chunk responses joined with a single space, in chunk order.
        An empty string when ``content`` produces no chunks.
    """
    import tiktoken

    tokenizer = tiktoken.get_encoding('p50k_base')

    # One request per chunk; chunks are decoded lazily as they are produced.
    keyword_chunks = [
        request_api(tokenizer.decode(chunk), prompt_postfix, max_tokens)
        for chunk in chunk_generator(content, n, tokenizer)
    ]

    return ' '.join(keyword_chunks)

In [15]:
# Keywords are derived from the generated synopsis, not from the raw transcript.
keywords = get_keywords(synopsis, prompt_postfix)
print(keywords)


1. Silicon Valley Bank collapse
2. Robert Armstrong financial expert
3. Banking regulations post-2008 crisis
4. Dodd-Frank Act rollback 
5. US government deposit insurance
