In [32]:
import json
import sys
if '..' not in sys.path:
    sys.path.append('..')

import cohere
import pandas as pd

from stability_sdk import client
import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation

# APIs

In [4]:
with open('../cohere_api_key.txt', 'r') as f:
    cohere_api_key = f.read()
co = cohere.Client(cohere_api_key)

with open('../stability_api_key.txt', 'r') as f:
    stability_api_key = f.read()
stability_api = client.StabilityInference(
    key=stability_api_key, 
    verbose=True,
)

del cohere_api_key
del stability_api_key

# Summarizing stories

In [27]:
def summarize(prompt, model="xlarge", 
             num_generations=5, temperature=0.7, 
             max_tokens=2000, stop_sequences=['<end>']):
             
  prediction = co.generate(
    model=model,
    prompt=prompt,
    return_likelihoods = 'GENERATION',
    stop_sequences=stop_sequences,
    max_tokens=max_tokens,
    temperature=temperature,
    num_generations=num_generations)
  
  # Get list of generations
  gens = []
  likelihoods = []
  for gen in prediction.generations:
      gens.append(gen.text)
      
      sum_likelihood = 0
      for t in gen.token_likelihoods:
          sum_likelihood += t.likelihood
      # Get sum of likelihoods
      likelihoods.append(sum_likelihood)

  pd.options.display.max_colwidth = 200
  # Create a dataframe for the generated sentences and their likelihood scores
  df = pd.DataFrame({'generation': gens, 'likelihood': likelihoods})
  # Drop duplicates
  df = df.drop_duplicates(subset=['generation'])
  # Sort by highest sum likelihood
  df = df.sort_values('likelihood', ascending=False, ignore_index=True)
  
  return df

### Tutorial

In [28]:
prompt = f"""Passage: Is Wordle getting tougher to solve? \
    Players seem to be convinced that the game has gotten harder in recent weeks \
    ever since The New York Times bought it from developer Josh Wardle in late January. \
    The Times has come forward and shared that this likely isn't the case. \
    That said, the NYT did mess with the back end code a bit, removing some offensive \
    and sexual language, as well as some obscure words There is a viral thread \
    claiming that a confirmation bias was at play. One Twitter user went so far as \
    to claim the game has gone to "the dusty section of the dictionary" \
    to find its latest words.

TLDR: Wordle has not gotten more difficult to solve.
--
Passage: ArtificialIvan, a seven-year-old, London-based payment and expense management \
software company, has raised $190 million in Series C funding led by ARG Global, \
with participation from D9 Capital Group and Boulder Capital. Earlier backers also \
joined the round, including Hilton Group, Roxanne Capital, Paved Roads Ventures, \
Brook Partners, and Plato Capital.

TLDR: ArtificialIvan has raised $190 million in Series C funding.
--
Passage: The National Weather Service announced Tuesday that a freeze warning is in \
effect for the Bay Area, with freezing temperatures expected in these areas overnight. \
    Temperatures could fall into the mid-20s to low 30s in some areas. In anticipation \
    of the hard freeze, the weather service warns people to take action now.

TLDR:"""

In [29]:
response = co.generate( 
    model='xlarge', 
    prompt = prompt,
    max_tokens=100, 
    temperature=1.5,
    stop_sequences=["--"])

In [30]:
summary = response.generations[0].text

In [31]:
print(summary)

--


### Actual fairytales

In [33]:
with open('../data/stories/fairy_tales.json', errors='ignore') as f:
    data = json.load(f)
    # text = data['text']
    # print(type(text), len(text))

In [35]:
for story in data:
    print(story['title'])
    print()
    print(story['summary'])
    break

Hansel and Gretel

Cast out by their parents, the starving brother and sister stumble on the edible house of an old woman. They don't know she's built the house specifically to lure children to their death. The old woman cages Hansel and forces Gretel to work as a servant, but in the end Gretel manages to kill the woman and free her brother. They return home with riches from the house in the woods to find their stepmother dead and their father overjoyed to see them.


In [37]:
#TODO: wygląda na to, że trzeba zrobić jakiś cleanup historii,
# aby usunąć ten znak: â€ś

print(data[0]['text'])
print()
print(data[0]['summary'])
print()
print(data[1]['text'])

Once upon a time, a brother and sister named Hansel and Gretel lived in a hut in the woods with their father who was a poor woodcutter and their mother. Their parents were very poor and had barely enough food to eat One day, their parents sent them off into the woods in search of greener pastures. Their mother cried as she sent them off but they could not take care of them any longer. Hansel and Gretel took a few pebbles and some bread crumbs with them. That night the two children had nowhere to sleep. They wandered in the forest for days, looking for food and a place to stay. The children slept under a tree that night and kept each other warm. The next morning when the sun rose, Hansel turned to his little sister. â€śGretel,â€ť he said, â€śwe cannot stay here.  We must go deeper into the woods!  Surely we will find more to eat than what we get at home and we can bring some food for mother and fatherâ€ť Gretel was worried. â€śBut what if we get lost?â€ť she asked â€śWe wonâ€™t!â€ť said

In [40]:
# too_long_prompt = f"""{data[0]['text']}

# TLDR: Two starving children found eatable house inhabited by witch. \
#     She cages them and plans to eat them, but Hansel and Gretel manage to kill her and escape.
# --

# {data[1]['text']}

# TLDR: 
# """

In [48]:
prompt = f"""Passage: ArtificialIvan, a seven-year-old, London-based payment and expense management \
software company, has raised $190 million in Series C funding led by ARG Global, \
with participation from D9 Capital Group and Boulder Capital. Earlier backers also \
joined the round, including Hilton Group, Roxanne Capital, Paved Roads Ventures, \
Brook Partners, and Plato Capital.

TLDR: ArtificialIvan has raised $190 million in Series C funding.
--

Passage: {data[7]['text']}

TLDR: 
"""

In [49]:
results = summarize(prompt)

CohereError: too many tokens: total number of tokens (prompt and prediction) cannot exceed 2048 - received 3082. Try using a shorter prompt or a smaller max_tokens value.

In [None]:
results.head()