In [1]:
!sudo apt-get install festival
!pip install phonemizer
!pip install --upgrade openai
!pip install keybert

Reading package lists... Done
Building dependency tree       

The following additional packages will be installed:
  alsa-utils festlex-cmu festlex-poslex festvox-kallpc16k libatopology2
  libestools2.5 libfftw3-single3 sgml-base
Suggested packages:
  dialog pidgin-festival festival-freebsoft-utils libfftw3-bin libfftw3-dev
  sgml-base-doc
The following NEW packages will be installed:
  alsa-utils festival festlex-cmu festlex-poslex festvox-kallpc16k
  libatopology2 libestools2.5 libfftw3-single3 sgml-base
0 upgraded, 9 newly installed, 0 to remove and 24 not upgraded.
Need to get 8,238 kB of archives.
After this operation, 26.9 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu focal/main amd64 sgml-base all 1.29.1 [12.4 kB]
Get:2 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 libatopology2 amd64 1.2.2-2.1ubuntu2.5 [47.3 kB]
Get:3 http://archive.ubuntu.com/ubuntu focal/main amd64 libfftw3-single3 amd64 3.3.8-2ubuntu1 [756 kB]
Get:4 http://arch

## Import Libraries

In [2]:
from phonemizer import phonemize
from phonemizer.separator import Separator
import os
import openai
from keybert import KeyBERT
import nltk
from nltk import pos_tag, word_tokenize
from nltk.stem import WordNetLemmatizer
import string

In [3]:
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

## Specify OpenAI Finetuned Model for Counterspeech Generation

In [5]:
def gen_counterspeech(model, prompt, max_tokens=80, stop=['->', '[', '=>'], temp=0.3, pres_pen=-0.8, freq_pen=1, best_of=1, n=1):
  prompt = prompt + ' ->'
  res = openai.Completion.create(model=model, prompt=prompt, max_tokens=80, stop=['->', '[', '=>'], temperature=temp, presence_penalty=pres_pen, frequency_penalty=freq_pen, best_of=best_of, n=n)
  return res

## Keyword Extraction

In [6]:
kw_model = KeyBERT(model='all-mpnet-base-v2')
lemmatizer = WordNetLemmatizer()

Downloading (…)a8e1d/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)0bca8e1d/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)e1d/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)a8e1d/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)8e1d/train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)bca8e1d/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [7]:
def extract_topics(counterspeech_result, highlight=True, use_mmr=True, diversity=0.2, keywords_n=3):
  topics = []

  for cs in counterspeech_result['choices']:
    keywords = kw_model.extract_keywords(cs['text'], keyphrase_ngram_range=(1, 1), stop_words='english', highlight=highlight, use_mmr=True, diversity=diversity)

    tmp = []
    for x in keywords[:keywords_n]:
      tmp.append(x[0])
    topics.append(tmp)
  
  return topics


## Haiku Generation

In [8]:
def gen_haikus(topics, max_tokens=20, tmp=0.7, pres_pen=-1, freq_pen=2, best_of=5, n=3):
  haiku_res = openai.Completion.create(
    model="text-davinci-003",
    prompt=f"Write a haiku regarding these topics: {topics}",
    max_tokens=max_tokens,
    temperature=tmp,
    presence_penalty=pres_pen, 
    frequency_penalty=freq_pen, 
    best_of=best_of, 
    n=n
  )

  return haiku_res

## Phoenemizer Pruning

In [9]:
def count_haiku_syllables(haiku):
  phn = phonemize(haiku, language='en-us', backend='festival',
                with_stress=False, separator=Separator(phone=None,
                word=' ', syllable="|"), strip=True)
  
  split_lines_words = [words.split(' ') for words in phn.split('\n')]

  syllables_count = []

  for line in split_lines_words:
    syllables = 0
    for word in line:
      syllables += len(word.split('|'))

    syllables_count.append(syllables)
  
  
  return syllables_count

In [10]:
def get_candidates(haikus, cs_topics, hs_topics):
  candidates = {'5s': [], '7m': [], '5f': []}
  haiku_candidates = []

  for haiku_choices in haikus:
    for haiku in haiku_choices['choices']:
      
      split_lines = haiku['text'].split('\n')[2:] # first two entires are \n, \n
      syllable_count = count_haiku_syllables(haiku['text'])

      print(split_lines, syllable_count)

      if syllable_count == [5,7,5]:
        haiku_candidates.append(haiku['text'])

      if len(syllable_count) >= 1 and syllable_count[0] == 5 and split_lines[0] not in candidates['5s']:
        candidates['5s'] = candidates['5s'] + [split_lines[0]]
      if len(syllable_count) >= 2 and syllable_count[1] == 7 and split_lines[1] not in candidates['7m']:
        candidates['7m'] = candidates['7m'] + [split_lines[1]]
      if len(syllable_count) >= 3 and syllable_count[2] == 5 and split_lines[2] not in candidates['5f']:  
        candidates['5f'] = candidates['5f'] + [split_lines[2]]

  synthetic_haikus = []


  for start in candidates['5s']:
    for middle in candidates['7m']:
      for finish in candidates['5f']:
        synthetic_haikus.append((start, middle, finish))


  return synthetic_haikus, haiku_candidates, candidates


In [18]:
def gen_synthetic_cubed(synthetic, cs_topics, hs_topics):
  synthetic_cubed = []
  candidates = {'5s': [], '7m': [], '5f': []}

  for s in synthetic:
    tmp = []

    for w in s[0].lower().split(' '):
      w = w.translate(str.maketrans('', '', string.punctuation)) 
      if w in hs_topics and w in cs_topics:
        tmp.append(s[0])
        break

    for w in s[1].lower().split(' '):
      w = w.translate(str.maketrans('', '', string.punctuation)) 
      # if w in hs_topics or w in cs_topics:
      if w in cs_topics:
        tmp.append(s[1])
        break

    for w in s[1].lower().split(' '):
      w = w.translate(str.maketrans('', '', string.punctuation)) 
      # if w in hs_topics or w in cs_topics:
      if w in cs_topics:
        tmp.append(s[2])
        break

    if len(tmp) == 3:
      synthetic_cubed.append(tmp)

      candidates['5s'] = candidates['5s'] + [tmp[0]]
      candidates['7m'] = candidates['7m'] + [tmp[1]]
      candidates['5f'] = candidates['5f'] + [tmp[2]]

      

  final_synthetic_cubed = []
  visited = set()

  for s in synthetic_cubed:
    if s[0] not in visited and s[1] not in visited and s[2] not in visited:
      final_synthetic_cubed.append(s)
      visited.update(s)
    
  return synthetic_cubed, final_synthetic_cubed

## Driver

In [23]:
openai.api_key ='INSERT OPENAI API KEY HERE'
prompt = "Aliens should not be on Earth and should go back to outer space. They do not belong here." #@param {type:"string"}
ft_model = "davinci:ft-personal:hs-cs-v2-2023-04-25-16-06-31"

cs_temp = 0.35 #@param {type:"slider", min:0, max:1, step:0.05}
cs_pres_pen = -0.2 #@param {type:"slider", min:-2, max:2, step:0.1}
cs_freq_pen = 0.4 #@param {type:"slider", min:-2, max:2, step:0.1}
cs_best_of = 7 #@param {type:"slider", min:1, max:7, step:1}
cs_n = 4 #@param {type:"slider", min:1, max:5, step:1}

# get counterspeech from fine-tuned davinci model
counterspeech_result = gen_counterspeech(ft_model, prompt, temp=cs_temp, pres_pen=cs_pres_pen, freq_pen=cs_freq_pen, best_of=cs_best_of, n=cs_n)

# generated counterspeech:
print('GENERATED COUNTERSPEECH')
print()
for cs in counterspeech_result['choices']:
  print(cs['text'])
  print('-----')
print()
print('--------------------------------------------------------------------------------------')
print()

number_of_cs_topics_to_extract = 3 #@param {type:"slider", min:1, max:5, step:1}
number_of_hs_topics_to_extract = 3 #@param {type:"slider", min:1, max:5, step:1}
topic_diversity = 0.5 #@param {type:"slider", min:0, max:1, step:0.1}
# get topics from keybert
topics = extract_topics(counterspeech_result, highlight=False, use_mmr=True, diversity=0.5, keywords_n=number_of_cs_topics_to_extract)


haiku_tmp = 0.35 #@param {type:"slider", min:0, max:1, step:0.05}
haiku_pres_pen = -0.4 #@param {type:"slider", min:-2, max:2, step:0.1}
haiku_freq_pen = 0.3 #@param {type:"slider", min:-2, max:2, step:0.1}
haiku_best_of = 5 #@param {type:"slider", min:3, max:7, step:1}
davinci_num_haikus_generated = 5 #@param {type:"slider", min:3, max:7, step:1}
# generate haikus with davinci
haikus = []
for topic in topics:
  haikus.append(gen_haikus(topic, max_tokens=20, tmp=haiku_tmp, pres_pen=haiku_pres_pen, freq_pen=haiku_freq_pen, n=davinci_num_haikus_generated))

# get hatespeech and counterspeech topics

cs_topics = sum(topics, [])
hs_topics = [x[0] for x in kw_model.extract_keywords(prompt, keyphrase_ngram_range=(1, 1), stop_words='english', highlight=False, use_mmr=True, diversity=0.5)[:number_of_hs_topics_to_extract]]
print('TOPICS IDENTIFIED\n')
print('HATESPEECH TOPICS: ', hs_topics)
print('COUNTERSPEECH TOPICS: ', cs_topics)
print()
print('--------------------------------------------------------------------------------------')
print()

# generate synthetic and ground haiku candidates
synthetic, haiku_candidates, _ = get_candidates(haikus, cs_topics, hs_topics)

GENERATED COUNTERSPEECH

 On the contrary, they are welcome here, as long as they respect our laws and culture.


-----
 I don't see why aliens shouldn't be on Earth. They have the right to be here, just as anyone else.

 
-----
 I think it is important to remember that we are all created equal in the eyes of God, and that we are all brothers and sisters.

Aliens should be treated with respect.

What is the point of being on Earth if you do not respect other people?

Do you really want to live in a world like that?

You can't just wish for a better
-----
 You are entitled to your opinion, but I think that we should respect the right of others to live and work in the country of their choice.


-----

--------------------------------------------------------------------------------------

TOPICS IDENTIFIED

HATESPEECH TOPICS:  ['aliens', 'earth', 'space']
COUNTERSPEECH TOPICS:  ['contrary', 'laws', 'culture', 'aliens', 'earth', 'shouldn', 'aliens', 'respect', 'equal', 'opinion', 'entitled

In [24]:
# output
print('5, 7, 5 HAIKUS BY DAVINCI:')
if len(haiku_candidates) == 0:
  print('No 5, 7, 5 haikus were generated by davinci')
else:
  for h in haiku_candidates:
    print(h)

print()
print('--------------------------------------------------------------------------------------')
print()
print('5, 7, 5 SYNTHETIC HAIKUS:')
print()

# generate synthetic haikus
synthetic_cubed, final_synthetic_cubed = gen_synthetic_cubed(synthetic, cs_topics, hs_topics)

for s in final_synthetic_cubed:
  print(s[0])
  print(s[1])
  print(s[2])
  print()


5, 7, 5 HAIKUS BY DAVINCI:
No 5, 7, 5 haikus were generated by davinci

--------------------------------------------------------------------------------------

5, 7, 5 SYNTHETIC HAIKUS:

Aliens here too,
Culture clashes, hearts in pain;
A battle of wills.

