# TODO: still need to try to filter out racial references, e.g. Asian, Black, Caucasian
from collections import defaultdict
import json
import random
import spacy
from nltk.tokenize import word_tokenize as tokenize
import string

# Word lists: human nouns to be replaced, tarot/zodiac names to substitute in,
# and colour words used to group sentences into chapters.
human_plural = [' women ', ' men ', ' girls ', ' boys ', ' teenagers ', ' ladies ', ' gentlemen ', ' kids ', ' children ', ' females ', ' males ']
human_singular = [' woman ', ' man ', ' girl ', ' boy ', ' teenager ', ' lady ', ' gentleman ', ' kid ', ' child ', ' female ', ' male ']
tarot_names_singular = [' Fool ', ' Magician ', ' High Priestess ', ' Empress ', ' Emperor ', ' Hierophant ', ' Lover ', ' Chariot ', ' Strength ', ' Hermit ', ' Wheel of Fortune ', ' Justice ', ' Hanged Man ', ' Death ', ' Temperance ', ' Devil ', ' Tower ', ' Star ', ' Moon ', ' Sun ', ' Judgement ', ' World ']
tarot_names_plural = [' Fools ', ' Magicians ', ' High Priestesses ', ' Empresses ', ' Emperors ', ' Hierophants ', ' Lovers ', ' Chariots ', ' Strengths ', ' Hermits ', ' Wheel of Fortunes ', ' Justices ', ' Hanged Men ', ' Deaths ', ' Temperances ', ' Devils ', ' Towers ', ' Stars ', ' Moons ', ' Suns ', ' Judgements ', ' Worlds ']
astrology_names_singular = [' Aries ', ' Taurus ', ' Gemini ', ' Cancer ', ' Leo ', ' Virgo ', ' Libra ', ' Scorpio ', ' Sagittarius ', ' Capricorn ', ' Aquarius ', ' Pisces ']
colors = [' red ', ' orange ', ' yellow ', ' green ', ' blue ', ' indigo ', ' violet ', ' purple ', ' lilac ', ' pink ', ' beige ', ' brown ', ' black ', ' white ', ' gray ', ' gold ', ' silver ', ' bronze ']

# Treat the astrological signs as additional singular "card" names.
for sign in astrology_names_singular:
    tarot_names_singular.append(sign)

snli_path = ''  # add path to SNLI corpus here

# Collect sentence1 from every SNLI pair labelled 'neutral'.
predictions_week1 = []
with open(snli_path) as f:
    for line in f.readlines():
        data = json.loads(line)
        if data['gold_label'] == 'neutral':
            sentence_1 = data['sentence1']
            sentence_1_pos = data['sentence1_parse']
            sentence_1_tokens = tokenize(sentence_1)
            prediction = {}
            prediction['sentence_1_string'] = sentence_1
            prediction['sentence_1_pos'] = sentence_1_pos
            prediction['sentence_1_tokens'] = sentence_1_tokens
            predictions_week1.append(prediction)

tarot = ''
sentences = set()
tarot_sentences = set()

for prediction in predictions_week1:
    for key, value in prediction.items():
        if key == 'sentence_1_string':  # or key == 'sentence_2_string':
            sentences.add(value)

# Replace human nouns with randomly chosen tarot/zodiac names.
with open("all_output.txt", "w") as all_file:
    with open("color_output.txt", "w") as color_file:  # note: color_file is opened but never written to below
        while len(sentences) > 0:
            sentence = sentences.pop()
            for human in human_singular:
                if human in sentence:
                    tarot = random.choice(tarot_names_singular)
                    tarot_sentence_singular = sentence.replace(human, tarot)
                    tarot_sentences.add(tarot_sentence_singular)
                    all_file.write(tarot_sentence_singular + '\n')
                    break
            for humans in human_plural:
                if humans in sentence:
                    tarot = random.choice(tarot_names_plural)
                    tarot_sentence_plural = sentence.replace(humans, tarot)
                    tarot_sentences.add(tarot_sentence_plural)
                    all_file.write(tarot_sentence_plural + '\n')
                    break

# Group the rewritten sentences into chapters by the colour word they contain.
color_chapters = defaultdict(list)
for color in colors:
    color_name = color.strip(' ')
    for sentence in tarot_sentences:
        if color in sentence:
            color_chapters[color_name].append(sentence)

# Write the finished book: title page, table of chapters, then one chapter per colour.
with open("color_visions_tkacz.txt", "w") as color_visions_file:
    print('COLOR VISIONS', file=color_visions_file)
    print('Lesia Tkacz, 2019', file=color_visions_file)
    print('Created using the Stanford Natural Language Inference Corpus', file=color_visions_file)
    print(' ', file=color_visions_file)
    print('-------- CHAPTERS --------', file=color_visions_file)
    chapter_counter = 0
    for color, _chapter_sentences in color_chapters.items():
        chapter_counter += 1
        print(chapter_counter, color.upper(), file=color_visions_file)
    print(' ', file=color_visions_file)
    for chapter_title, chapter_sentences in color_chapters.items():
        print('--------', 'VISIONS IN ', chapter_title.upper(), '--------', file=color_visions_file)
        for sentence in chapter_sentences:
            print(sentence, file=color_visions_file)
        print(' ', file=color_visions_file)

print('DONE!!!')