# TODO: still need to try to filter out racial references, e.g. Asian, Black, Caucasian
from collections import defaultdict
import json
import random
import spacy
from nltk.tokenize import word_tokenize as tokenize
import string

# Word lists: human nouns to be replaced, tarot/zodiac names to substitute in,
# and colour words used to group sentences into chapters.
human_plural = [' women ', ' men ', ' girls ', ' boys ', ' teenagers ', ' ladies ', ' gentlemen ', ' kids ', ' children ', ' females ', ' males ']
human_singular = [' woman ', ' man ', ' girl ', ' boy ', ' teenager ', ' lady ', ' gentleman ', ' kid ', ' child ', ' female ', ' male ']
tarot_names_singular = [' Fool ', ' Magician ', ' High Priestess ', ' Empress ', ' Emperor ', ' Hierophant ', ' Lover ', ' Chariot ', ' Strength ', ' Hermit ', ' Wheel of Fortune ', ' Justice ', ' Hanged Man ', ' Death ', ' Temperance ', ' Devil ', ' Tower ', ' Star ', ' Moon ', ' Sun ', ' Judgement ', ' World ']
tarot_names_plural = [' Fools ', ' Magicians ', ' High Priestesses ', ' Empresses ', ' Emperors ', ' Hierophants ', ' Lovers ', ' Chariots ', ' Strengths ', ' Hermits ', ' Wheel of Fortunes ', ' Justices ', ' Hanged Men ', ' Deaths ', ' Temperances ', ' Devils ', ' Towers ', ' Stars ', ' Moons ', ' Suns ', ' Judgements ', ' Worlds ']
astrology_names_singular = [' Aries ', ' Taurus ', ' Gemini ', ' Cancer ', ' Leo ', ' Virgo ', ' Libra ', ' Scorpio ', ' Sagittarius ', ' Capricorn ', ' Aquarius ', ' Pisces ']
colors = [' red ', ' orange ', ' yellow ', ' green ', ' blue ', ' indigo ', ' violet ', ' purple ', ' lilac ', ' pink ', ' beige ', ' brown ', ' black ', ' white ', ' gray ', ' gold ', ' silver ', ' bronze ']

# Treat the astrological signs as additional singular "card" names.
for sign in astrology_names_singular:
    tarot_names_singular.append(sign)

snli_path = ''  # add path to SNLI corpus here

# Collect sentence1 from every SNLI pair labelled 'neutral'.
predictions_week1 = []
with open(snli_path) as f:
    for line in f.readlines():
        data = json.loads(line)
        if data['gold_label'] == 'neutral':
            sentence_1 = data['sentence1']
            sentence_1_pos = data['sentence1_parse']
            sentence_1_tokens = tokenize(sentence_1)
            prediction = {}
            prediction['sentence_1_string'] = sentence_1
            prediction['sentence_1_pos'] = sentence_1_pos
            prediction['sentence_1_tokens'] = sentence_1_tokens
            predictions_week1.append(prediction)

tarot = ''
sentences = set()
tarot_sentences = set()

for prediction in predictions_week1:
    for key, value in prediction.items():
        if key == 'sentence_1_string':  # or key == 'sentence_2_string':
            sentences.add(value)

# Replace human nouns with randomly chosen tarot/zodiac names.
with open("all_output.txt", "w") as all_file:
    with open("color_output.txt", "w") as color_file:  # note: color_file is opened but never written to below
        while len(sentences) > 0:
            sentence = sentences.pop()
            for human in human_singular:
                if human in sentence:
                    tarot = random.choice(tarot_names_singular)
                    tarot_sentence_singular = sentence.replace(human, tarot)
                    tarot_sentences.add(tarot_sentence_singular)
                    all_file.write(tarot_sentence_singular + '\n')
                    break
            for humans in human_plural:
                if humans in sentence:
                    tarot = random.choice(tarot_names_plural)
                    tarot_sentence_plural = sentence.replace(humans, tarot)
                    tarot_sentences.add(tarot_sentence_plural)
                    all_file.write(tarot_sentence_plural + '\n')
                    break

# Group the rewritten sentences into chapters by the colour word they contain.
color_chapters = defaultdict(list)
for color in colors:
    color_name = color.strip(' ')
    for sentence in tarot_sentences:
        if color in sentence:
            color_chapters[color_name].append(sentence)

# Write the finished book: title page, table of chapters, then one chapter per colour.
with open("color_visions_tkacz.txt", "w") as color_visions_file:
    print('COLOR VISIONS', file=color_visions_file)
    print('Lesia Tkacz, 2019', file=color_visions_file)
    print('Created using the Stanford Natural Language Inference Corpus', file=color_visions_file)
    print(' ', file=color_visions_file)
    print('-------- CHAPTERS --------', file=color_visions_file)
    chapter_counter = 0
    for color, _chapter_sentences in color_chapters.items():
        chapter_counter += 1
        print(chapter_counter, color.upper(), file=color_visions_file)
    print(' ', file=color_visions_file)
    for chapter_title, chapter_sentences in color_chapters.items():
        print('--------', 'VISIONS IN ', chapter_title.upper(), '--------', file=color_visions_file)
        for sentence in chapter_sentences:
            print(sentence, file=color_visions_file)
        print(' ', file=color_visions_file)

print('DONE!!!')