# NLP PROJECT - ONTOLOGY OF THE THREE KINGDOMS

In [1]:
#Importing libraries for NLP techniques
from nltk.tokenize import sent_tokenize, word_tokenize 
from nltk.corpus import stopwords
from string import punctuation
import collections

#Utils
from pathlib import Path
import numpy as np
import os

# Defining functions

In [2]:
# Easy-to-use generator of the character-name lists from a specific noisy file.
# Names are stripped of surrounding whitespace before being stored, so the
# generated files never contain entries such as "caohuan " that break loading.
def generateNamesList():
    """Parse "Lists/Main Characters.txt" and write the derived name lists.

    The first line of the file is skipped. For every other line only the
    first comma-separated field is used as the character entry:

    * entries containing "lady", "empress" or "diaochan" (case-insensitive)
      go to the ladies' list;
    * entries of the form "name (courtesy)" go to the men's list under
      ``name``; both ``name`` and ``courtesy`` are added to the alias list
      and ``aliasDict[name] = courtesy`` is recorded;
    * every other entry goes to the men's list.

    Every stored name is also added to the global list. Entries that are
    empty after stripping whitespace are skipped.

    Side effects: writes allNames.txt, menList.txt, ladyList.txt and
    aliasesList.txt inside "Lists/" and prints a progress line for each.

    Returns:
        tuple: (allList, menList, ladyList, aliasList, aliasDict)
    """
    # The context manager closes the file even if reading fails.
    with open("Lists/Main Characters.txt", "r") as file:
        charText = file.readlines()

    allList = []
    ladyList = []
    menList = []
    aliasList = []
    aliasDict = {}

    print("Generating lists...")

    for line in charText[1:]:
        words = line.split(",")
        # Kept from the original: a last field containing a space is blanked.
        # NOTE(review): on a line without commas this blanks the name itself,
        # so such lines are dropped - confirm that this is intended.
        if " " in words[-1]:
            words[-1] = ""

        entry = words[0].strip()
        if not entry:
            continue  # blank line or blanked field: nothing to store

        lowered = entry.lower()
        if "lady" in lowered or "empress" in lowered or "diaochan" in lowered:
            ladyList.append(entry)
            allList.append(entry)
        elif "(" in entry:
            names = entry.split("(")
            name = names[0].strip()
            courtesy = names[1].split(")")[0].strip()
            aliasDict[name] = courtesy
            aliasList.append(name)
            aliasList.append(courtesy)
            allList.append(courtesy)
            menList.append(name)
            allList.append(name)
        else:
            menList.append(entry)
            allList.append(entry)

    outputs = (
        ("allNames.txt", allList),
        ("menList.txt", menList),
        ("ladyList.txt", ladyList),
        ("aliasesList.txt", aliasList),
    )
    for filename, names in outputs:
        np.savetxt("Lists/" + filename, names, fmt="%s")
        print(filename + " created!")

    return allList, menList, ladyList, aliasList, aliasDict

In [3]:
# Load the lists from files. aliasDict is not stored on disk, so you will not
# get it back from here, but you probably don't need it.
def loadLists():
    """Read the four name lists previously written inside "Lists/".

    Each file is expected to hold one name per line. Names are stripped of
    surrounding whitespace and blank lines are dropped, so the trailing
    newline at the end of a file never produces an empty entry.

    Returns:
        tuple: (allList, menList, ladyList, aliasList), each a list of str.
    """
    def _readNames(path):
        # One name per line; the context manager closes every handle.
        with open(path, "r") as file:
            return [line.strip() for line in file if line.strip()]

    allList = _readNames("Lists/allNames.txt")
    menList = _readNames("Lists/menList.txt")
    ladyList = _readNames("Lists/ladyList.txt")
    aliasList = _readNames("Lists/aliasesList.txt")
    return allList, menList, ladyList, aliasList

In [4]:
# Retrieve all phrases related to the character: the sentence where his/her
# name appears is merged with the one before and the one after it.
def infoXchar(charSurn, charName, sourceText):
    """Collect the text blocks surrounding each "surname name" occurrence.

    A match is a whitespace-separated token equal to ``charName`` that
    immediately follows a token equal to ``charSurn``. The "surname seen"
    flag is carried over from one sentence to the next, so a surname ending
    one sentence followed by the name opening the next also counts.

    For every match, the matching sentence is concatenated with its
    neighbours: the previous and the next sentence, or the first/last three
    sentences when the match is in the first/last one. If ``sourceText``
    holds fewer than three sentences, all of them are used. A sentence with
    several matches is stored once per match.

    Args:
        charSurn: surname token to look for.
        charName: name token that must directly follow the surname.
        sourceText: sequence of sentence strings.

    Returns:
        list of str: one concatenated block per match. The number of matches
        is also printed.
    """
    text = list(sourceText)
    end = len(text)
    wantedInfo = []
    match = 0
    rightSurn = False

    for bookmark, line in enumerate(text):
        for word in line.split():
            if word == charName and rightSurn:
                rightSurn = False
                match += 1
                # Clamp the three-sentence window to the text boundaries so
                # short texts neither raise IndexError nor wrap around.
                start = max(0, min(bookmark - 1, end - 3))
                wantedInfo.append("".join(text[start:start + 3]))
            elif word == charSurn:
                rightSurn = True
            else:
                rightSurn = False

    print("Found " + str(match) + " phrases.")
    return wantedInfo
#Version 2 will also check for aliases, if needed.

In [5]:
# Retrieve all phrases where a certain item appears. The output is a list of
# strings where each cell is formed by the sentence the item was found in,
# the one before and the one after.
def infoXitem(item, text):
    """Collect the text blocks surrounding each occurrence of ``item``.

    A match is a whitespace-separated token equal to ``item``. For every
    match, the matching sentence is concatenated with its neighbours: the
    previous and the next sentence, or the first/last three sentences when
    the match is in the first/last one. If ``text`` holds fewer than three
    sentences, all of them are used. A sentence with several matches is
    stored once per match.

    Args:
        item: token to look for.
        text: sequence of sentence strings to search.

    Returns:
        list of str: one concatenated block per match. The number of matches
        is also printed.
    """
    # Work on the argument itself: the previous version read an unrelated
    # global named sourceText and ignored this parameter.
    sentences = list(text)
    end = len(sentences)
    wantedInfo = []
    match = 0

    for bookmark, line in enumerate(sentences):
        for word in line.split():
            if word == item:
                match += 1
                # Clamp the three-sentence window to the text boundaries.
                start = max(0, min(bookmark - 1, end - 3))
                wantedInfo.append("".join(sentences[start:start + 3]))

    print("Found " + str(match) + " phrases.")
    return wantedInfo

In [6]:
# Return the list of (#appearances, term) pairs, plus the updated set of
# unique terms and the term -> count dictionary.
def frequentWord(sourceText, uniqueWords = None, wordCount = None):
    """Count how often each term appears in a list of text blocks.

    Each block is tokenized with ``word_tokenize``. Tokens found in the
    global ``stop_words`` are not counted, but they are still remembered as
    the previous token. When both the previous token and the current one
    start with an uppercase letter, the pair "previous current" is counted
    instead of the current token alone.

    Args:
        sourceText: iterable of strings (text blocks).
        uniqueWords: optional set to which the terms are added; a new set is
            created when omitted.
        wordCount: optional dict of term -> count to keep updating; a new
            dict is created when omitted.

    Returns:
        tuple: (wordFreq, uniqueWords, wordCount), where ``wordFreq`` is a
        list of (count, term) tuples sorted in descending order.
    """
    # Fresh containers per call: mutable defaults in the signature would be
    # shared between calls and silently accumulate counts.
    if uniqueWords is None:
        uniqueWords = set()
    if wordCount is None:
        wordCount = {}

    previousToken = "x"
    for block in sourceText:
        for word in word_tokenize(block):
            if word not in stop_words:
                if previousToken[0].isupper() and word[0].isupper():
                    term = previousToken + " " + word
                else:
                    term = word
                wordCount[term] = wordCount.get(term, 0) + 1
                uniqueWords.add(term)
            previousToken = word

    wordFreq = [(value, key) for key, value in wordCount.items()]
    wordFreq.sort(reverse=True)

    return wordFreq, uniqueWords, wordCount

In [7]:
# Check the semantic field of the common words to understand
# what kind of role our character played.
def computeRole(wordFreq, wantData = False):
    """Score how war-related and politics-related a word frequency list is.

    Each score is the summed count of the terms whose lowercase form is in
    the global ``warWords`` / ``poliWords`` list, divided by ``base + 1``,
    where ``base`` is the number of entries in ``wordFreq`` that contain no
    space (terms with a space are treated as entities).

    Args:
        wordFreq: list of (count, term) tuples.
        wantData: when True the two scores are returned; otherwise they are
            printed and 0 is returned.

    Returns:
        (warriorness, politicness) if ``wantData`` is True, else 0.
    """
    # Terms containing a space are (mostly) entities: leave them out of the base.
    entityCount = sum(1 for entry in wordFreq if " " in entry[1])
    divisor = (len(wordFreq) - entityCount) + 1

    warTotal = sum(entry[0] for entry in wordFreq if entry[1].lower() in warWords)
    poliTotal = sum(entry[0] for entry in wordFreq if entry[1].lower() in poliWords)

    warriorness = warTotal / divisor
    politicness = poliTotal / divisor

    if not wantData:
        print("Warriorness: " + str(warriorness) + " | Politicness: " + str(politicness))
        return 0
    return warriorness, politicness

# Gathering text source & helping lists

In [8]:
#Allocating lists (aliasDict too, so it exists whichever branch runs below)
allList = []
menList = []
ladyList = []
aliasList = []
aliasDict = {}

#Loading files
# Swap the path for "Dataset/chap005-012.txt" to analyse the next chapters.
with open("Dataset/chap001-004.txt", "r") as file:
    chaps = file.read()

# The name lists are cached on disk: rebuild them only when they are missing.
if Path('Lists/allNames.txt').is_file():
    print("Character's name list already computed.")
    allList, menList, ladyList, aliasList = loadLists()
else:
    allList, menList, ladyList, aliasList, aliasDict = generateNamesList()

#if Path('Lists/eventNames.txt').is_file():
#    print("Event's name list ready.")
#else:
    #generateEventsList();
    #file = open("Lists/Main Events.txt", "r");
    #eventText = file.readlines();

Character's name list already computed.


In [9]:
#Adjusting Stop Words
lib_stopWords = set(stopwords.words('english'))

# Sentence-initial variants: same word with only its first letter uppercased.
capitalizedStopWords = {word[0].upper() + word[1:] for word in lib_stopWords}

# Library stop words + "but" + capitalized variants + punctuation signs and
# the two quote tokens, with the full stop taken back out of the set.
stop_words = (
    lib_stopWords
    | {"but"}
    | capitalizedStopWords
    | set(punctuation)
    | {"''", "``"}
) - {"."}

# QUESTION - What do we want to know about character X?
Let's analyze a character like, for example, Liu Bei.<br>
You may know something about him or maybe not, but trust me when I say that he is a very important character.<br>
Now we want to understand who he was in order to fill the ontology:
<ul>
    <li>What has he done? -> Which Events has he been involved in?</li>
    <li>What did he achieve? -> Which Title has he managed to get?</li>
    <li>What kind of relationships did he develop? -> With whom?</li>
</ul>
To answer those questions, we first have to gather information related to him; to do so, we'll extract blocks of text from the novel, leaving out whatever isn't about our character.

In [10]:
# Full source reduced to a list of phrases by splitting on every full stop.
text = chaps.split(".")

# Blocks of text surrounding each "Liu Bei" mention.
charInfo = infoXchar("Liu", "Bei", text)

# Fresh accumulators, so nothing from a previous run is carried over.
wordFreq, uniqueWords, wordCount = frequentWord(
    charInfo,
    uniqueWords = set(),
    wordCount = {},
)

Found 69 phrases.


Now that we have a list of words somehow related to our character, let's extract meaning from them and let's try to answer questions to fill our ontology.<br>

## Who was he?
We could run part-of-speech (POS) tagging to quickly extract the adjectives that describe our character and perform a simple sentiment analysis:
<ul>
    <li>good people tend to have more positive words in their list</li>
    <li>bad people tend to have more negative words in their list</li>
</ul>

Even if this is interesting, POS tagging may require too much time, so I skip it for now.
Still, if we focus on the semantic field of the most frequent words, we can understand what kind of role our character played in the selected chapters:
<ul>
    <li>soldiers will be strongly tied to war-concepts;</li>
    <li>politicians will have more social interactions.</li>
</ul>

Let's try to understand whether we can label Liu Bei as a Warrior and Dong Zhuo as a Politician. Distinguishing the two cases is going to be very difficult: in Romance of the Three Kingdoms not only does everyone tend to get involved in conflicts, but most politicians also gain fame and social status through military merit, so moments where they act as warriors and moments where they act as politicians may mix, resulting in values that are hard to compare.<br>
Still, it's worth trying, since this could be a fast way to enrich the ontology on the Involved-Object Property side.
### Role Metric: Warriors Vs Politicians

In [11]:
# Load precompiled word lists taken from the internet and re-formatted by me.
# Each file is opened in its own context manager so both handles get closed.
with open("Lists/warWords.txt", "r") as file:
    warWords = file.read().split("\n")
with open("Lists/poliWords.txt", "r") as file:
    poliWords = file.read().split("\n")
# NOTE! Some terms will not make much sense in the novel's historical period;
# still, this is a good starting point and it's always possible to add other words.

In [12]:
#Data boost to involve more characters
# Each file is opened in its own context manager so both handles get closed.
with open("Dataset/chap001-004.txt", "r") as file:
    chaps = file.read()
with open("Dataset/chap005-012.txt", "r") as file:
    chaps += "\n" + file.read()
#Go on...

text = chaps.split(".")

In [13]:
#Gathering & computing values for each main character
warDict = {}
poliDict = {}
warRes = []
poliRes = []

for char in menList:
    # split() without arguments ignores leading, trailing and repeated
    # whitespace, so names such as "caohuan " no longer yield an empty
    # second part that crashes the capitalization below.
    pers = char.split()
    if not pers:
        continue  # blank entry in the list: nothing to search for
    cSur = pers[0][0].upper() + pers[0][1:]
    if len(pers) == 1:
        cNam = ""
    else:
        cNam = pers[1][0].upper() + pers[1][1:]
    print(cSur + " " + cNam)
    charInfo = infoXchar(cSur, cNam, text)
    wordFreq, uniqueWords, wordCount = frequentWord(charInfo, uniqueWords, wordCount = {})

    #Estimate 'warriorness' and 'politicness' of the character
    # (same computation as before, now delegated to computeRole)
    warriorness, politicness = computeRole(wordFreq, wantData = True)
    if warriorness != 0 and politicness != 0:
        # Row ready to be exported, should the results be written to a file.
        data = cSur + " " + cNam + ", " + str(warriorness) + ", " + str(politicness)
        #file.write(data+"\n");

    warDict[cSur + " " + cNam] = warriorness
    poliDict[cSur + " " + cNam] = politicness

Cai Mao
Found 12 phrases.
Cao Cao
Found 214 phrases.
Cao Fang
Found 0 phrases.
Cao Hong
Found 11 phrases.
Caohuan 
Found 0 phrases.
Cao Mao
Found 0 phrases.
Cao Pi
Found 0 phrases.
Cao Rui
Found 0 phrases.
Cao Shuang
Found 0 phrases.
Cao Zhen
Found 0 phrases.
Caozhi 
Found 0 phrases.
Chen Lin
Found 2 phrases.
Cheng Pu
Found 11 phrases.
Deng Ai
Found 0 phrases.
Deng Zhi
Found 0 phrases.
Dian Wei
Found 34 phrases.
Ding Feng
Found 0 phrases.
Dong Cheng
Found 0 phrases.
Dong Zhuo
Found 204 phrases.
Emperor Shao
Found 0 phrases.
Emperor Xian
Found 1 phrases.
Empress Dong
Found 9 phrases.
Fa Zheng
Found 0 phrases.
Fei Yi
Found 0 phrases.
Gan Ning
Found 0 phrases.
Gongsun Zan
Found 52 phrases.
Guan Xing
Found 0 phrases.
Guan Yu
Found 28 phrases.
Han Dang
Found 7 phrases.
Huatuo 
Found 0 phrases.
Huaxin 
Found 0 phrases.
Huang Gai
Found 8 phrases.
Huang Zhong
Found 0 phrases.
Ji Ping
Found 0 phrases.
Jia Xu
Found 6 phrases.
Jian Yong
Found 0 phrases.
Jiang Wei
Found 0 phrases.
Kan Ze
Found 0 p

In [14]:
#Sorting results
# Both rankings are rebuilt from the dictionaries on every run, so executing
# this cell more than once cannot duplicate entries.
warRes = sorted(((value, key) for key, value in warDict.items()), reverse=True)
poliRes = sorted(((value, key) for key, value in poliDict.items()), reverse=True)

Obviously many main characters don't even appear in the analyzed text, since the list takes into account all characters from the book while we've analyzed just 12 chapters.<br>
Anyway, before looking at the results there's one thing we can be sure about: values will be low, especially for the politics metric. This will happen mainly for four reasons:
<ul>
<li>warWords.txt is far from a perfect collection of war-related terms for the Romance of the Three Kingdoms novel, so many words will be missed;</li>
<li>poliWords.txt is even further from a perfect collection of politics-related terms for the novel, so a lot of words will be missed;</li>
<li>Battles aren't the focus of the book; they are generally wrapped up in a limited number of sentences with few or no descriptions. This means that even the fiercest soldier will have many words unrelated to war;</li>
<li>The narration moves fast, with limited use of description and many sentences that go directly to the point, reducing the frequency of details that could help us.</li>
</ul>
A better analysis of the actual spoken parts may improve this metric, but it would likely be time-consuming work since it requires me to read many of them. I'll skip that for now and keep focusing on a more general level.<br>
Let's see the results of this naive approach. <br>

## Results Analysis

In [15]:
# Print both rankings, skipping the characters whose score is zero.
for heading, ranking in (("WARRIOR METRIC", warRes), ("\nPOLITIC METRIC", poliRes)):
    print(heading)
    for val, char in ranking:
        if val == 0:
            continue
        print(char + ": " + str(val))

WARRIOR METRIC
Cai Mao: 0.291970802919708
Cao Cao: 0.2539404553415061
Lu Bu: 0.2465506898620276
Sun Jian: 0.2381443298969072
Huang Gai: 0.23529411764705882
Yuan Shao: 0.2122122122122122
Liu Bei: 0.21137026239067055
Yu Jin: 0.20967741935483872
Cao Hong: 0.20863309352517986
Guan Yu: 0.18350515463917524
Sun Ce: 0.1834862385321101
Gongsun Zan: 0.18275862068965518
Xiahou Dun: 0.17786561264822134
Dong Zhuo: 0.17517006802721088
Zhang Fei: 0.17391304347826086
Han Dang: 0.16666666666666666
Cheng Pu: 0.15846994535519127
Liu Biao: 0.1532567049808429
Xiahou Yuan: 0.14666666666666667
Zhao Yun: 0.14285714285714285
Chen Lin: 0.14285714285714285
Dian Wei: 0.1309823677581864
Taishi Ci: 0.1270718232044199
Jia Xu: 0.125
Yue Jin: 0.12244897959183673
Zhang Bao: 0.11827956989247312
Wang Yun: 0.11737089201877934
Lu Zhi: 0.11475409836065574
Tao Qian: 0.11133603238866396
Kong Rong: 0.1076923076923077
Yuan Shu: 0.10596026490066225
Xun Yu: 0.08108108108108109
Mi Zhu: 0.06607929515418502
Empress Dong: 0.051813471

The results are odd, but surprisingly coherent.<br>
We expected low values (well, not this low, actually...) and, since we knew that poliWords.txt was less accurate than warWords.txt, we could also predict that a direct comparison between the two metrics would be meaningless. Still, the ranking is highly reliable; at least, I'm confident about it down to position 14 of the warrior metric. <br>
We can use those values in two ways:
<ul>
    <li>Define a fixed threshold that states who is a warrior and who is a politician. That value could be estimated by computing the average of the top-10 averages of many tables;</li>
    <li>Label as warriors all characters whose rank surpass their politician one.</li>
</ul>
Second solution is the one I like more, but both of them seem fine. Further testing will reveal which one is actually more robust.

In [16]:
#TESTING

## Entity Analyses

Looking at the entity names our character is cited with can be a good way to get an idea of what kind of person he was and what he did, especially if we know who the other people are and what that event or title actually represents. <br>
In Liu Bei's case we may think that he is a noble, since the terms "Imperial Protector" and "Imperial Family" appear. They are not common words and carry a really heavy meaning: usually only nobles (or rebels who want to kill them) interact with nobility, but the term "Imperial Protector" suggests that he may have good intentions towards them. Also, the word "heaven" is used a good number of times, meaning that our character could have a strong faith or a positive connection to the royal family, which in the novel is commonly addressed as a sort of divine entity.

In [17]:
#Recover original data for going on with the presentation
with open("Dataset/chap001-004.txt", "r") as file:
    chaps = file.read()

# Same pipeline as at the beginning: phrases -> Liu Bei blocks -> frequencies.
text = chaps.split(".")
charInfo = infoXchar("Liu", "Bei", text)

wordFreq, uniqueWords, wordCount = frequentWord(
    charInfo,
    uniqueWords = set(),
    wordCount = {},
)

Found 69 phrases.


## Relation with other characters
To answer this, we first look at which characters our target has been cited with, so we extract names from our set of words. We can expect Zhang Fei and Guan Yu to be the most common names appearing with Liu Bei, since these three characters share a deep connection. Also, the name "Liu Bei" should appear quite often, since it should be present in every sentence, and probably more than once.

In [18]:
# Only the top 100 entries are scanned, since most words will not have the " ".
topEntries = wordFreq[:100]
for entry in topEntries:
    if " " not in entry[1]:
        continue
    print(entry)

(99, 'Liu Bei')
(28, 'Zhang Fei')
(15, 'Guan Yu')
(13, 'Zhu Jun')
(7, 'Zhang Bao')
(7, 'Lu Zhi')
(6, 'Liu Yu')
(6, 'But Liu')
(5, 'Zhou Jing')
(5, 'Zhang Jue')
(5, 'Yellow Scarves')
(4, 'Sun Jian')
(4, 'Gao Sheng')
(4, 'Dong Zhuo')


As predicted, Liu Bei, Zhang Fei and Guan Yu are the most common names, with an enormous gap between first place and the others. This happens not only because Liu Bei is without doubt one of the most important characters of the novel, but also because my function can save the same sentences several times if the name we are looking for appears more than once. Luckily, apart from searches on Liu Bei, this does not happen so often.<br>
We can understand who Zhang Fei and Guan Yu are by looking at the phrases where they appear inside the text blocks we have generated before.

In [19]:
# Number and print every Liu Bei block that also mentions Zhang Fei.
i = 0
for block in filter(lambda candidate: "Zhang Fei" in candidate, charInfo):
    i = i + 1
    print("CITATION " + str(i))
    print(block)

CITATION 1
 He spoke in a loud bass voice and looked as irresistible as a dashing horse At once Liu Bei saw he was no ordinary man and asked who he was

"Zhang Fei is my name," replied the stranger
CITATION 2
 I have come to join the army here"

Then Liu Bei told Guan Yu his own intentions, and all three went away to Zhang Fei's farm where they could talk over the grand project

Said Zhang Fei, "The peach trees in the orchard behind the house are just in full flower
CITATION 3
 May Heaven, the all-ruling, and Earth, the all-producing, read our hearts; and if we turn aside from righteousness or forget kindliness, may Heaven and Human smite us!"

They rose from their knees The two others bowed before Liu Bei as their elder brother, and Zhang Fei was to be the youngest of the trio This solemn ceremony performed, they slew other oxen and made a feast to which they invited the villagers
CITATION 4
 Then blacksmiths were summoned to forge weapons For Liu Bei they made a pair of ancient sword

### Informations Analysis
Some information is clear, some bears little meaning, and some is incomplete or hard to understand without previous knowledge. Nevertheless we have discovered quite a lot about both Zhang Fei and Liu Bei, together with a decent amount of other useful information we can use to create ontology individuals among the Title, Event and Location classes. <br>
All out of 20 sentences extracted from 4 chapters.<br>
Let's take a closer look at what we got, <font color = "green">highlighting the information we can actually use</font> to enrich the ontology:
<ul>
    <li>Citation 1 - Zhang Fei is <font color = "green">physically described</font>;</li>
    <li>Citation 2 - Zhang Fei is a <font color = "green">farmer</font>;</li>
    <li>Citation 3 - Zhang Fei, Liu Bei and a third person perform a ritual, revealing that Liu Bei is the elder brother of the two;</li>
    <li>Citation 4 - The third person is Guan Yu, the three brothers receive equipments;</li>
    <li>Citation 5 - The three brothers fight with Yellow Scarves;</li>
    <li>Citation 6 - Battle;</li>
    <li>Citation 7 - Battle;</li>
    <li>Citation 8 - Zhang Fei is <font color = "green">psychologically described</font>;</li>
    <li>Citation 9 - <font color = "green">As above</font>;</li>
    <li>Citation 10 - <font color = "green">As above</font>;</li>
    <li>Citation 11 - Battle;</li>
    <li>Citation 12 - Battle;</li>
    <li>Citation 13 - Battle;</li>
    <li>Citation 14 - Battle;</li>
    <li>Citation 15 - Zhang Fei is a <font color = "green">regular soldier under Liu Bei</font>, who gained a political office;</li>
    <li>Citation 16 - Useless repetition of Citation 15;</li>
    <li>Citation 17 - Zhang Fei gets angry; if this happens other times, then <font color = "green">it could be an important psychological information</font>;</li>
    <li>Citation 18 - Zhang Fei gets drunk; if this happens other times, then <font color = "green">it could be an important psychological information</font>;</li>
    <li>Citation 19 - Keep reading to see the end of the scene which reveals a <font color = "green">psychological information</font>;</li>
    <li>Citation 20 - This shows that Zhang Fei is <font color = "green">short tempered and easily reacts to provocations</font>.</li>
</ul>
This is nice because, even if we have to rely on humans to extract the actual information, NLP techniques allow us to 'summarize' 4 chapters into a relatively low number of quickly readable phrases. NLP also suggests which specific elements we could focus on to further analyze our filtered data.<br>
Now, Liu Bei is one of the most important characters and Zhang Fei is strongly related to him, so a question arises: will this approach also work with minor characters? Let's do for Zhu Jun what we have done for Zhang Fei; he is a secondary character that doesn't even appear in the "Main Characters.txt" file.

In [20]:
# Number and print every Liu Bei block that also mentions Zhu Jun.
i = 0
for block in filter(lambda candidate: "Zhu Jun" in candidate, charInfo):
    i = i + 1
    print("CITATION " + str(i))
    print(block)

CITATION 1
 And therein seemed to be horsemen and footmen innumerable, who swept to attack the imperial troops Fear came upon them, and Liu Bei led off his troops, but they were in disorder and returned defeated

Zhu Jun and Liu Bei considered the matter
CITATION 2
 Fear came upon them, and Liu Bei led off his troops, but they were in disorder and returned defeated

Zhu Jun and Liu Bei considered the matter

"Zhang Bao uses magic," said Zhu Jun
CITATION 3


Zhang Bao quickly saw his magic had been countered and turned to retire Then he was attacked on the flanks by Guan Yu and Zhang Fei, and in rear by Liu Bei and Zhu Jun The rebels were routed
CITATION 4
 When Zhu Jun arrived, Han Zhong went to oppose him Zhu Jun sent Liu Bei and his brothers to attack the southwest corner of the city Han Zhong at once led the best of his troops to defend the city
CITATION 5
 When famine pressed upon the besieged, they sent a messenger to offer to surrender, but Zhu Jun refused the offer

Said Liu Bei

### Informations Analyses
<ul>
    <li>Citation 1 - War scene, <font color = "green">Liu Bei and Zhu Jun are allies who fight for the imperial army</font> against forces we can't infer yet;</li>
    <li>Citation 2 - Repetition of the citation above with new little meaning;</li>
    <li>Citation 3 - Zhu Jun and the three brothers are <font color = "green">fighting against rebels</font>;</li>
    <li>Citation 4 - The battle <font color = "green">takes place in Han Zhong city</font>;</li>
    <li>Citation 5 - Almost a psychological information on both Zhu Jun and Liu Bei, but the dialogue ends too soon;</li>
    <li>Citation 6 - We discover that Zhu Jun has a higher Military Rank than Sun Jian, at least in this battle;</li>
    <li>Citation 7 - We discover the name and Military Rank of the rebel army leader in this battle and <font color = "green">Zhu Jun's Military Rank</font>;</li>
    <li>Citation 8 - <font color = "green">Psychological information about Liu Bei</font>, but we can't infer it for Zhu Jun.</li>
</ul>

Understanding the relation between Liu Bei and Zhu Jun was easy, and we also managed to discover new information about the Event, Title and Location classes. Not bad for a character that most likely will not appear in the novel again.<br>
It's worth noticing that inferring Event and Location data from battle citations is possible if we focus on a limited number of chapters; with more, we risk mixing information up. Still, if we use the ontology's data to check when two or more characters fought together in a certain location, we could avoid inferring wrong things.<br>
Also, it may be interesting to store together all sentences that let us understand a specific kind of relation or intention, such as love or admiration, and use that data for ML training.
## Inferring Events
Inferring an event from scratch is extremely hard: you have to condense an unknown number of phrases into a single one, understanding when a single event starts and ends while many others are going on. Just summarising a text is hard; doing something like this in a one-week project is beyond my ability. Yet, if you know what you are looking for, things get easier.<br>
If we already have a specific Event in mind, we can use chapter titles and keyword or entity searches to infer where in the book that event takes place, then (manually) add the tags [EventName-START] and [EventName-END]. Once we have done that, we can label phrases during the characterInfo retrieval step, adding to each block the closest EVENTNAME tag in order to quickly link scene and Event. The same can be done for Location.