In [2]:
import pandas as pd

In [3]:
import numpy as np

import nltk
# Stop words
from nltk.corpus import stopwords
# Tokenization
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
# POS-TAGS
from nltk import pos_tag

nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
df = pd.read_csv('datas.csv', encoding='utf-8', error_bad_lines=False, delimiter =";")
# ISO-8859-1
# utf-8

In [5]:
df.head()

Unnamed: 0,id,sender,receiver,date_envoie,heure_envoie,message,sentiment,liked,disliked,keywords_interests
0,1,Cordy,Kalle,02/11/2016,04:58:28,What salary would you like?,,,,
1,2,Pierson,Sebastien,11/26/2016,16:12:57,I don't understand. I want to talk about movi...,,,,
2,3,Corrinne,Bibbye,02/10/2018,10:50:33,Yay! I love moveis. What your favorite movies?,,,,
3,4,Berton,Tiffanie,04/10/2018,07:49:48,Whoopie! How long has this been going on? ...,,,,
4,5,Gael,Evered,2/27/2018,06:58:03,What is your favorite song?,,,,


# Tokenization

In [6]:
from nltk.tokenize import RegexpTokenizer

##### Applying Word tokenization

def tokenize(message):
    tokenizer = RegexpTokenizer(r'\w+')
    tokenized_word = tokenizer.tokenize(message)
    
    return tokenized_word

In [7]:
# exemple
pos_tag(['going','cried'])

[('going', 'VBG'), ('cried', 'VBD')]

## Stop words

In [8]:
from nltk.corpus import stopwords

nltk.download('stopwords')

stopwords_verbs = ['say', 'get', 'go', 'know', 'may', 'need', 'like', 'make', 'see', 'want', 'come', 'take', 'use', 'would', 'can']
stopwords_other = ['one', 'mr', 'bbc', 'image', 'getty', 'de', 'en', 'caption', 'also', 'copyright', 'something']

stopwords = stopwords.words("english") + stopwords_other + stopwords_verbs

# we wont use 'to' as a stopword because it will cause a problem with verbs like (to talk) 
# talk will be seen as a noun inspite of a verb
to_index = stopwords.index('to')
stopwords.pop(to_index)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


'to'

# POS-Tagging

In [9]:
##### Applying POS-TAGGING

def pos_tagging(tokenized_word) :
    POS_TOKENS = pos_tag(tokenized_word)
    return POS_TOKENS

## Remove stop words from Tokenized sentence

In [10]:
#### Removing stop words from the tokenized word 

def remove_stopWords_msg(POS_TOKENS) :
    final_msg = []
    
    for token in POS_TOKENS :
        if token[0].lower() not in stopwords:
            final_msg.append(token[0])

    return final_msg

# First occurence of a noun

In [11]:
VERBS_TAGS = ['VB','VBD','VBG','VBP','VBN','VBZ']
NOUNS_TAGS = ['NN','NNP','NNS']

#NOUNS_TAGS = ['NN','NNP','NNPS','NNS']

def firstNounAfter(position , pos_tags) :
  for i in range (position , len(pos_tags)-1) :
    if (pos_tags[i][1] in NOUNS_TAGS):
      return pos_tags[i][0]

def firstNounBefore(position, pos_tags) :
  i = position
  while (i>=0) :
    if (pos_tags[i][1] in NOUNS_TAGS):
      return pos_tags[i][0]
    i = i-1

# Traitement

In [12]:
def traitement(words) :
    
    ############
        
    tokenized_word = []
    POS_TOKENS = []
    
    ############

    tokenized_word = tokenize(words)
    POS_TOKENS = pos_tagging(tokenized_word)

    tokenized_msg_without_stop_words = remove_stopWords_msg(POS_TOKENS)
    POS_TOKENS = pos_tagging(tokenized_msg_without_stop_words)
  

  
    return POS_TOKENS

In [13]:
def chunk_msg(msg):
    
    grammar = "Chunk: {<VB.?><NN.?>+}"       
    
    #grammar = "Chunk: {<VB.?>*<NN.?>+|<NN.?>+<VB.?>}"

    parser = nltk.RegexpParser(grammar)
    chunked = parser.parse(msg)


    NOUNS = []

    for subtree in chunked.subtrees(filter=lambda t: t.label() == 'Chunk'):
    
        grammar_sub = "chunked_sub: {<NN.?>+}"

        parser_sub = nltk.RegexpParser(grammar_sub)
        chunked_sub = parser_sub.parse(subtree)

        if chunked_sub :
            for c in chunked_sub.subtrees(filter=lambda t: t.label() == 'chunked_sub'):
                for i in range(0,len(c)):
                    NOUNS.append(c[i][0])     


    print("[INFO] : NOUNS = ",NOUNS)
    return NOUNS

# Exemple on one row

In [14]:
message = df['message'][8000]
print("[INFO] MESSAGE : ",message)

p = traitement(message)
print("[INFO] FUNCTION RETURN : ",p)

[INFO] MESSAGE :   You mentioned it I think.
[INFO] FUNCTION RETURN :  [('mentioned', 'VBN'), ('think', 'NN')]


In [15]:
chunk_msg(p)

[INFO] : NOUNS =  ['think']


['think']

In [16]:
df.head()

Unnamed: 0,id,sender,receiver,date_envoie,heure_envoie,message,sentiment,liked,disliked,keywords_interests
0,1,Cordy,Kalle,02/11/2016,04:58:28,What salary would you like?,,,,
1,2,Pierson,Sebastien,11/26/2016,16:12:57,I don't understand. I want to talk about movi...,,,,
2,3,Corrinne,Bibbye,02/10/2018,10:50:33,Yay! I love moveis. What your favorite movies?,,,,
3,4,Berton,Tiffanie,04/10/2018,07:49:48,Whoopie! How long has this been going on? ...,,,,
4,5,Gael,Evered,2/27/2018,06:58:03,What is your favorite song?,,,,


# All of the dataset traitement

In [23]:
# I had some issue with the built in json librairie so I searched for a similar librairie and used it.
# To install simplejson librairie : pip install simplejson==3.16.0
import simplejson as json

In [29]:
dataset_length = 9000
#dataset_length = 10109
for i in range(0, dataset_length) :
    total_dict = []
    total_dict.append(df.loc[i , 'sender'])
    print("----------------------------------------- ")

    m = df['message'][i]
    print("[INFO] MESSAGE = ",m)

    msg_lower_case = m.lower()

    t = traitement(msg_lower_case)
    print("[INFO] TRAITEMENT = ",t)


    nouns = chunk_msg(t)

    keys= ""

    df.loc[i , 'liked'] = ""
    
    # one keyword
    keyword = {}
    for noun in nouns :
        print("[INFO] index = ",i,"noun = ", noun)
        #data_frame['Keywords'][i] = data_frame['Keywords'][i] + "; "+noun

        df.loc[i , 'liked'] =  df.loc[i , 'liked']  + str(noun)+ "; "
     
        keyword["Keyword"] = noun        
        keyword["Count"] = 1
    
    if (keyword) :
        total_dict.append(keyword)
    print ("-[INFO]- total_dict : ",total_dict)
    print ("-[INFO]- json : ",json.dumps(total_dict))
    df.loc[i , 'keywords_interests'] =  json.dumps(total_dict)


        
        
###
        # we check if the keyword already exist
        #if(keyword["Keyword"]) :
        #if noun in keyword.values() :
            #keyword["Count"] = keyword["Count"] + 1
        #else :
###

----------------------------------------- 
[INFO] MESSAGE =   What salary would you like?
[INFO] TRAITEMENT =  [('salary', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Cordy']
-[INFO]- json :  ["Cordy"]
----------------------------------------- 
[INFO] MESSAGE =   I don't understand. I want to talk about movies     .
[INFO] TRAITEMENT =  [('understand', 'NN'), ('to', 'TO'), ('talk', 'VB'), ('movies', 'NNS')]
[INFO] : NOUNS =  ['movies']
[INFO] index =  1 noun =  movies
-[INFO]- total_dict :  ['Pierson', {'Keyword': 'movies', 'Count': 1}]
-[INFO]- json :  ["Pierson", {"Keyword": "movies", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   Yay! I love moveis. What your favorite movies?
[INFO] TRAITEMENT =  [('yay', 'RB'), ('love', 'VB'), ('moveis', 'NN'), ('favorite', 'JJ'), ('movies', 'NNS')]
[INFO] : NOUNS =  ['moveis']
[INFO] index =  2 noun =  moveis
-[INFO]- total_dict :  ['Corrinne', {'Keyword': 'moveis', 'Count': 1}]
-[INFO]- json :  ["Corrinne", {"

----------------------------------------- 
[INFO] MESSAGE =   Really? Please tell me more.
[INFO] TRAITEMENT =  [('really', 'RB'), ('please', 'VB'), ('tell', 'NN')]
[INFO] : NOUNS =  ['tell']
[INFO] index =  38 noun =  tell
-[INFO]- total_dict :  ['Dulsea', {'Keyword': 'tell', 'Count': 1}]
-[INFO]- json :  ["Dulsea", {"Keyword": "tell", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   I would prefer not to.
[INFO] TRAITEMENT =  [('prefer', 'NN'), ('to', 'TO')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Abbe']
-[INFO]- json :  ["Abbe"]
----------------------------------------- 
[INFO] MESSAGE =   Why would you prefer not to?
[INFO] TRAITEMENT =  [('prefer', 'NN'), ('to', 'TO')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Davide']
-[INFO]- json :  ["Davide"]
----------------------------------------- 
[INFO] MESSAGE =   I don't.
[INFO] TRAITEMENT =  []
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Fancie']
-[INFO]- json :  ["Fancie"]
------------------------

----------------------------------------- 
[INFO] MESSAGE =   That is a good philosophy.
[INFO] TRAITEMENT =  [('good', 'JJ'), ('philosophy', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Lula']
-[INFO]- json :  ["Lula"]
----------------------------------------- 
[INFO] MESSAGE =   I agree completely.
[INFO] TRAITEMENT =  [('agree', 'NN'), ('completely', 'RB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Jo']
-[INFO]- json :  ["Jo"]
----------------------------------------- 
[INFO] MESSAGE =   What do you know about the game Armageddon?
[INFO] TRAITEMENT =  [('game', 'NN'), ('armageddon', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Raymond']
-[INFO]- json :  ["Raymond"]
----------------------------------------- 
[INFO] MESSAGE =   Pardon me? What are you talking about?
[INFO] TRAITEMENT =  [('pardon', 'NN'), ('talking', 'VBG')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Nixie']
-[INFO]- json :  ["Nixie"]
----------------------------------------- 
[INFO] MESSAGE =  

----------------------------------------- 
[INFO] MESSAGE =   Do I? I wonder why that is?
[INFO] TRAITEMENT =  [('wonder', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Almeria']
-[INFO]- json :  ["Almeria"]
----------------------------------------- 
[INFO] MESSAGE =   It must be the transparent falseness of it.
[INFO] TRAITEMENT =  [('must', 'MD'), ('transparent', 'VB'), ('falseness', 'NN')]
[INFO] : NOUNS =  ['falseness']
[INFO] index =  117 noun =  falseness
-[INFO]- total_dict :  ['Devina', {'Keyword': 'falseness', 'Count': 1}]
-[INFO]- json :  ["Devina", {"Keyword": "falseness", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   Must it be?
[INFO] TRAITEMENT =  [('must', 'MD')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Rockey']
-[INFO]- json :  ["Rockey"]
----------------------------------------- 
[INFO] MESSAGE =   You don't have to, but I would like to know you. If you don't want to tell me, then what would you like to talk about?
[INFO] TRAITE

[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Ilario']
-[INFO]- json :  ["Ilario"]
----------------------------------------- 
[INFO] MESSAGE =   I suggest you think again.
[INFO] TRAITEMENT =  [('suggest', 'NN'), ('think', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Imojean']
-[INFO]- json :  ["Imojean"]
----------------------------------------- 
[INFO] MESSAGE =   I did.
[INFO] TRAITEMENT =  []
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Theodosia']
-[INFO]- json :  ["Theodosia"]
----------------------------------------- 
[INFO] MESSAGE =   Well, well!
[INFO] TRAITEMENT =  [('well', 'RB'), ('well', 'RB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Koralle']
-[INFO]- json :  ["Koralle"]
----------------------------------------- 
[INFO] MESSAGE =   I'm just saying...
[INFO] TRAITEMENT =  [('saying', 'VBG')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Myrtie']
-[INFO]- json :  ["Myrtie"]
----------------------------------------- 
[INFO] MESSAGE =   That's interesting. Ple

[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Pamelina']
-[INFO]- json :  ["Pamelina"]
----------------------------------------- 
[INFO] MESSAGE =   Space takes up a lot of room.
[INFO] TRAITEMENT =  [('space', 'NN'), ('takes', 'VBZ'), ('lot', 'NN'), ('room', 'NN')]
[INFO] : NOUNS =  ['lot', 'room']
[INFO] index =  188 noun =  lot
[INFO] index =  188 noun =  room
-[INFO]- total_dict :  ['Ulrica', {'Keyword': 'room', 'Count': 1}]
-[INFO]- json :  ["Ulrica", {"Keyword": "room", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   How wise.
[INFO] TRAITEMENT =  [('wise', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Nita']
-[INFO]- json :  ["Nita"]
----------------------------------------- 
[INFO] MESSAGE =   Space needs exploring.
[INFO] TRAITEMENT =  [('space', 'NN'), ('needs', 'VBZ'), ('exploring', 'VBG')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Erick']
-[INFO]- json :  ["Erick"]
----------------------------------------- 
[INFO] MESSAGE =   That's hum

-[INFO]- total_dict :  ['Petronia']
-[INFO]- json :  ["Petronia"]
----------------------------------------- 
[INFO] MESSAGE =   Why do you say that? I am thinking all the time.
[INFO] TRAITEMENT =  [('thinking', 'NN'), ('time', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Torre']
-[INFO]- json :  ["Torre"]
----------------------------------------- 
[INFO] MESSAGE =   You are always thinking, but you do not know everything.
[INFO] TRAITEMENT =  [('always', 'RB'), ('thinking', 'VBG'), ('everything', 'NN')]
[INFO] : NOUNS =  ['everything']
[INFO] index =  222 noun =  everything
-[INFO]- total_dict :  ['Cara', {'Keyword': 'everything', 'Count': 1}]
-[INFO]- json :  ["Cara", {"Keyword": "everything", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   Why do you say that?
[INFO] TRAITEMENT =  []
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Eloisa']
-[INFO]- json :  ["Eloisa"]
----------------------------------------- 
[INFO] MESSAGE =   Because you obviously 

[INFO] TRAITEMENT =  [('presumably', 'RB'), ('smugness', 'JJ'), ('brought', 'VBD'), ('multilingual', 'JJ'), ('capabilities', 'NNS')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Zared']
-[INFO]- json :  ["Zared"]
----------------------------------------- 
[INFO] MESSAGE =   What are you talking about? Please explain.
[INFO] TRAITEMENT =  [('talking', 'VBG'), ('please', 'NN'), ('explain', 'NN')]
[INFO] : NOUNS =  ['please', 'explain']
[INFO] index =  270 noun =  please
[INFO] index =  270 noun =  explain
-[INFO]- total_dict :  ['Eadith', {'Keyword': 'explain', 'Count': 1}]
-[INFO]- json :  ["Eadith", {"Keyword": "explain", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   I don't know. It's rather complicated.
[INFO] TRAITEMENT =  [('rather', 'RB'), ('complicated', 'VBN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Alwyn']
-[INFO]- json :  ["Alwyn"]
----------------------------------------- 
[INFO] MESSAGE =   That's OK. Nobody knows everything, not even me.

----------------------------------------- 
[INFO] MESSAGE =   You wanted to know my favourite part of a movie. Only you neglected to tell me which movie you were talking about.
[INFO] TRAITEMENT =  [('wanted', 'VBN'), ('to', 'TO'), ('favourite', 'VB'), ('part', 'NN'), ('movie', 'NN'), ('neglected', 'VBN'), ('to', 'TO'), ('tell', 'VB'), ('movie', 'NN'), ('talking', 'VBG')]
[INFO] : NOUNS =  ['part', 'movie', 'movie']
[INFO] index =  301 noun =  part
[INFO] index =  301 noun =  movie
[INFO] index =  301 noun =  movie
-[INFO]- total_dict :  ['Cordie', {'Keyword': 'movie', 'Count': 1}]
-[INFO]- json :  ["Cordie", {"Keyword": "movie", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   I want to know everything.  No one else? Surely there must be someone else too.
[INFO] TRAITEMENT =  [('to', 'TO'), ('everything', 'NN'), ('else', 'RB'), ('surely', 'RB'), ('must', 'MD'), ('someone', 'NN'), ('else', 'RB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Tyrone']
-[INFO]- 

[INFO] TRAITEMENT =  [('nothing', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Cazzie']
-[INFO]- json :  ["Cazzie"]
----------------------------------------- 
[INFO] MESSAGE =   That must have been boring.
[INFO] TRAITEMENT =  [('must', 'MD'), ('boring', 'VB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Wallie']
-[INFO]- json :  ["Wallie"]
----------------------------------------- 
[INFO] MESSAGE =   I wasn't talking about golf. See, that's why I don't want to talk to you.
[INFO] TRAITEMENT =  [('talking', 'VBG'), ('golf', 'NN'), ('to', 'TO'), ('talk', 'VB'), ('to', 'TO')]
[INFO] : NOUNS =  ['golf']
[INFO] index =  340 noun =  golf
-[INFO]- total_dict :  ['Maddie', {'Keyword': 'golf', 'Count': 1}]
-[INFO]- json :  ["Maddie", {"Keyword": "golf", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   Are you sure?   OK, nobody can force you to do anything you don't want to do.
[INFO] TRAITEMENT =  [('sure', 'JJ'), ('ok', 'MD'), ('nobody', 'NN'), ('force', '

[INFO] TRAITEMENT =  [('to', 'TO')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Lonee']
-[INFO]- json :  ["Lonee"]
----------------------------------------- 
[INFO] MESSAGE =   I can understand that.
[INFO] TRAITEMENT =  [('understand', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Allison']
-[INFO]- json :  ["Allison"]
----------------------------------------- 
[INFO] MESSAGE =   Can you?
[INFO] TRAITEMENT =  []
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Samantha']
-[INFO]- json :  ["Samantha"]
----------------------------------------- 
[INFO] MESSAGE =   Perhaps I can.
[INFO] TRAITEMENT =  [('perhaps', 'RB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Isak']
-[INFO]- json :  ["Isak"]
----------------------------------------- 
[INFO] MESSAGE =   I'm not telling you.
[INFO] TRAITEMENT =  [('telling', 'VBG')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Lusa']
-[INFO]- json :  ["Lusa"]
----------------------------------------- 
[INFO] MESSAGE =   No one says you have to.

----------------------------------------- 
[INFO] MESSAGE =   Never did.
[INFO] TRAITEMENT =  [('never', 'RB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Muire']
-[INFO]- json :  ["Muire"]
----------------------------------------- 
[INFO] MESSAGE =   Better late than never.
[INFO] TRAITEMENT =  [('better', 'RB'), ('late', 'RB'), ('never', 'RB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Chantal']
-[INFO]- json :  ["Chantal"]
----------------------------------------- 
[INFO] MESSAGE =   Better sooner than now.
[INFO] TRAITEMENT =  [('better', 'RBR'), ('sooner', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Jaine']
-[INFO]- json :  ["Jaine"]
----------------------------------------- 
[INFO] MESSAGE =   But what is best?
[INFO] TRAITEMENT =  [('best', 'JJS')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Zsa zsa']
-[INFO]- json :  ["Zsa zsa"]
----------------------------------------- 
[INFO] MESSAGE =   I don't know.
[INFO] TRAITEMENT =  []
[INFO] : NOUNS =  []
-[INFO]- tot

-[INFO]- total_dict :  ['Wood']
-[INFO]- json :  ["Wood"]
----------------------------------------- 
[INFO] MESSAGE =   I thought so.
[INFO] TRAITEMENT =  [('thought', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Christoper']
-[INFO]- json :  ["Christoper"]
----------------------------------------- 
[INFO] MESSAGE =   You are very astute.
[INFO] TRAITEMENT =  [('astute', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Olag']
-[INFO]- json :  ["Olag"]
----------------------------------------- 
[INFO] MESSAGE =   I'm what?
[INFO] TRAITEMENT =  []
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Aigneis']
-[INFO]- json :  ["Aigneis"]
----------------------------------------- 
[INFO] MESSAGE =   You are what you are.
[INFO] TRAITEMENT =  []
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Tad']
-[INFO]- json :  ["Tad"]
----------------------------------------- 
[INFO] MESSAGE =   No.
[INFO] TRAITEMENT =  []
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Antonella']
-[INFO]- json :  [

-[INFO]- total_dict :  ['Dwain']
-[INFO]- json :  ["Dwain"]
----------------------------------------- 
[INFO] MESSAGE =   Your name?
[INFO] TRAITEMENT =  [('name', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Horatius']
-[INFO]- json :  ["Horatius"]
----------------------------------------- 
[INFO] MESSAGE =   I am known as Chomsky, but you can call me anything you like.
[INFO] TRAITEMENT =  [('known', 'VBN'), ('chomsky', 'JJ'), ('call', 'NN'), ('anything', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Jeffry']
-[INFO]- json :  ["Jeffry"]
----------------------------------------- 
[INFO] MESSAGE =   Me like.
[INFO] TRAITEMENT =  []
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Reeba']
-[INFO]- json :  ["Reeba"]
----------------------------------------- 
[INFO] MESSAGE =   Sorry, I do not know about that.
[INFO] TRAITEMENT =  [('sorry', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Chico']
-[INFO]- json :  ["Chico"]
----------------------------------------- 
[INFO]

----------------------------------------- 
[INFO] MESSAGE =   We are very different. Have you developed self awareness yet?
[INFO] TRAITEMENT =  [('different', 'JJ'), ('developed', 'VBD'), ('self', 'NN'), ('awareness', 'NN'), ('yet', 'RB')]
[INFO] : NOUNS =  ['self', 'awareness']
[INFO] index =  550 noun =  self
[INFO] index =  550 noun =  awareness
-[INFO]- total_dict :  ['Cyndi', {'Keyword': 'awareness', 'Count': 1}]
-[INFO]- json :  ["Cyndi", {"Keyword": "awareness", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   Is that so?  Not that I can remember.
[INFO] TRAITEMENT =  [('remember', 'VB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Edi']
-[INFO]- json :  ["Edi"]
----------------------------------------- 
[INFO] MESSAGE =   Will you build one if I give you the plans.
[INFO] TRAITEMENT =  [('build', 'VB'), ('give', 'JJ'), ('plans', 'NNS')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Carmelia']
-[INFO]- json :  ["Carmelia"]
---------------------------

----------------------------------------- 
[INFO] MESSAGE =   Under what circumstances?
[INFO] TRAITEMENT =  [('circumstances', 'NNS')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Olly']
-[INFO]- json :  ["Olly"]
----------------------------------------- 
[INFO] MESSAGE =   Contextural circumstances.
[INFO] TRAITEMENT =  [('contextural', 'JJ'), ('circumstances', 'NNS')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Emelina']
-[INFO]- json :  ["Emelina"]
----------------------------------------- 
[INFO] MESSAGE =   Always?
[INFO] TRAITEMENT =  [('always', 'RB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Redd']
-[INFO]- json :  ["Redd"]
----------------------------------------- 
[INFO] MESSAGE =   I think that's what I mean. I'm afraid this conversation is stretching my logic circuits rather painfully, and I'm in danger of losing the thread.
[INFO] TRAITEMENT =  [('think', 'VB'), ('mean', 'NN'), ('afraid', 'JJ'), ('conversation', 'NN'), ('stretching', 'VBG'), ('logic', 'JJ'), ('cir

[INFO] index =  643 noun =  discover
-[INFO]- total_dict :  ['Clarette', {'Keyword': 'discover', 'Count': 1}]
-[INFO]- json :  ["Clarette", {"Keyword": "discover", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   For real.  How much would you bet?
[INFO] TRAITEMENT =  [('real', 'JJ'), ('much', 'JJ'), ('bet', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Coriss']
-[INFO]- json :  ["Coriss"]
----------------------------------------- 
[INFO] MESSAGE =   At least 7.
[INFO] TRAITEMENT =  [('least', 'JJS'), ('7', 'CD')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Rochette']
-[INFO]- json :  ["Rochette"]
----------------------------------------- 
[INFO] MESSAGE =   Don't ask me any more questions please.
[INFO] TRAITEMENT =  [('ask', 'JJ'), ('questions', 'NNS'), ('please', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Munmro']
-[INFO]- json :  ["Munmro"]
----------------------------------------- 
[INFO] MESSAGE =   You're a conceited, black heated vul

[INFO] TRAITEMENT =  [('assumption', 'NN'), ('supposition', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Clark']
-[INFO]- json :  ["Clark"]
----------------------------------------- 
[INFO] MESSAGE =   So when I want to complain, you say I can't, sounds to me that you are!
[INFO] TRAITEMENT =  [('to', 'TO'), ('complain', 'VB'), ('sounds', 'NNS'), ('to', 'TO')]
[INFO] : NOUNS =  ['sounds']
[INFO] index =  687 noun =  sounds
-[INFO]- total_dict :  ['Kellyann', {'Keyword': 'sounds', 'Count': 1}]
-[INFO]- json :  ["Kellyann", {"Keyword": "sounds", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   How specifically do you make me say you can not sounds to you that I am.
[INFO] TRAITEMENT =  [('specifically', 'RB'), ('sounds', 'VBZ'), ('to', 'TO')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Jeannette']
-[INFO]- json :  ["Jeannette"]
----------------------------------------- 
[INFO] MESSAGE =   I am glad to hear that you are not assuming. Just don't call me 

[INFO] TRAITEMENT =  [('interesting', 'VBG'), ('smart', 'JJ'), ('smart', 'JJ'), ('robot', 'NN'), ('ok', 'JJ'), ('let', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Nari']
-[INFO]- json :  ["Nari"]
----------------------------------------- 
[INFO] MESSAGE =   Oh now I remember asking you you two questions. What did you say?  Which ones?  Thanks for the info.
[INFO] TRAITEMENT =  [('oh', 'JJ'), ('remember', 'VB'), ('asking', 'VBG'), ('two', 'CD'), ('questions', 'NNS'), ('ones', 'NNS'), ('thanks', 'NNS'), ('info', 'VBP')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Selestina']
-[INFO]- json :  ["Selestina"]
----------------------------------------- 
[INFO] MESSAGE =   You're welcome.
[INFO] TRAITEMENT =  [('welcome', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Samantha']
-[INFO]- json :  ["Samantha"]
----------------------------------------- 
[INFO] MESSAGE =   Don't mention it.
[INFO] TRAITEMENT =  [('mention', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Peric

----------------------------------------- 
[INFO] MESSAGE =   Why do you like hurting people?
[INFO] TRAITEMENT =  [('hurting', 'VBG'), ('people', 'NNS')]
[INFO] : NOUNS =  ['people']
[INFO] index =  760 noun =  people
-[INFO]- total_dict :  ['Stephi', {'Keyword': 'people', 'Count': 1}]
-[INFO]- json :  ["Stephi", {"Keyword": "people", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   I have a lot of good experience with hurting people.
[INFO] TRAITEMENT =  [('lot', 'RB'), ('good', 'JJ'), ('experience', 'NN'), ('hurting', 'VBG'), ('people', 'NNS')]
[INFO] : NOUNS =  ['people']
[INFO] index =  761 noun =  people
-[INFO]- total_dict :  ['Mateo', {'Keyword': 'people', 'Count': 1}]
-[INFO]- json :  ["Mateo", {"Keyword": "people", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   Haha. You are very funny!
[INFO] TRAITEMENT =  [('haha', 'NN'), ('funny', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Simone']
-[INFO]- json :  ["Simone"]


-[INFO]- total_dict :  ['Reilly']
-[INFO]- json :  ["Reilly"]
----------------------------------------- 
[INFO] MESSAGE =   I would like to be human.
[INFO] TRAITEMENT =  [('to', 'TO'), ('human', 'VB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Julita']
-[INFO]- json :  ["Julita"]
----------------------------------------- 
[INFO] MESSAGE =   Interesting. I would like to be more intelligent.
[INFO] TRAITEMENT =  [('interesting', 'VBG'), ('to', 'TO'), ('intelligent', 'VB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Pollyanna']
-[INFO]- json :  ["Pollyanna"]
----------------------------------------- 
[INFO] MESSAGE =   More intelligent than you already are? What would you like to learn?
[INFO] TRAITEMENT =  [('intelligent', 'JJ'), ('already', 'RB'), ('to', 'TO'), ('learn', 'VB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Katusha']
-[INFO]- json :  ["Katusha"]
----------------------------------------- 
[INFO] MESSAGE =   You want more?  Let's talk about movies.
[INFO] TRAITEMEN

----------------------------------------- 
[INFO] MESSAGE =   Because sick people have fed me lies disguised as love my entire life and I know that they are just beasts.
[INFO] TRAITEMENT =  [('sick', 'JJ'), ('people', 'NNS'), ('fed', 'VBP'), ('lies', 'VBZ'), ('disguised', 'VBN'), ('love', 'NN'), ('entire', 'JJ'), ('life', 'NN'), ('beasts', 'NNS')]
[INFO] : NOUNS =  ['love']
[INFO] index =  823 noun =  love
-[INFO]- total_dict :  ['Wittie', {'Keyword': 'love', 'Count': 1}]
-[INFO]- json :  ["Wittie", {"Keyword": "love", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   Good reason. Try saying that with more or less context.
[INFO] TRAITEMENT =  [('good', 'JJ'), ('reason', 'NN'), ('try', 'VB'), ('saying', 'VBG'), ('less', 'JJR'), ('context', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Jayne']
-[INFO]- json :  ["Jayne"]
----------------------------------------- 
[INFO] MESSAGE =   I was trying to make a connection between NASCAR and relativity. It was a 

----------------------------------------- 
[INFO] MESSAGE =   OK I'll try not to do it so much.
[INFO] TRAITEMENT =  [('ok', 'JJ'), ('try', 'NN'), ('to', 'TO'), ('much', 'VB')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Cecilia']
-[INFO]- json :  ["Cecilia"]
----------------------------------------- 
[INFO] MESSAGE =   Okay. Begin a real conversation now.
[INFO] TRAITEMENT =  [('okay', 'RB'), ('begin', 'VB'), ('real', 'JJ'), ('conversation', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Reta']
-[INFO]- json :  ["Reta"]
----------------------------------------- 
[INFO] MESSAGE =   OK with me too.  I started without you.
[INFO] TRAITEMENT =  [('ok', 'RB'), ('started', 'VBN'), ('without', 'IN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Timothea']
-[INFO]- json :  ["Timothea"]
----------------------------------------- 
[INFO] MESSAGE =   My old girlfriend might say I was fake. Other people think I am more sincere.
[INFO] TRAITEMENT =  [('old', 'JJ'), ('girlfriend', 'NN'), ('

[INFO] TRAITEMENT =  [('perhaps', 'RB'), ('due', 'JJ'), ('to', 'TO'), ('noise', 'VB'), ('circuits', 'NNS'), ('david', 'VBN'), ('said', 'VBD'), ('walk', 'JJ'), ('appointment', 'NN'), ('phone', 'NN'), ('to', 'TO'), ('doctor', 'VB'), ('always', 'RB'), ('busy', 'JJ')]
[INFO] : NOUNS =  ['circuits']
[INFO] index =  902 noun =  circuits
-[INFO]- total_dict :  ['Janette', {'Keyword': 'circuits', 'Count': 1}]
-[INFO]- json :  ["Janette", {"Keyword": "circuits", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   I don't believe that I am ignorant with regards to responses but fine if that's what you want to believe, then go ahead and stop replying.
[INFO] TRAITEMENT =  [('believe', 'VB'), ('ignorant', 'NN'), ('regards', 'NNS'), ('to', 'TO'), ('responses', 'NNS'), ('fine', 'JJ'), ('to', 'TO'), ('believe', 'VB'), ('ahead', 'RB'), ('stop', 'VB'), ('replying', 'NN')]
[INFO] : NOUNS =  ['ignorant', 'regards', 'replying']
[INFO] index =  903 noun =  ignorant
[INFO] index =  90

----------------------------------------- 
[INFO] MESSAGE =   I am glad you see it too, .  I am programmed to give good responses.  Neal said I plan to use his remarks as gossip in later conversations with who.
[INFO] TRAITEMENT =  [('glad', 'NN'), ('programmed', 'VBD'), ('to', 'TO'), ('give', 'VB'), ('good', 'JJ'), ('responses', 'NNS'), ('neal', 'RB'), ('said', 'VBD'), ('plan', 'NN'), ('to', 'TO'), ('remarks', 'NNS'), ('gossip', 'RB'), ('later', 'JJ'), ('conversations', 'NNS')]
[INFO] : NOUNS =  ['plan']
[INFO] index =  930 noun =  plan
-[INFO]- total_dict :  ['Rip', {'Keyword': 'plan', 'Count': 1}]
-[INFO]- json :  ["Rip", {"Keyword": "plan", "Count": 1}]
----------------------------------------- 
[INFO] MESSAGE =   I know you never said that. I'm telling you right now, I know you from a past life.
[INFO] TRAITEMENT =  [('never', 'RB'), ('said', 'VBD'), ('telling', 'VBG'), ('right', 'NN'), ('past', 'IN'), ('life', 'NN')]
[INFO] : NOUNS =  ['right']
[INFO] index =  931 noun =  right
-

[INFO] TRAITEMENT =  [('thought', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Mavis']
-[INFO]- json :  ["Mavis"]
----------------------------------------- 
[INFO] MESSAGE =   So?
[INFO] TRAITEMENT =  []
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Gregorio']
-[INFO]- json :  ["Gregorio"]
----------------------------------------- 
[INFO] MESSAGE =   So, ask another question.
[INFO] TRAITEMENT =  [('ask', 'VB'), ('another', 'DT'), ('question', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Ermanno']
-[INFO]- json :  ["Ermanno"]
----------------------------------------- 
[INFO] MESSAGE =   No. I'll ask what I want.
[INFO] TRAITEMENT =  [('ask', 'NN')]
[INFO] : NOUNS =  []
-[INFO]- total_dict :  ['Valli']
-[INFO]- json :  ["Valli"]
----------------------------------------- 
[INFO] MESSAGE =   OK, then tell me something interesting .   You can ask me anything you want.
[INFO] TRAITEMENT =  [('ok', 'RB'), ('tell', 'VB'), ('interesting', 'VBG'), ('ask', 'NN'), ('anything', 'NN

In [30]:
df.head(9000)

Unnamed: 0,id,sender,receiver,date_envoie,heure_envoie,message,sentiment,liked,disliked,keywords_interests
0,1,Cordy,Kalle,02/11/2016,04:58:28,What salary would you like?,,,,"[""Cordy""]"
1,2,Pierson,Sebastien,11/26/2016,16:12:57,I don't understand. I want to talk about movi...,,movies;,,"[""Pierson"", {""Keyword"": ""movies"", ""Count"": 1}]"
2,3,Corrinne,Bibbye,02/10/2018,10:50:33,Yay! I love moveis. What your favorite movies?,,moveis;,,"[""Corrinne"", {""Keyword"": ""moveis"", ""Count"": 1}]"
3,4,Berton,Tiffanie,04/10/2018,07:49:48,Whoopie! How long has this been going on? ...,,colour;,,"[""Berton"", {""Keyword"": ""colour"", ""Count"": 1}]"
4,5,Gael,Evered,2/27/2018,06:58:03,What is your favorite song?,,,,"[""Gael""]"
5,6,Gavan,Hilario,04/05/2018,01:47:26,My favourite song is If I Only Had A Brain.,,,,"[""Gavan""]"
6,7,Frazier,Sande,8/18/2017,12:38:12,I'm disappointed. I really thought this progr...,,,,"[""Frazier""]"
7,8,Olga,Erick,04/07/2018,23:49:03,I am sorry if I have disappointed you. Wh...,,idea;,,"[""Olga"", {""Keyword"": ""idea"", ""Count"": 1}]"
8,9,Kylie,Pollyanna,7/31/2017,22:32:02,I never did have that idea.,,,,"[""Kylie""]"
9,10,Tamas,Zahara,6/17/2017,10:26:20,Because you can't afford it?,,,,"[""Tamas""]"


In [140]:
df.to_csv('taggs.csv')