# Calling my API

In [1]:
#importing necessary libraries
from pymongo import MongoClient
import requests
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from textblob import TextBlob
import spacy
from spacy import displacy
import en_core_web_sm



In [2]:
nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/mariana/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /Users/mariana/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/mariana/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
#checking connection
url = 'http://localhost:5000/ejemplo1'
data = requests.get (url).json()
data

{'Amigos': ['Dobby', 'Ras', 'Sheriff', 'Ignacio'], 'Edad': 28, 'Nombre': 'Fer'}

In [4]:
##checking MongoDB
conn = MongoClient('localhost:27017')

# choose database to work with
db = conn.get_database('GoT')

# choose collection to work with
collection = db.get_collection('frases')

In [5]:
dataset = collection.find()

In [6]:
df = pd.DataFrame(dataset)
df.dropna(inplace=True)

In [7]:
df.head()

Unnamed: 0,_id,Season,Episode,Episode Title,Name,Sentence
0,5fc4c1600a5fb174ea948545,Season 1,Episode 1,Winter is Coming,waymar royce,How close did you get?
1,5fc4c1600a5fb174ea948546,Season 1,Episode 1,Winter is Coming,gared,We should head back to the wall.
2,5fc4c1600a5fb174ea948547,Season 1,Episode 1,Winter is Coming,royce,Do the dead frighten you?
3,5fc4c1600a5fb174ea948548,Season 1,Episode 1,Winter is Coming,gared,Our orders were to track the wildlings. We tra...
4,5fc4c1600a5fb174ea948549,Season 1,Episode 1,Winter is Coming,royce,You don't think he'll ask us how they died? Ge...


In [8]:
c = collection

In [9]:
names = list(c.distinct('Name'))

In [10]:
#drop some occasional characters
characters_drop = ['man', 'women', 'boy','girl', 'old man',]
dfnew =df[df['Name'].isin(characters_drop)]

In [11]:
talktative_cha = df['Name'].value_counts().reset_index()
talktative_cha.columns=['Character', 'No of Dialogues']
talktative_cha

Unnamed: 0,Character,No of Dialogues
0,tyrion lannister,1760
1,jon snow,1133
2,daenerys targaryen,1048
3,cersei lannister,1005
4,jaime lannister,945
...,...,...
559,dolorous,1
560,a voice,1
561,several stark bannermen,1
562,dornish prince,1


In [12]:
#Total dialogues by Seasons
temp = df['Season'].value_counts().reset_index()
temp.columns=['Season', 'Counts']
temp.sort_values(by='Season', inplace=True)

In [46]:
#Extracting last names from Name column
df['lastname'] = df['Name'].apply(lambda x : str(x).split()[-1])

lastname_count = df['lastname'].value_counts().head(10).reset_index()
#lastname.columns = ['Last Name', 'Counts']

In [15]:
##Extracting count sentence by character name
pd.DataFrame(c.aggregate([
    { '$sortByCount': "$Name" }
]))

Unnamed: 0,_id,count
0,tyrion lannister,1760
1,jon snow,1133
2,daenerys targaryen,1048
3,cersei lannister,1005
4,jaime lannister,945
...,...,...
560,cohollo,1
561,ser rodrik,1
562,bolton bannerman,1
563,watchman,1


In [16]:
#Let's find families last name, and thus, houses. Example: 'Lannister'
familias = df['Name'].apply(lambda x : str(x).split()[-1])
pd.DataFrame(c.find({'Name' : {'$regex' : ".*ygritte.*"}}).sort('sentence', -1).limit(5))

Unnamed: 0,_id,Season,Episode,Episode Title,Name,Sentence
0,5fc4c1600a5fb174ea9499d1,Season 2,Episode 6,The Old Gods and the New,ygritte,Burn them or maybe you'll need those swords ag...
1,5fc4c1600a5fb174ea9499d3,Season 2,Episode 6,The Old Gods and the New,ygritte,Hundreds and thousands. More than you've ever ...
2,5fc4c1600a5fb174ea9499cb,Season 2,Episode 6,The Old Gods and the New,ygritte,I gave you my name.
3,5fc4c1600a5fb174ea9499cf,Season 2,Episode 6,The Old Gods and the New,ygritte,You ought to burn them you killed.
4,5fc4c1600a5fb174ea9499d2,Season 2,Episode 6,The Old Gods and the New,ygritte,Ygritte.


In [17]:
grupo = familias

### 1. GET Endpoints + params of the query

In [18]:
#1st endpoint - get all messages from a character - in this case, Tyrion Lannister
url_query = 'http://localhost:5000/frases/'
person = 'tyrion lannister'

In [19]:
frases = requests.get(url_query + person).json()

In [20]:
df1 = pd.DataFrame(frases)
df1

Unnamed: 0,Episode,Episode Title,Name,Sentence
0,Episode 1,Winter is Coming,tyrion lannister,I did hear something about that.
1,Episode 1,Winter is Coming,tyrion lannister,Mmh. It is true what they say about the Northe...
2,Episode 1,Winter is Coming,tyrion lannister,And the other brother?
3,Episode 1,Winter is Coming,tyrion lannister,I hear he hates that nickname.
4,Episode 1,Winter is Coming,tyrion lannister,There's the pretty one. And there's the clever...
...,...,...,...,...
1755,Episode 6,The Iron Throne,tyrion lannister,Good. Time to start incurring a new one. We ha...
1756,Episode 6,The Iron Throne,tyrion lannister,"Grandmaester, ahem, it is my theory, based on ..."
1757,Episode 6,The Iron Throne,tyrion lannister,Find the best builders and set them to the task.
1758,Episode 6,The Iron Throne,tyrion lannister,"Lord Davos, we have an armada to rebuild and p..."


In [21]:
#2nd endpoint - get all distinct character names from GoT database
url_query = 'http://localhost:5000/personajes'
names = c.distinct('Name')

In [22]:
personajes = requests.get(url_query).json()

In [23]:
df2 = pd.DataFrame(personajes)
df2

Unnamed: 0,0
0,
1,a voice
2,addam marbrand
3,aemon
4,aeron
...,...
560,young hodor
561,young lyanna
562,young man
563,young ned


In [24]:
#3rd endpoint - input the house and get total number of messages from that house
url_query = 'http://localhost:5000/mensajes/'
house = 'lannister'
mensajes_familias = requests.get(url_query + house)

In [25]:
df3 = pd.DataFrame(mensajes_familias)

### 2. POST Endpoints + params of the query

In [26]:
#Adding a new line to our GoT dataset
datos = {
    'Season': '2',
    'Episode': '6',
    'Episode Title':'The Old Gods and the New',
    'Name': 'ygritte',
    'Sentence': 'You know nothing John Snow'}

In [27]:
url = 'http://localhost:5000/nuevafrase'

In [28]:
requests.post(url, data=datos)

<Response [200]>

### 3. Sentiment Analysis 

#### 3.1 NLTK

In [29]:
todo = pd.DataFrame(list(collection.find()))
todo.head()

Unnamed: 0,_id,Season,Episode,Episode Title,Name,Sentence
0,5fc4c1600a5fb174ea948545,Season 1,Episode 1,Winter is Coming,waymar royce,How close did you get?
1,5fc4c1600a5fb174ea948546,Season 1,Episode 1,Winter is Coming,gared,We should head back to the wall.
2,5fc4c1600a5fb174ea948547,Season 1,Episode 1,Winter is Coming,royce,Do the dead frighten you?
3,5fc4c1600a5fb174ea948548,Season 1,Episode 1,Winter is Coming,gared,Our orders were to track the wildlings. We tra...
4,5fc4c1600a5fb174ea948549,Season 1,Episode 1,Winter is Coming,royce,You don't think he'll ask us how they died? Ge...


In [30]:
#Checking correct functioning for sentimentAnalysis functions
def sentimentAnalysis(sentence):
    sia = SentimentIntensityAnalyzer()
    polarity = sia.polarity_scores(sentence)
    return polarity

In [31]:
def sentimentAnalysis(sentence):
    sia = SentimentIntensityAnalyzer()
    polarity = sia.polarity_scores(sentence)
    pol = polarity['compound']
    return pol

In [32]:
todo['sentiment_compound'] = todo['Sentence'].apply(sentimentAnalysis)

In [33]:
todo.head()

Unnamed: 0,_id,Season,Episode,Episode Title,Name,Sentence,sentiment_compound
0,5fc4c1600a5fb174ea948545,Season 1,Episode 1,Winter is Coming,waymar royce,How close did you get?,0.0
1,5fc4c1600a5fb174ea948546,Season 1,Episode 1,Winter is Coming,gared,We should head back to the wall.,0.0
2,5fc4c1600a5fb174ea948547,Season 1,Episode 1,Winter is Coming,royce,Do the dead frighten you?,-0.7717
3,5fc4c1600a5fb174ea948548,Season 1,Episode 1,Winter is Coming,gared,Our orders were to track the wildlings. We tra...,0.4847
4,5fc4c1600a5fb174ea948549,Season 1,Episode 1,Winter is Coming,royce,You don't think he'll ask us how they died? Ge...,-0.5574


In [34]:
todo.groupby(['Name'])['sentiment_compound'].mean()

Name
a voice           0.000000
addam marbrand    0.210750
aemon             0.047414
aeron             0.217456
aerson           -0.726900
                    ...   
young hodor      -0.032000
young lyanna      0.000000
young man        -0.245675
young ned         0.087717
young rodrik      0.000000
Name: sentiment_compound, Length: 564, dtype: float64

#### 3.2 TextBlob

In [35]:
#Checking TextBlob functioning 
en_blob = TextBlob(u'que tengas buen buen day')
en_blob = en_blob.translate(from_lang='es',to='en')
en_blob

TextBlob("have a good day")

In [36]:
def sentiment(text):
    en_blob=TextBlob(u'{}'.format(text))
    translated = en_blob.translate(to='fr')
    return translated.sentiment

In [37]:
text = 'You know nothing John Snow'

In [38]:
sentiment(text)

Sentiment(polarity=0.0, subjectivity=0.0)

In [42]:
#Endpoint to get mean polarity of all sentences of a character
url_query = 'http://localhost:5000/polaridad/'
name = 'ygritte'
polarity = requests.get(url_query + name)
print(polarity)

<Response [500]>


In [43]:
#Endpoint to get mean subjectivity of all sentences of a character
url_query = 'http://localhost:5000/subjetividad/'
name = 'ygritte'
subjectivity = requests.get(url_query + name)
print(subjectivity)

<Response [500]>
