## Libraries

In [1]:
import pandas as pd
import requests
#NLTK
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords

#TextBlob
from textblob import TextBlob

#SpaCy
import spacy
from spacy import displacy

from langdetect import detect



## Get the information from the QUOTEAPI 

In [2]:
url = ("http://127.0.0.1:5000/Data")

In [3]:
all_aut =  requests.get(url)
all_aut.status_code

200

In [4]:
data = all_aut.json()

In [5]:
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Author,Category,Quote
0,"Victor Scheffer, 1906 to 2011",science,Although Nature needs thousands or millions of...
1,"Antonie van Leeuwenhoek, 1632 to 1723",science,I believe that thirty million of these animalc...
2,"Richard Dawkins, 1941 – present",science,Biology is the study of the complex things in ...
3,"J. B. S. Haldane, 1892 to 1964",science,"If physics and biology one day meet, and one o..."
4,"Aristotle, 384 BC to 322 BC",science,"By ‘life,’ we mean a thing that can nourish it..."


## Tokenization 

In [6]:
def tokenize (string):
    tokenizer = RegexpTokenizer(r'\w+')
    tokens = tokenizer.tokenize(string)
    return tokens

In [7]:
df["Quote_tok"] = df["Quote"].apply(tokenize)

In [8]:
df.head()

Unnamed: 0,Author,Category,Quote,Quote_tok
0,"Victor Scheffer, 1906 to 2011",science,Although Nature needs thousands or millions of...,"[Although, Nature, needs, thousands, or, milli..."
1,"Antonie van Leeuwenhoek, 1632 to 1723",science,I believe that thirty million of these animalc...,"[I, believe, that, thirty, million, of, these,..."
2,"Richard Dawkins, 1941 – present",science,Biology is the study of the complex things in ...,"[Biology, is, the, study, of, the, complex, th..."
3,"J. B. S. Haldane, 1892 to 1964",science,"If physics and biology one day meet, and one o...","[If, physics, and, biology, one, day, meet, an..."
4,"Aristotle, 384 BC to 322 BC",science,"By ‘life,’ we mean a thing that can nourish it...","[By, life, we, mean, a, thing, that, can, nour..."


In [9]:
df['Quote_tok'] = df['Quote_tok'].apply(lambda x: " ".join(x))
df.head()

Unnamed: 0,Author,Category,Quote,Quote_tok
0,"Victor Scheffer, 1906 to 2011",science,Although Nature needs thousands or millions of...,Although Nature needs thousands or millions of...
1,"Antonie van Leeuwenhoek, 1632 to 1723",science,I believe that thirty million of these animalc...,I believe that thirty million of these animalc...
2,"Richard Dawkins, 1941 – present",science,Biology is the study of the complex things in ...,Biology is the study of the complex things in ...
3,"J. B. S. Haldane, 1892 to 1964",science,"If physics and biology one day meet, and one o...",If physics and biology one day meet and one of...
4,"Aristotle, 384 BC to 322 BC",science,"By ‘life,’ we mean a thing that can nourish it...",By life we mean a thing that can nourish itsel...


## Translating

In [10]:
def intoEnglish(string):
    spanish_string = TextBlob(string)
    try:
        english_blob=spanish_string.translate(from_lang='es',to='en')
        return "".join(list(english_blob))
    except:
        return string

In [11]:
df['Quote_tok'] = df["Quote_tok"].apply(intoEnglish)

In [12]:
df.head()

Unnamed: 0,Author,Category,Quote,Quote_tok
0,"Victor Scheffer, 1906 to 2011",science,Although Nature needs thousands or millions of...,Although Nature needs thousands or millions of...
1,"Antonie van Leeuwenhoek, 1632 to 1723",science,I believe that thirty million of these animalc...,I believe that thirty million of these animalc...
2,"Richard Dawkins, 1941 – present",science,Biology is the study of the complex things in ...,Biology is the study of the complex things in ...
3,"J. B. S. Haldane, 1892 to 1964",science,"If physics and biology one day meet, and one o...",If physics and biology one day meet and one of...
4,"Aristotle, 384 BC to 322 BC",science,"By ‘life,’ we mean a thing that can nourish it...",By life we mean a thing that can nourish itsel...


## Stop words

In [13]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/anagarcia/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [23]:
nltk.downloader.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/anagarcia/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [24]:
stop_words = set(stopwords.words('english'))

In [25]:
df["Quote_tok"] = df["Quote_tok"].apply(lambda x: x.split(" "))

In [26]:
def stop_words (lista):
    stop_words = set(stopwords.words('english'))
    nueva_lista = []
    for string in lista:
        if string not in stop_words:
            nueva_lista.append(string)
    return " ".join(nueva_lista)

In [27]:
df["Quote_tok"] = df["Quote_tok"].apply(stop_words)

In [28]:
df.head()

Unnamed: 0,Author,Category,Quote,Quote_tok
0,"Victor Scheffer, 1906 to 2011",science,Although Nature needs thousands or millions of...,Although Nature needs thousands millions years...
1,"Antonie van Leeuwenhoek, 1632 to 1723",science,I believe that thirty million of these animalc...,I believe thirty million animalcules together ...
2,"Richard Dawkins, 1941 – present",science,Biology is the study of the complex things in ...,Biology study complex things Universe Physics ...
3,"J. B. S. Haldane, 1892 to 1964",science,"If physics and biology one day meet, and one o...",If physics biology one day meet one two swallo...
4,"Aristotle, 384 BC to 322 BC",science,"By ‘life,’ we mean a thing that can nourish it...",By life mean thing nourish grow decay noscript...


## Sentiment Analysis

In [29]:
def sentimentAnalysis(sentence):
    sia = SentimentIntensityAnalyzer()
    polarity = sia.polarity_scores(sentence)
    pol = polarity['compound']
    return pol

In [35]:
df['SA'] = df["Quote_tok"].apply(sentimentAnalysis)
df

Unnamed: 0,Author,Category,Quote,Quote_tok,SA
0,"Victor Scheffer, 1906 to 2011",science,Although Nature needs thousands or millions of...,Although Nature needs thousands millions years...,-0.3400
1,"Antonie van Leeuwenhoek, 1632 to 1723",science,I believe that thirty million of these animalc...,I believe thirty million animalcules together ...,0.0000
2,"Richard Dawkins, 1941 – present",science,Biology is the study of the complex things in ...,Biology study complex things Universe Physics ...,0.0000
3,"J. B. S. Haldane, 1892 to 1964",science,"If physics and biology one day meet, and one o...",If physics biology one day meet one two swallo...,0.0000
4,"Aristotle, 384 BC to 322 BC",science,"By ‘life,’ we mean a thing that can nourish it...",By life mean thing nourish grow decay noscript...,-0.4019
...,...,...,...,...,...
313,Gabriel García Márquez,literature,Time is the River on which the leaves of our t...,Time River leaves thoughts carried oblivion,0.0000
314,Mark Twain,literature,I told Terry I was leaving. She had been think...,I told Terry I leaving She thinking night resi...,0.7003
315,George Eliot,literature,"Babies, babies, babies. Why did God make so ma...",Babies babies babies Why God make many babies ...,-0.0516
316,Geoffrey Chaucer,literature,"Her heart of compressed ash, which had resiste...",Her heart compressed ash resisted telling blow...,0.0382


In [36]:
df.dtypes

Author        object
Category      object
Quote         object
Quote_tok     object
SA           float64
dtype: object

In [40]:
df_sentimental = df.groupby(["Category"]).mean()

In [41]:
df_sentimental.reset_index()

Unnamed: 0,Category,SA
0,literature,0.04654
1,philosophy,0.139058
2,science,0.16498
