In [None]:
## API Request to Get LOTR Data from LOTRAPI ## 

"""
https://the-one-api.herokuapp.com/documentation 
Bearer Token - access key

Endpoint	Response	Token required
/book	List of all "The Lord of the Rings" books	no
/book/{id}	Request one specific book	no
/book/{id}/chapter	Request all chapters of one specific book	no
/movie	List of all movies, including the "The Lord of the Rings" and the "The Hobbit" trilogies	yes
/movie/{id}	Request one specific movie	yes
/movie/{id}/quote	Request all movie quotes for one specific movie (only working for the LotR trilogy)	yes
/character	List of characters including metadata like name, gender, realm, race and more	yes
/character/{id}	Request one specific character	yes
/character/{id}/quote	Request all movie quotes of one specific character	yes
/quote	List of all movie quotes	yes
/quote/{id}	Request one specific movie quote	yes
/chapter	List of all book chapters	yes
/chapter/{id}	Request one specific book chapter	yes

"""

In [None]:
# Import Packages #
import io
import json
import os
import random
import re
from statistics import mode

import nltk
import numpy as np
import pandas as pd
import requests
from lxml import html
from nltk.classify import ClassifierI
from nltk.classify.scikitlearn import SklearnClassifier
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.svm import SVC, LinearSVC, NuSVC

In [None]:
# API Variables #
# The access key is taken from the LOTR_API_KEY environment variable so a real
# key never has to be saved in the notebook; the original placeholder is kept
# as the fallback. Supply the raw key only - the "Bearer " prefix is added
# when the Authorization header is built below.
api_key = os.environ.get("LOTR_API_KEY", "Bearer Key")
endpoint = ["book", "character","quote", "movie"]
api_url_base = "https://the-one-api.herokuapp.com/v1/"
headers = {"Authorization": "Bearer {}".format(api_key)}
# Seconds to wait for the server before giving up; without a timeout
# requests.get can block the kernel indefinitely.
request_timeout = 30

# Reset before the loop so a failed "quote" request cannot leave a stale frame
# from an earlier run of this cell in the kernel.
quote_df = None

# Loop to gather data for each endpoint
# NOTE(review): only the first response per endpoint is read; if the API
# paginates its results, later pages are not fetched - confirm against the docs.
for api_end in endpoint:
    # Initialize the url
    api_url = "{}{}".format(api_url_base, api_end)

    # Request API; a connection problem on one endpoint should not abort the rest
    try:
        r = requests.get(api_url, headers=headers, timeout=request_timeout)
    except requests.RequestException as exc:
        print("ERROR: Request to {} endpoint failed: {}".format(api_end, exc))
        continue

    # Check status of request
    if r.status_code == 200:
        # Put the request into json format
        r_json = r.json()
        # Json -> df
        lotr_df = pd.DataFrame(r_json["docs"])
        # Get quote data to be analyzed further
        if api_end == "quote":
            quote_df = lotr_df
        # Export the dataframe to csv for visualization
        lotr_df.to_csv("LOTR_{}_.csv".format(api_end), index=False)
    else:
        # If the request errors.
        print("ERROR: Status {} with {} endpoint.".format(r.status_code, api_end))

# The cells below depend on quote_df, so make a missing download visible here
if quote_df is None:
    print("ERROR: No quote data was retrieved; quote_df is not available for the analysis below.")

In [None]:
## Perform Sentiment Analysis for each character within the fellowship ##

# Character ids of all of the members of the fellowship #
fellowship_char = ["5cd99d4bde30eff6ebccfea0", # Gandalf
                   "5cd99d4bde30eff6ebccfc15", # Frodo
                   "5cd99d4bde30eff6ebccfd0d", # Samwise
                   "5cd99d4bde30eff6ebccfc7c", # Merry
                   "5cd99d4bde30eff6ebccfe2e", # Pippin
                   "5cd99d4bde30eff6ebccfbe6", # Aragorn
                   "5cd99d4bde30eff6ebccfd23", # Gimli
                   "5cd99d4bde30eff6ebccfd81", # Legolas
                   "5cd99d4bde30eff6ebccfc57", # Boromir
                  ]

# Refine the df to just fellowship members: mask is True for quotes whose
# "character" id is in the list above, so keep the rows where it is True
# (negating the mask would keep everyone *except* the fellowship).
mask = quote_df["character"].isin(fellowship_char)
# .copy() gives an independent frame, so adding columns to it later does not
# trigger SettingWithCopyWarning or touch quote_df.
fellowship_df = quote_df[mask].copy()
fellowship_df.shape

In [None]:
## Start Cleaning Text ##

all_words = []
character_words = []

# Create Stopwords
stop_words = list(set(stopwords.words('english')))
# Set copy of the stopwords for constant-time membership checks
stop_word_lookup = set(stop_words)

# Define Parts of Speech to Allow - J: adjective, R: Adverb, V: Verb
allowed_word_types = ["V"] #["J", "R", "V"]


def extract_allowed_words(dialog):
    """Return the allowed-part-of-speech words of one quote as a single string.

    The quote is stripped of everything except letters and whitespace,
    tokenized, filtered against the English stopwords, POS-tagged, and reduced
    to the tokens whose tag starts with one of ``allowed_word_types``.

    Parameters
    ----------
    dialog : str
        Raw quote text. Any non-string value (e.g. NaN for a missing quote)
        yields an empty string instead of raising.

    Returns
    -------
    str
        The kept words, lower-cased and joined by single spaces; ``""`` when
        no word qualifies.
    """
    if not isinstance(dialog, str):
        return ""

    # Remove punctuation. Parentheses are not listed inside the character
    # class: there they would be literal characters and survive the cleanup.
    cleaned = re.sub(r'[^a-zA-Z\s]', '', dialog)

    # Tokenize the string
    tokenized = word_tokenize(cleaned)

    # Remove stopwords. The stopword list is lower-case, so compare in lower
    # case, but keep the original casing for the POS tagger.
    stopped = [word for word in tokenized if word.lower() not in stop_word_lookup]

    # Tag part of speech for each word
    pos = nltk.pos_tag(stopped)

    # Keep every word whose tag starts with an allowed letter
    kept = [token.lower() for token, tag in pos if tag[0] in allowed_word_types]
    return " ".join(kept)


# Build the TEXT column in one step. Assigning to the `row` yielded by
# DataFrame.iterrows() only changes a copy and never reaches the frame, which
# would leave TEXT empty for every record.
fellowship_df = fellowship_df.assign(
    TEXT=fellowship_df["dialog"].map(extract_allowed_words)
)

In [None]:
# Tally the records whose TEXT column is an empty string - these get eliminated next
total_count = len(fellowship_df)
count_empty = fellowship_df["TEXT"].eq("").sum()
print("There are {} empty values out of {} records.".format(count_empty, total_count))

In [None]:
# Keep only the fellowship_df rows whose TEXT value is not an empty string
empty_string_mask = fellowship_df["TEXT"].eq("")
fellowship_df = fellowship_df.loc[~empty_string_mask]

# Write the remaining rows to CSV, leaving out the index column
fellowship_df.to_csv(
    "LOTR_fellowship_verbs_.csv",
    index=False,
)