In [1]:
import json
import pandas as pd
import requests
import numpy as np
import random
import neo4j
import time
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [47]:
# Use seaborn's "whitegrid" style for the figures drawn later in the notebook.
sns.set_style("whitegrid")

In [15]:
## APIs used
# 1. Random users: https://randomuser.me/api
# 2. QuoteGarden: https://quote-garden.herokuapp.com/api
# 3. Kanye West quotes: https://kanye.rest/
# 4. Taylor Swift quotes: https://taylor.rest/
# 5. Game of Thrones quotes: https://gameofthronesquotes.xyz/

In [3]:
# Local file the downloaded random-user records are written to and read back from.
filename = "random_users.json"
# Number of user records requested; the per-API quote quotas must add up to this too.
results_wanted = 1000

In [5]:
# Endpoint of the random-user API; the query string below is appended to it.
random_users_base_url = "https://randomuser.me/api?"
# Query string: JSON output, US nationality, a fixed list of included fields,
# and `results_wanted` records in a single response.
random_users_specifics = f"format=json&&nat=us&&noinfo&&inc=gender,name,nat,location,phone,email,dob&&results={results_wanted}"

In [6]:
# Download the random users in one request and cache the records on disk as JSON.
random_users_final_url = random_users_base_url + random_users_specifics
# A timeout keeps the cell from hanging forever if the service does not answer.
response = requests.get(url=random_users_final_url, timeout=30)
# Fail here with a clear HTTP error rather than a confusing JSON/KeyError below.
response.raise_for_status()
random_users = json.loads(response.text)["results"]
# Plain "w" is enough: the file is only written here and read back by pandas later.
with open(filename, "w") as f:
    json.dump(random_users, f, sort_keys=True, indent=4)

In [7]:
# Load the cached user records and flatten each nested column into a
# DataFrame of its own (one column per nested key).
df = pd.read_json(filename)


def _expand(nested_column):
    """Expand every element of ``nested_column`` into a row via ``pd.Series``."""
    return nested_column.apply(pd.Series)


# Top-level nested fields.
df_name = _expand(df["name"])
df_dob = _expand(df["dob"])
df_location = _expand(df["location"])

# "location" is nested a second time: keep its flat fields as they are and
# expand the remaining sub-fields separately.
df_location_good = df_location.loc[:, ["city", "country", "postcode", "state"]]
df_location_street = _expand(df_location["street"])
df_location_coordinates = _expand(df_location["coordinates"])
df_location_timezone = _expand(df_location["timezone"])

In [8]:
# Swap the nested columns for their flattened counterparts, placed side by side.
df_refreshed = df.drop(columns=["dob", "name", "location"])
flattened_parts = [
    df_refreshed,
    df_name,
    df_dob,
    df_location_good,
    df_location_street,
    df_location_coordinates,
    df_location_timezone,
]
df_new = pd.concat(flattened_parts, axis=1)

# Assign service reps and bank locations in a repeating cycle: np.resize
# repeats each list until it holds exactly one entry per row.
svc_reps = ["Tim", "Jen", "Dan", "Gina", "Mark", "Jolene", "Kurt", "Patty"]
bank_locs = ["East", "North", "South", "West"]
row_count = len(df_new)
df_new["svc_rep"] = np.resize(svc_reps, row_count)
df_new["bank_loc"] = np.resize(bank_locs, row_count)

In [10]:
# Split the requested number of quotes randomly across the quote APIs.

# Number of quote sources the total is divided between (unpacked as Q1..Q4 below).
number_of_quote_apis = 4
def get_result_for_quote_api(results_wanted, number_of_quote_apis):
    """Randomly split ``results_wanted`` into one quota per quote API.

    Args:
        results_wanted: Total number of quotes needed (expected to be a
            non-negative int).
        number_of_quote_apis: How many APIs share the total.

    Returns:
        A list with ``number_of_quote_apis`` ints that sum to
        ``results_wanted``. Every API receives at least one quote whenever
        ``results_wanted >= number_of_quote_apis``; otherwise the leading
        APIs get one quote each and the rest get zero. The list is empty
        when ``number_of_quote_apis`` is zero or negative.
    """
    quotes_per_api = []
    remaining = results_wanted
    for position in range(number_of_quote_apis):
        apis_after = number_of_quote_apis - position - 1
        if apis_after == 0:
            # The last API takes whatever is left so the quotas add up exactly.
            share = remaining
        else:
            # Hold back one quote for each later API. Without this cap an
            # early draw can use up the whole total, and the next
            # random.randint(1, 0) call raises ValueError.
            upper = remaining - apis_after
            if upper >= 1:
                share = random.randint(1, upper)
            else:
                # Too few quotes to go round: hand out single quotes until
                # none are left.
                share = 1 if remaining > 0 else 0
        quotes_per_api.append(share)
        remaining -= share
    return quotes_per_api



# Draw one quota per quote API and unpack them in API order.
Q1, Q2, Q3, Q4 = get_result_for_quote_api(
    results_wanted=results_wanted,
    number_of_quote_apis=number_of_quote_apis,
)

# Sanity check: together the four quotas must cover every requested quote.
assert sum((Q1, Q2, Q3, Q4)) == results_wanted, "Missing quote results."

In [36]:
# Fetch Q1 quotes from QuoteGarden.
# API https://pprathameshmore.github.io/QuoteGarden/#get-a-random-quote
# The quotes are requested in batches of 10 per request.
# NOTE(review): confirm that `totalQuotes` is a parameter the v3 endpoint
# honours and that successive calls return different quotes.
random_quotes = []
quotes_per_iteration = 10
# Ceiling division. round() could fetch too few quotes (Q1=14 -> 1 batch) or
# too many (Q1=16 -> 2 batches), so the combined quote list would no longer
# have one quote per user row.
num_of_iterations = -(-Q1 // quotes_per_iteration)

for x in range(0, num_of_iterations):
    response = requests.get(
        f"https://quote-garden.herokuapp.com/api/v3/quotes?totalQuotes={quotes_per_iteration}",
        timeout=30,
    )
    # Surface HTTP errors directly instead of failing on the JSON lookup below.
    response.raise_for_status()
    quote_response = json.loads(response.text)["data"]
    for i in quote_response:
        quote = i["quoteText"]
        random_quotes.append(quote)

# Drop the surplus from the final batch so that at most Q1 quotes remain.
random_quotes = random_quotes[:Q1]

In [12]:
# API https://kanye.rest/
# The API is not rate limited, but wait 0.25 s between requests to stay polite.
kanye_quotes = []
for x in range(Q2):
    time.sleep(.25)
    response = requests.get("https://api.kanye.rest/format=json", timeout=30)
    # Stop on an HTTP error instead of storing a fragment of an error page.
    response.raise_for_status()
    # Parse the body as JSON rather than slicing fixed character offsets
    # (the old [10:-2] slice implies a {"quote": "..."} payload). Parsing
    # decodes escaped characters and survives whitespace changes.
    quote = json.loads(response.text)["quote"]
    kanye_quotes.append(quote)

In [13]:
# API https://taylor.rest/
# Rate limiting is not documented, so pause between calls here as well.
tswift_quotes = []
remaining_requests = Q3
while remaining_requests > 0:
    time.sleep(.25)
    body = requests.get("https://api.taylor.rest/").text
    # NOTE(review): the fixed offsets presumably strip a constant-size wrapper
    # around the quote text -- confirm against the live response format.
    tswift_quotes.append(body[16:-25])
    remaining_requests -= 1

In [14]:
# https://game-of-thrones-quotes.herokuapp.com/v1/random
# The number of random quotes wanted is part of the URL path, so a single
# request covers all Q4 quotes.
got_url = f"https://game-of-thrones-quotes.herokuapp.com/v1/random/{Q4}"
got_response = json.loads(requests.get(got_url).text)
# Keep only the quote text of each returned entry.
got_quotes = [entry["sentence"] for entry in got_response]

In [38]:
# Combine the quotes from all four sources into one flat list, in source order.
quote_list = random_quotes + kanye_quotes + tswift_quotes + got_quotes

In [43]:
# Score every quote with VADER. Each row holds the score values in the order
# the analyzer's result dict yields them.
analyzer = SentimentIntensityAnalyzer()
analysis_results = [
    list(analyzer.polarity_scores(text).values())
    for text in quote_list
]

In [44]:
# One frame for the quote text and one for its sentiment scores; both keep
# the row order of quote_list.
sentiment_columns = ["neg", "neu", "pos", "compound"]
df_quotes = pd.DataFrame({"Quote": quote_list})
df_analysis = pd.DataFrame(data=analysis_results, columns=sentiment_columns)

In [45]:
# Attach one quote and its sentiment scores to every user row. The frames are
# joined column-wise on their index, so rows only line up one-to-one when all
# three frames have the same number of rows.
df_total = pd.concat([df_new, df_quotes, df_analysis], axis=1)

In [46]:
# Display the combined table of users, quotes and sentiment scores.
df_total

Unnamed: 0,email,gender,nat,phone,first,last,title,age,date,city,...,longitude,description,offset,svc_rep,bank_loc,Quote,neg,neu,pos,compound
0,aubree.graves@example.com,female,US,(062)-623-8315,Aubree,Graves,Mrs,68,1953-03-22T01:19:32.617Z,Sacramento,...,152.7057,"Tokyo, Seoul, Osaka, Sapporo, Yakutsk",+9:00,Tim,East,"All diseases run into one, old age.",0.000,1.000,0.000,0.0000
1,lily.ray@example.com,female,US,(053)-291-6564,Lily,Ray,Mrs,63,1958-07-03T11:45:26.728Z,Spokane,...,-113.8197,"Magadan, Solomon Islands, New Caledonia",+11:00,Jen,North,"Alas, after a certain age every man is respons...",0.135,0.581,0.284,0.3182
2,lois.snyder@example.com,female,US,(710)-588-1256,Lois,Snyder,Ms,31,1990-12-25T00:11:20.411Z,Miami,...,166.8434,"Atlantic Time (Canada), Caracas, La Paz",-4:00,Dan,South,"When grace is joined with wrinkles, it is ador...",0.000,0.607,0.393,0.8658
3,lloyd.reed@example.com,male,US,(154)-516-9382,Lloyd,Reed,Mr,74,1947-05-20T06:43:54.780Z,Altoona,...,144.5967,"Magadan, Solomon Islands, New Caledonia",+11:00,Gina,West,Age is not a particularly interesting subject....,0.115,0.885,0.000,-0.3559
4,eli.sanders@example.com,male,US,(347)-458-7851,Eli,Sanders,Mr,39,1982-08-30T16:20:51.802Z,Cupertino,...,128.3132,Kabul,+4:30,Mark,East,My notion of a wife at 40 is that a man should...,0.000,0.902,0.098,0.3612
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,patrick.fowler@example.com,male,US,(453)-551-0899,Patrick,Fowler,Mr,61,1960-03-10T15:56:31.036Z,Bozeman,...,19.8988,"Western Europe Time, London, Lisbon, Casablanca",0:00,Gina,West,"My sword is yours, in victory and defeat, from...",0.176,0.824,0.000,-0.4588
996,anna.rivera@example.com,female,US,(932)-247-2980,Anna,Rivera,Ms,75,1946-06-15T13:38:40.839Z,Virginia Beach,...,-10.6654,Alaska,-9:00,Mark,East,The only way to keep your people loyal is to m...,0.232,0.588,0.180,-0.3612
997,claire.wade@example.com,female,US,(304)-414-8693,Claire,Wade,Miss,38,1983-11-14T15:23:19.059Z,Roseville,...,105.1301,"Abu Dhabi, Muscat, Baku, Tbilisi",+4:00,Jolene,North,Fear cuts deeper than swords.,0.643,0.357,0.000,-0.6597
998,gordon.dunn@example.com,male,US,(482)-780-6220,Gordon,Dunn,Mr,59,1962-11-01T07:17:16.996Z,Baton Rouge,...,19.6094,"Bombay, Calcutta, Madras, New Delhi",+5:30,Kurt,South,He was no dragon. Fire cannot kill a dragon.,0.157,0.429,0.413,0.5535


In [None]:
# Summary Stats for viz

# Average compound sentiment score for each bank location.
# Group by location and take the mean: indexing the frame with a bare pair of
# names is treated as a single (tuple) column key, and DataFrames have no
# .avg() method.
df_loc_avg_score = df_total.groupby("bank_loc")["compound"].mean()
df_loc_avg_score

In [None]:
# Average compound sentiment score for each service rep.
# Group by rep and take the mean: indexing the frame with a bare pair of names
# is treated as a single (tuple) column key, and DataFrames have no .avg()
# method.
df_svc_repo_avg_score = df_total.groupby("svc_rep")["compound"].mean()
df_svc_repo_avg_score

In [None]:
# Max score each service rep


In [None]:
# Min score for each service rep

In [None]:
# Number of customers per gender.
# The user table has a "gender" column and no "sex" column, so the old column
# name could not be plotted.
sns.countplot(x="gender", data=df_total)

In [None]:
# Compound-score distribution per age group (one histogram panel per group).
# The age column is lower-case "age". It is binned first, because faceting on
# the raw "compound" values would create one panel per distinct score.
age_group = pd.cut(
    df_total["age"],
    bins=[0, 30, 45, 60, 75, 200],
    labels=["<=30", "31-45", "46-60", "61-75", "76+"],
)
g = sns.FacetGrid(data=df_total.assign(age_group=age_group), col="age_group")
g.map(plt.hist, "compound")

In [None]:
# Avg score per svc rep
# barplot draws the mean of `y` for each `x` category by default. countplot
# only counts rows, which says nothing about the scores.
sns.barplot(x="svc_rep", y="compound", data=df_total)

In [None]:
# Avg score per bank location
# barplot draws the mean of `y` for each `x` category by default. countplot
# only counts rows, which says nothing about the scores.
sns.barplot(x="bank_loc", y="compound", data=df_total)

In [None]:
# Likelihood of profanity per compound score
# TODO: no cell shown in this notebook creates a "Profanity" column; it has to
# be added to df_total before this cell can run.
g = sns.FacetGrid(data=df_total, col="Profanity")
# The sentiment column is lower-case "compound".
g.map(plt.hist, "compound")

In [None]:
# Recommendations

In [None]:
# Load data into neo4j
# Saving sample file for testing later on
# df_new.to_json("random_users_for_testing.json", orient="records", lines=True)
# df_neo4j = df_total[[]]