In [1]:
#Imports
import foursquare
import pandas as pd
import numpy as np
from scripts.constants import *
from scripts import dataframe_builder, api_request

In [2]:
# Collect new random samples
api_request.collect_random_samples(100)

geo location already exists
Ending sample collection after 71 samples collection: Geo location not recieved


In [3]:
# Reconcile dataset with logged API responses
dataframe_builder.update_dataframe()

In [4]:
# Make requests for missing information
api_request.request_missing_information()

In [5]:
#Build data frame for exploration
df = pd.read_csv(SAVE_DIR+RESULTS, index_col=0)
codes = pd.read_csv(SAVE_DIR+"country_codes.csv")
codes.rename(columns={'name':'country','alpha-3':'code'}, inplace=True)
df = pd.merge(df, codes[['country','code']], on='country', how='left')

In [6]:
#Country names that did not match codes
df[df['code'].isna()]['country'].unique()

array(['United Kingdom', nan, 'Iran', 'Russia', 'United States',
       'Visayas', 'Democratic Republic of the Congo', 'North Korea',
       'Cape Verde', 'Venezuela', 'Bolivia', 'Syria', 'Vietnam', 'Laos',
       'Tanzania', 'Luzon'], dtype=object)

In [9]:
#Replace nans with apropriate code
df.loc[df['country'] == 'United Kingdom', 'code'] = 'GBR'
df.loc[df['country'] == 'Iran', 'code'] = 'IRN'
df.loc[df['country'] == 'Russia', 'code'] = 'RUS'
df.loc[df['country'] == 'United States', 'code'] = 'USA'
df.loc[df['country'] == 'Visayas', 'code'] = 'PHL'
df.loc[df['country'] == 'Democratic Republic of the Congo', 'code'] = 'COD'
df.loc[df['country'] == 'North Korea', 'code'] = 'PRK'
df.loc[df['country'] == 'Cape Verde', 'code'] = 'CPV'
df.loc[df['country'] == 'Venezuela', 'code'] = 'VEN'
df.loc[df['country'] == 'Bolivia', 'code'] = 'BOL'
df.loc[df['country'] == 'Syria', 'code'] = 'SYR'
df.loc[df['country'] == 'Vietnam', 'code'] = 'VNM'
df.loc[df['country'] == 'Laos', 'code'] = 'LAO'
df.loc[df['country'] == 'Tanzania', 'code'] = 'TZA'
df = df.dropna()

In [11]:
### Compare Coverage
fs = df[['country','four_square']].groupby('country').sum()
fs_countries = len(fs[fs['four_square']!= 0])
fs_venues = sum(fs['four_square'])

yelp = df[['country','yelp']].groupby('country').sum()
yelp_countries = len(yelp[yelp['yelp']!= 0])
yelp_venues = sum(yelp['yelp'])

google = df[['country','google']].groupby('country').sum()
google_countries = len(google[google['google']!= 0])
google_venues = sum(google['google'])

samples = len(df)
print(f''' 
      From {samples} samples:''')

print(f''' 
      Four Square:
      {fs_venues} venues across {fs_countries} countries
      ''')
print(f''' 
      Google:
      {google_venues} venues across {google_countries} countries
      ''')
print(f''' 
      Yelp:
      {yelp_venues} venues across {yelp_countries} countries
      ''')

 
      From 431 samples:
 
      Four Square:
      2765.0 venues across 52 countries
      
 
      Google:
      158.0 venues across 15 countries
      
 
      Yelp:
      283.0 venues across 11 countries
      


In [12]:
# Group by average for country to country comparison per compnay
# group by sum for comparison between companies
df['total'] = df['yelp'] + df['four_square'] + df['google']
sum_df = df.groupby('code').sum().sort_values('total', ascending=False )
avg_df = df.groupby('code').mean().sort_values('total', ascending=False )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['total'] = df['yelp'] + df['four_square'] + df['google']


In [14]:
# graph function for country comparison
import plotly.graph_objects as go
import pandas as pd

def display_company_choropleth(company, color):
    fig = go.Figure(data=go.Choropleth(
                                        locations = avg_df[avg_df['yelp']!= 0].index,
                                        z = avg_df[avg_df['yelp']!= 0][company],
                                        text = df['code'],
                                        colorscale = color,
                                        marker_line_color='darkgray',
                                        marker_line_width=0.5,
                                        colorbar_title = 'Average results per request',
                                        )
                    )

    fig.update_layout(
                        width=1000,
                        autosize=True,
                        margin=dict(t=50, b=50, l=0, r=0),
                        title_text=f'{company} performance',
                        geo=dict(
                            bgcolor = '#D4D4D4',
                            landcolor = '#D4D4D4',
                            lakecolor = '#D4D4D4',
                            visible=True,
                            showframe=True,
                            showcoastlines=True,
                            projection_type='equirectangular',
                                )

                     )

    fig.data[0].colorbar.x=.9
    fig.data[0].colorbar.y=.59
    fig.show()

In [15]:
display_company_choropleth('four_square','Reds')

In [21]:
display_company_choropleth('google','Blues')

In [17]:
display_company_choropleth('yelp','Greens')

In [19]:
#Create color values for company comparison chart
sum_df['r'] = sum_df['four_square'] / sum_df['total']
sum_df['g'] = sum_df['yelp'] / sum_df['total']
sum_df['b'] = sum_df['google'] / sum_df['total']
sum_df = sum_df.fillna(0)
sum_df

Unnamed: 0_level_0,yelp,four_square,google,total,r,g,b
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
USA,100.0,457.0,0.0,557.0,0.820467,0.179533,0.000000
CAN,9.0,194.0,20.0,223.0,0.869955,0.040359,0.089686
BRA,0.0,214.0,6.0,220.0,0.972727,0.000000,0.027273
DEU,99.0,90.0,20.0,209.0,0.430622,0.473684,0.095694
RUS,0.0,205.0,3.0,208.0,0.985577,0.000000,0.014423
...,...,...,...,...,...,...,...
MRT,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
NAM,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
NER,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
NGA,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000


In [20]:
#company comparison
fig = go.Figure(data=go.Choropleth(

        marker_line_color='darkgray',
        marker_line_width=0.5,
        colorbar_title = 'Average results per request',
    ))

fig.update_layout(
    width=1000,
    autosize=True,
    margin=dict(t=50, b=50, l=0, r=0),
    title_text='Color blending of total results in country.',
    geo=dict(
        bgcolor = '#D4D4D4',
        landcolor = '#D4D4D4',
        lakecolor = '#D4D4D4',
        visible=True,
        showframe=True,
        showcoastlines=True,
        projection_type='equirectangular',
        # framewidth=600
                ),


            )
for code in sum_df.index:
    r = sum_df.loc[code]['r']*255
    g = sum_df.loc[code]['g']*255
    b = sum_df.loc[code]['b']*255
    if r == 0 and g == 0 and b == 0:
        color = 'white'
    else:
        color = f'rgb({r},{g},{b})'
    fig.add_trace(go.Choropleth(locations= [code],
                            z = [1],
                            text = f'Four Square:{round((r/255*100),2)}%, Yelp:{round((g/255*100),2)}%, Google:{round((b/255*100),2)}%',
                            colorscale = [[0, color],[1, color]],
                            colorbar=None,
                            showscale = False))
fig.add_annotation(x=0.85, y=0.7,
            text="Red: Four Square",
            showarrow=False,
            xanchor='left',
            )
fig.add_annotation(x=0.85, y=0.584,
            text="Green: Yelp",
            showarrow=False,
            xanchor='left',
            )
fig.add_annotation(x=0.85, y=0.5,
            text="Blue: Google",
            showarrow=False,
            xanchor='left',
            )

fig.add_annotation(x=0.85, y=0.41,
            text="White: No Results",
            showarrow=False,
            xanchor='left',
            )

fig.show()
    
    
    