In [128]:
# import packages
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.stats import linregress
import matplotlib.pyplot as plt
import matplotlib.cm as cm 
from matplotlib.colors import ListedColormap
import plotly
import plotly.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go
import plotly.tools as tls
plotly.offline.init_notebook_mode(connected=True)

In [145]:
import dash
import dash_core_components as dcc
import dash_html_components as html

In [12]:
# loading datasets
ycom = pd.read_csv('YCOM_2018_Data.csv', encoding='latin-1')
census = pd.read_csv('us-census-demographic-data/acs2015_county_data.csv')

In [13]:
# Deselct Puerto Rico from census, since Ycom data doesn't cover this state.
census = census.iloc[:3142]

In [14]:
# selecting only the county rows and reseting the index.
ycom_county = ycom.loc[ycom['GeoType'] == 'County']
ycom_county = ycom_county.reset_index(drop=True)

In [15]:
#ycom_county[['County','happening','TotalPop']]

In [16]:
# Separating the counties and states from 'GeoType' column and add those as
# separated columns to the 'ycom_county' dataframe.
county_state_sep = pd.DataFrame(ycom_county.GeoName.str.split(',').tolist())
ycom_county['State'] = county_state_sep[1]
ycom_county['County'] = county_state_sep[0]

In [17]:
# Dropping 'county' and 'Parish' words and the last whitespace
# from the counties.
ycom_county['County'] = ycom_county['County'].str.replace('County', '')
ycom_county['County'] = ycom_county['County'].str.replace('Parish', '')
ycom_county['County'] = ycom_county['County'].str.strip()

In [18]:
# test if the counties are the same and in order in two datasets.
if np.all(ycom_county['County'] == census['County']):
    print('Yaaaaay!')

Yaaaaay!


In [20]:
# getting one dataframe from the two datasets 
data2 = pd.concat(([ycom_county,census]),axis = 1)

In [171]:
n_ycom = list(ycom_county)[3:-2]
n_census = list(census)[4:]

In [22]:
#Pearson's correlation coefficient,2-tailed p-value
stats_outputs = np.zeros((len(n_ycom),len(n_census),5))
stats_outputs_standard = np.zeros((len(n_ycom),len(n_census),5))
for x in range(len(n_ycom)):
    for y in range(len(n_census)):
        #nans when ny (census) index is 9,10,14 ie. income, incomeErr, childpoverty
        #reason is Loving Texas (not kidding), ind=2673, a county with no data for these variables
        #census.Income is same as #census[ny[9]]
        #n.b. if missing values are in census for given variable then county is ignored for that calculation
        ycom_notnull = ycom_county[n_ycom[x]][census[n_census[y]].notnull()]
        census_notnull = census[n_census[y]][census[n_census[y]].notnull()]
        
        #also doing calculations on standardized variables #standardized_column = (column - mean(column)) / std(column)
        ycom_standard = (ycom_notnull - np.mean(ycom_notnull)) / np.std(ycom_notnull)
        census_standard = (census_notnull - np.mean(census_notnull)) / np.std(census_notnull)
        
        
        stats_outputs[x,y,:] = linregress(ycom_notnull, census_notnull)
        stats_outputs_standard[x,y,:] = linregress(ycom_standard, census_standard)
        

print('size of stats array created')        
print('#ycom,#census,#stattypes')        
print(stats_outputs.shape)

size of stats array created
#ycom,#census,#stattypes
(56, 33, 5)


In [23]:
color_map = plt.cm.RdYlBu(np.linspace(0, 1.1, 20))
color_map = ListedColormap(color_map[::-1]) 

color_map_1 = plt.cm.hot(np.linspace(0, 1.1, 20))
color_map_1 = ListedColormap(color_map_1[::-1]) 

In [26]:
trace = go.Heatmap(z=[[1, 20, 30],
                      [20, 1, 60],
                      [30, 60, 1]])
trace

Heatmap({
    'z': [[1, 20, 30], [20, 1, 60], [30, 60, 1]]
})

In [29]:
data=[trace]
data

[Heatmap({
     'z': [[1, 20, 30], [20, 1, 60], [30, 60, 1]]
 })]

In [84]:
#
#data = [go.Heatmap( z=stats_outputs[:,:,2].tolist(), colorscale='Viridis')]
#data=[trace]
#py.iplot(data)
#

#plotly.offline.iplot({
z=stats_outputs[:,:,2]
fig = ff.create_annotated_heatmap(z)
py.iplot(fig, filename='annotated_heatmap',annotation_text='')
#})

AttributeError: 'tuple' object has no attribute 'font'

In [126]:
data=[go.Heatmap(z=stats_outputs[:,:,2], x=n_census, y=n_ycom, colorscale = 'Portland')]
layout=go.layout.Image(sizex=100, sizey=100)
py.iplot(data,layout=layout)

In [144]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('Testing')
plotly_fig = tls.mpl_to_plotly( fig )
trace = dict(z=stats_outputs[:,:,2], type="heatmap", zmin=-1, zmax=1, colorscale = 'Portland')
plotly_fig.add_traces([trace])
plotly_fig['layout']['xaxis'].update({'autorange':True})
plotly_fig['layout']['yaxis'].update({'autorange':True})
plotly_fig['layout'].update({'height':500})
plotly_fig['layout'].update({'width':500})
py.iplot(plotly_fig)

In [146]:
#hey check this out for interactive stuff https://dash.plot.ly/interactive-graphing

In [178]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div([
    html.Div([

        html.Div([
            dcc.Dropdown(
                id='crossfilter-xaxis-column',
                options=[{'label': i, 'value': i} for i in n_census],
                value='Men'
            ),
        ],
        style={'width': '49%', 'display': 'inline-block'}),

        html.Div([
            dcc.Dropdown(
                id='crossfilter-yaxis-column',
                options=[{'label': i, 'value': i} for i in n_ycom],
                value='happening'
            ),
        ], style={'width': '49%', 'float': 'right', 'display': 'inline-block'})
    ], style={
        'borderBottom': 'thin lightgrey solid',
        'backgroundColor': 'rgb(250, 250, 250)',
        'padding': '10px 5px'
    }),

    html.Div([
        dcc.Graph(
            id='crossfilter-indicator-scatter',
            hoverData={'points': [{'customdata': 'Japan'}]}
        )
    ], style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),
    html.Div([
        dcc.Graph(id='x-time-series'),
        dcc.Graph(id='y-time-series'),
    ], style={'display': 'inline-block', 'width': '49%'}),
])


@app.callback(
    dash.dependencies.Output('crossfilter-indicator-scatter', 'figure'),
    [dash.dependencies.Input('crossfilter-xaxis-column', 'value'),
     dash.dependencies.Input('crossfilter-yaxis-column', 'value'),
     dash.dependencies.Input('crossfilter-xaxis-type', 'value'),
     dash.dependencies.Input('crossfilter-yaxis-type', 'value')])

def update_graph(census, ycom, census_name, ycom_name):
    return {
        'data': [go.Heatmap(
            x=census[census_name],
            y=ycom[ycom_name],
            #text=dff[dff['Indicator Name'] == yaxis_column_name]['Country Name'],
            #customdata=dff[dff['Indicator Name'] == yaxis_column_name]['Country Name'],
            colorscale='Viridis',
            }
        )],
        'layout': go.Layout(
            margin={'l': 40, 'b': 30, 't': 10, 'r': 0},
            height=450,
            hovermode='closest'
        )
    }


def create_time_series(census, ycom, title):
    return {
        'data': [go.Scatter(
            x=census[census_name],,
            y=ycom[ycom_name],
        )],
        'layout': {
            'height': 225,
            'margin': {'l': 20, 'b': 30, 'r': 10, 't': 10},
            'annotations': [{
                'x': 0, 'y': 0.85, 'xanchor': 'left', 'yanchor': 'bottom',
                'xref': 'paper', 'yref': 'paper', 'showarrow': False,
                'align': 'left', 'bgcolor': 'rgba(255, 255, 255, 0.5)',
                'text': title
            }],
            'xaxis': {'showgrid': False}
        }
    }


@app.callback(
    dash.dependencies.Output('x-time-series', 'figure'),
    [dash.dependencies.Input('crossfilter-indicator-scatter', 'hoverData'),
     dash.dependencies.Input('crossfilter-xaxis-column', 'value'),
     dash.dependencies.Input('crossfilter-xaxis-type', 'value')])
#def update_y_timeseries(hoverData, xaxis_column_name, axis_type):
    #country_name = hoverData['points'][0]['customdata']
    #dff = df[df['Country Name'] == country_name]
    #dff = dff[dff['Indicator Name'] == xaxis_column_name]
    #title = '<b>{}</b><br>{}'.format(country_name, xaxis_column_name)
    #return create_time_series(dff, axis_type, title)


@app.callback(
    dash.dependencies.Output('y-time-series', 'figure'),
    [dash.dependencies.Input('crossfilter-indicator-scatter', 'hoverData'),
     dash.dependencies.Input('crossfilter-yaxis-column', 'value'),
     dash.dependencies.Input('crossfilter-yaxis-type', 'value')])
def update_x_timeseries(hoverData, yaxis_column_name, axis_type):
    dff = df[df['Country Name'] == hoverData['points'][0]['customdata']]
    dff = dff[dff['Indicator Name'] == yaxis_column_name]
    return create_time_series(dff, axis_type, yaxis_column_name)


if __name__ == '__main__':
    app.run_server(debug=True)

SyntaxError: invalid syntax (<ipython-input-178-8346a9264d24>, line 58)

In [163]:
ycom.head()


0       56.902
1       50.774
2       52.169
3       55.654
4       50.788
5       63.332
6       56.957
7       60.877
8       57.104
9       64.117
10      56.250
11      54.283
12      64.424
13      51.453
14      58.898
15      50.736
16      53.108
17      52.622
18      49.220
19      51.549
20      54.989
21      61.446
22      61.956
23      56.934
24      55.329
25      52.390
26      52.516
27      50.121
28      50.841
29      56.557
         ...  
4532    54.245
4533    46.579
4534    52.048
4535    48.632
4536    54.610
4537    52.021
4538    49.768
4539    50.327
4540    46.311
4541    48.573
4542    53.439
4543    48.344
4544    55.249
4545    51.100
4546    51.015
4547    56.146
4548    52.202
4549    52.779
4550    44.965
4551    52.252
4552    59.111
4553    54.451
4554    57.476
4555    50.776
4556    49.796
4557    53.187
4558    55.033
4559    57.061
4560    49.309
4561    68.462
Name: human, Length: 4562, dtype: float64

In [177]:
census['Men']

0        26745
1        95314
2        14497
3        12073
4        28512
5         5660
6         9502
7        56274
8        16258
9        12975
10       21619
11        6382
12       11834
13        6671
14        7334
15       25174
16       26303
17        6176
18        5579
19       18339
20        6863
21       40081
22       24708
23       19450
24       35474
25       39362
26       19524
27       50207
28        8477
29       15311
         ...  
3112      7799
3113     65880
3114    193441
3115     26378
3116     12783
3117     85019
3118     36594
3119     19692
3120      6002
3121     24914
3122      8592
3123      7118
3124      3716
3125     20358
3126      7180
3127      2394
3128      4525
3129     47911
3130      9356
3131     40495
3132      1164
3133     14230
3134      4495
3135     14695
3136      5353
3137     23359
3138     11537
3139     10636
3140      4144
3141      3695
Name: Men, Length: 3142, dtype: int64

In [174]:
n_census[0]

'Men'