# Discretionary Income Analysis

## Jupyter Notebook for Visualizations

Refer to the README for instructions on installing the required packages. The Plotly package is required for the visualizations.

In [43]:
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
from plotly import tools

# Plotly credentials enable saving of the plots to your account. They are read from
# the environment so that no API key is ever stored in the notebook itself. Set them
# in the shell that launches Jupyter:
#   export PLOTLY_USERNAME='____'
#   export PLOTLY_API_KEY='____'
# NOTE: an API key used to be hard-coded in this cell; treat that key as compromised
# and regenerate it in the plotly account settings.
import os

plotly_username = os.environ.get('PLOTLY_USERNAME')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')

# Only (re)write the credentials file when both values are supplied; otherwise any
# credentials previously saved with set_credentials_file are left untouched.
if plotly_username and plotly_api_key:
    plotly.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key)

## Distribution of Discretionary Income across the United States for Different Industries

In [44]:
'''
Analyze the distribution of discretionary income across the United States for the occupations:
Management, Computer and Mathematical, Healthcare Practitioners and Technical, Office and Administrative Support, Installation, Maintenance and Repair

MANAGEMENT: The East and Texas have prominently higher discretionary income than the West

COMPUTER AND MATHEMATICAL: Consistent over the entire US

HEALTHCARE PRACTITIONERS AND TECHNICAL: The West wins over the East

OFFICE AND ADMINISTRATIVE SUPPORT: Struggle to make ends meet - negative discretionary income for the top metropolitan cities

'''
from plot_us_map_slide import plot_US_Map_slider

# The imported module prepares both pieces of the figure: the data and the layout dictionaries.
data_slider, layout = plot_US_Map_slider()

# Assemble the figure from the two dictionaries and display it.
fig = {'data': data_slider, 'layout': layout}
py.iplot(fig, filename='us_map_slider')

## Analysis of Trends in Income Components for Different Regions of the US

In [57]:
'''
Analyze the patterns between cost of living and quality of life observed in the three broad regions of the US:
EAST, WEST and CENTRAL
'''

from plot_line_graph import plot_line_chart_trace


def _state_means(path):
    """Return the per-state column means of the workbook at ``path``.

    The rows are grouped by the 'state' column (keeping first-seen group order),
    every numeric column is averaged per state, and the resulting frame is
    sorted by ascending mean 'Discretionary Income'.
    """
    return (
        pd.read_excel(path)
        .groupby('state', sort=False)
        # numeric_only=True: pandas 2.x raises on non-numeric columns (e.g. a text
        # column, if the workbook has one) instead of silently dropping them.
        .mean(numeric_only=True)
        .sort_values(by=['Discretionary Income'])
    )


east_mean_df = _state_means('raw_data/QoL_East.xlsx')
west_mean_df = _state_means('raw_data/QoL_west.xlsx')
central_mean_df = _state_means('raw_data/QoL_central.xlsx')

# For each region: the state labels plus the two series that are plotted against them.
states1 = list(east_mean_df.index)
col1 = east_mean_df['CoL Per Year'].tolist()
qol1 = east_mean_df['quality_of_life_index'].tolist()

states2 = list(west_mean_df.index)
col2 = west_mean_df['CoL Per Year'].tolist()
qol2 = west_mean_df['quality_of_life_index'].tolist()

states3 = list(central_mean_df.index)
col3 = central_mean_df['CoL Per Year'].tolist()
qol3 = central_mean_df['quality_of_life_index'].tolist()

# Names passed to plot_line_chart_trace for the two series of each region.
trace_odd = 'Cost of Living'
trace_even = 'Quality of Life'

fig = plot_line_chart_trace(states1, col1, qol1, states2, col2, qol2, states3, col3, qol3, trace_odd, trace_even)

fig['layout'].update(title='Comparison of Cost of Living with Quality of Life')
py.iplot(fig, filename='col_qol')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3           -      ]



In [58]:
'''
Analyze the patterns between quality of life and discretionary income observed in the three broad regions of the US:
EAST, WEST and CENTRAL
'''


def _state_means(path):
    """Return the per-state column means of the workbook at ``path``.

    The rows are grouped by the 'state' column (keeping first-seen group order),
    every numeric column is averaged per state, and the resulting frame is
    sorted by ascending mean 'Discretionary Income'.
    Defined in this cell so that the cell can be re-run on its own.
    """
    return (
        pd.read_excel(path)
        .groupby('state', sort=False)
        # numeric_only=True: pandas 2.x raises on non-numeric columns (e.g. a text
        # column, if the workbook has one) instead of silently dropping them.
        .mean(numeric_only=True)
        .sort_values(by=['Discretionary Income'])
    )


east_mean_df = _state_means('raw_data/DI_East.xlsx')
west_mean_df = _state_means('raw_data/DI_west.xlsx')
central_mean_df = _state_means('raw_data/DI_central.xlsx')

# For each region: the state labels plus the two series that are plotted against them.
states1 = list(east_mean_df.index)
di1 = east_mean_df['Discretionary Income'].tolist()
qol1 = east_mean_df['quality_of_life_index'].tolist()

states2 = list(west_mean_df.index)
di2 = west_mean_df['Discretionary Income'].tolist()
qol2 = west_mean_df['quality_of_life_index'].tolist()

states3 = list(central_mean_df.index)
di3 = central_mean_df['Discretionary Income'].tolist()
qol3 = central_mean_df['quality_of_life_index'].tolist()

# Names passed to plot_line_chart_trace for the two series of each region.
trace_odd = 'Discretionary Income'
trace_even = 'Quality of Life'

# plot_line_chart_trace is imported by the preceding line-chart cell.
fig = plot_line_chart_trace(states1, di1, qol1, states2, di2, qol2, states3, di3, qol3, trace_odd, trace_even)

# Pin the first two y-axes to the [0, 1] range.
# NOTE(review): three regions are plotted but yaxis3 is left on auto-range - confirm this is intentional.
fig['layout']['yaxis1'].update(range=[0, 1])
fig['layout']['yaxis2'].update(range=[0, 1])

fig['layout'].update(title='Comparison of Quality of Life with Discretionary Income')
py.iplot(fig, filename='qol_di')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3           -      ]



## Comparison of Top Metropolitan Cities with their Neighbors in the Same State

In [49]:
'''
Analyze the discretionary income of metropolitan cities in comparison with their neighboring cities that have
better quality of life and lower population.
Population decreases and quality of life increases from the first city to the last.
'''

from plot_bar_metropolitan import plot_metropolitan_data, plot_metropolitan_trace

Cities, city1, city2, city3 = plot_metropolitan_data()

# One four-entry palette per city series: only the first entry is coloured, the other three are grey.
# Presumably each entry colours one bar of the series - confirm against plot_metropolitan_trace.
color1 = ['rgb(128,0,0)'] + ['rgba(204,204,204,1)'] * 3
color2 = ['rgb(230,25,75)'] + ['rgba(204,204,204,1)'] * 3
color3 = ['rgb(250,190,190)'] + ['rgba(204,204,204,1)'] * 3

data, layout = plot_metropolitan_trace(Cities, city1, city2, city3, color1, color2, color3)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='di_main_cities_ny')

In [50]:
# Same chart as the previous cell, now with the first two palette entries coloured and the last two grey.
color1 = ['rgb(128,0,0)'] * 2 + ['rgba(204,204,204,1)'] * 2
color2 = ['rgb(230,25,75)'] * 2 + ['rgba(204,204,204,1)'] * 2
color3 = ['rgb(250,190,190)'] * 2 + ['rgba(204,204,204,1)'] * 2

# Cities and city1..city3 come from the plot_metropolitan_data() call in the first metropolitan cell.
data, layout = plot_metropolitan_trace(Cities, city1, city2, city3, color1, color2, color3)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='di_main_cities_ca')

In [51]:
# Same chart again, with the first three palette entries coloured and only the last one grey.
color1 = ['rgb(128,0,0)'] * 3 + ['rgba(204,204,204,1)']
color2 = ['rgb(230,25,75)'] * 3 + ['rgba(204,204,204,1)']
color3 = ['rgb(250,190,190)'] * 3 + ['rgba(204,204,204,1)']

# Cities and city1..city3 come from the plot_metropolitan_data() call in the first metropolitan cell.
data, layout = plot_metropolitan_trace(Cities, city1, city2, city3, color1, color2, color3)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='di_main_cities_tx')

In [52]:
# Final version of the chart: all four palette entries are coloured, nothing is greyed out.
color1 = ['rgb(128,0,0)'] * 4
color2 = ['rgb(230,25,75)'] * 4
color3 = ['rgb(250,190,190)'] * 4

# Cities and city1..city3 come from the plot_metropolitan_data() call in the first metropolitan cell.
data, layout = plot_metropolitan_trace(Cities, city1, city2, city3, color1, color2, color3)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='di_main_cities_fl')

## Use Case of our Discretionary Income Analysis Tool

In [45]:
'''
Consider four sample data science jobs:
1. Game Changer - New York City, NY; Salary - $134,958/year
2. Yelp - San Francisco, CA; Salary - $130,405/year
3. Apple - Austin, TX; Salary - $127,095/year
4. IBM - Raleigh, NC; Salary - $112,045/year

Analyze which job offer is the best
'''

from plot_bar_job_sample import bar_plot_data_format1, bar_plot_data_format2, bar_plot_trace_create

# First bar chart for the four offers, built from the "format1" data of the helper module.
chart_title = 'Income breakdown for Job Postings'
NY, SF, TX, NC, Income = bar_plot_data_format1()
data, layout = bar_plot_trace_create(Income, NY, SF, TX, NC, chart_title)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='job_sample_bar_comp1')

In [46]:
# Second bar chart for the same four offers, this time built from the "format2" data.
# bar_plot_data_format2 and bar_plot_trace_create are imported by the previous job-sample cell.
chart_title = 'Discretionary Income and Quality of Life Comparison'
NY, SF, TX, NC, Income = bar_plot_data_format2()
data, layout = bar_plot_trace_create(Income, NY, SF, TX, NC, chart_title)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='job_sample_bar_comp2')

In [47]:
from plot_pie_chart_job_sample import plot_pie_chart

# plot_pie_chart() hands back the figure object, which is rendered as-is.
pie_filename = 'pie_chart_job_sample'
fig = plot_pie_chart()
py.iplot(fig, filename=pie_filename)