## Create Commute Statistics Files

In [1]:
import pandas as pd
from pathlib import Path
import zipfile

In [2]:
# Point at the origin-destination (OD) distance extract and report whether
# it is present on disk.
ODpath = Path("../data/OD/")
OD_file = ODpath / "od_distance_1k-15k_15-60_miles.csv"
print("OD file exist" if OD_file.exists() else "OD file does not exist")
    

OD file exist


In [3]:
# Load the OD distance table (timed with %time). Both geocode columns are read
# as strings -- presumably so the long numeric block identifiers are kept
# verbatim rather than parsed as numbers; confirm against the data spec.
%time df = pd.read_csv(OD_file, dtype={'w_geocode': str,'h_geocode':str})

Wall time: 45 s


In [4]:
# Narrow the table to the work-block identifier and its group count.
df_w_counts = pd.DataFrame(
    data=df,
    columns=['w_geocode', 'w_group_count'],
)

In [5]:
# One row per distinct (w_geocode, w_group_count) pair; the first occurrence
# of each pair is the one that is kept.
df_w_counts_unique = df_w_counts.drop_duplicates(keep='first')
df_w_counts_unique.head(5)

Unnamed: 0,w_geocode,w_group_count
0,10010205001001,1076
233,10030112023027,1069
495,10030115021041,1183
750,10059505002038,1061
1002,10150007002053,1082


#### Classify distance ranges

In [6]:
def assign_distance(df, short_max=32000, long_min=46000):
    """Label one record with a commute-length category based on its distance.

    Parameters
    ----------
    df : object with a ``distance`` attribute
        A single record (for example the row handed over by
        ``DataFrame.apply(..., axis=1)``); despite the name it is not a
        whole DataFrame.
    short_max : number, default 32000
        Distances strictly below this value are 'short_commute'.
    long_min : number, default 46000
        Distances strictly above this value are 'long_commute'.

    Returns
    -------
    str
        'short_commute', 'long_commute' or 'medium_commute'. Both threshold
        values themselves fall into 'medium_commute'. A NaN distance fails
        both comparisons and is therefore also labelled 'medium_commute'.

    Notes
    -----
    The thresholds are expressed in the same unit as ``distance``
    (presumably metres -- TODO confirm against the OD file documentation).
    """
    distance = df.distance
    if distance < short_max:
        return 'short_commute'
    if distance > long_min:
        return 'long_commute'
    return 'medium_commute'

In [7]:
# Classify every row by commute distance with vectorised comparisons instead
# of a row-by-row apply(axis=1), which calls Python once per record.
# Same cut-offs as assign_distance: < 32000 is short, > 46000 is long, and
# everything else (including both boundary values and missing distances)
# stays 'medium_commute'.
df['commute_length'] = (
    pd.Series('medium_commute', index=df.index)
    .mask(df['distance'] < 32000, 'short_commute')
    .mask(df['distance'] > 46000, 'long_commute')
)


In [8]:
# Keep the work-block id, the job-count columns and the commute label by
# dropping coordinates, distance, home block, group count and state.
df_geocode = df.drop(
    columns=[
        'h_lat', 'h_lon', 'w_group_count', 'w_lat', 'w_lon',
        'distance', 'h_geocode', 'state',
    ]
)
df_geocode.head(5)

Unnamed: 0,w_geocode,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,commute_length
0,10010205001001,1,0,1,0,0,1,0,0,1,0,short_commute
1,10010205001001,1,1,0,0,1,0,0,0,0,1,short_commute
2,10010205001001,1,1,0,0,0,0,1,0,0,1,short_commute
3,10010205001001,1,0,0,1,0,1,0,0,1,0,short_commute
4,10010205001001,1,0,1,0,1,0,0,0,0,1,short_commute


In [9]:
# Same reduction as for the geocode table, but the work-block id is removed
# as well, leaving only the job-count columns and the commute label.
df_commute = df.drop(
    columns=[
        'h_lat', 'h_lon', 'w_group_count', 'w_lat', 'w_lon',
        'distance', 'h_geocode', 'w_geocode', 'state',
    ]
)
df_commute.head(5)

Unnamed: 0,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,commute_length
0,1,0,1,0,0,1,0,0,1,0,short_commute
1,1,1,0,0,1,0,0,0,0,1,short_commute
2,1,1,0,0,0,0,1,0,0,1,short_commute
3,1,0,0,1,0,1,0,0,1,0,short_commute
4,1,0,1,0,1,0,0,0,0,1,short_commute


#### Group the data by the commute_length column

In [10]:
# Total every job-count column within each commute-length category.
df_group_commute = df_commute.groupby('commute_length').sum()

df_group_commute.head(5)

Unnamed: 0_level_0,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03
commute_length,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
long_commute,3089150,613678,1801210,674262,423380,819519,1846251,444765,538644,2105741
medium_commute,2922905,504711,1784069,634125,305315,712102,1905488,426405,421291,2075209
short_commute,3159846,538712,1929433,691701,323521,796876,2039449,431767,445291,2282788


In [11]:
# Save the per-category totals as CSV in the OD data directory.
ODpath = Path("../data/OD/")
OD_file_out = ODpath / "commute_length_1k-15k_od_stats_15-60_miles.csv"
df_group_commute.to_csv(OD_file_out)

#### Sum the records for each state

In [26]:
# Sum the numeric columns per state. numeric_only=True is spelled out because
# df also holds text columns (w_geocode, h_geocode, commute_length); pandas
# 2.0+ defaults to numeric_only=False and would otherwise "sum" those by
# concatenating the strings of every record in a group.
df_state_group = df.groupby('state').sum(numeric_only=True)

In [27]:
# Number of distinct states present in the aggregate.
df_state_group.shape[0]

48

In [None]:
# List the columns of the per-state aggregate.
df_state_group.columns

In [None]:
# Drop the summed coordinate, distance and group-count columns from the
# per-state aggregate. Rebinding (instead of inplace=True) together with
# errors='ignore' keeps the cell re-runnable: executing it a second time no
# longer raises KeyError for columns that are already gone.
df_state_group = df_state_group.drop(
    columns=['distance', 'w_group_count', 'w_lat', 'w_lon', 'h_lat', 'h_lon'],
    errors='ignore',
)
df_state_group.head()

#### Write the state level stats

In [None]:
# Save the per-state totals. The file name carries the 15-60 mile range of
# the input extract, consistent with the other outputs of this notebook.
# NOTE(review): this cell used to write "..._18-60_miles.csv"; any downstream
# reader that expects the old name must be updated.
OD_file_out = ODpath.joinpath("1k-15k_od_state_stats_15-60_miles.csv")
df_state_group.to_csv(OD_file_out)

#### Count the number of records for each state

In [None]:
# Add a constant column of ones; summing it per group yields a record count.
df['count'] = 1

In [None]:
# Preview the first rows, now including the count column.
df.head(5)

In [None]:
# List all columns currently held by df.
df.columns

In [None]:
# Keep only the state key and the per-record counter.
df_state_count = df.loc[:, ['state', 'count']]

In [None]:
# Total number of records going into the per-state count.
df_state_count.shape[0]

In [None]:
# Adding up the ones per state gives the number of records for each state.
df_state_counts = df_state_count.groupby('state').sum()

In [None]:
# Display the per-state record counts (one row per state).
df_state_counts

#### Write the state counts

In [None]:
# Save the per-state record counts as CSV in the OD data directory.
OD_file_out = ODpath / "1k-15k_od_state_counts_15-60_miles.csv"
df_state_counts.to_csv(OD_file_out)