# Fresh Corners ingestion script prototype

This notebook prototypes code snippets for an ingestion script that reads "Just Harvest - Fresh Corners Stores.xlsx" and writes a cleaned CSV.

Dependencies:
* pandas
* xlrd
* os (Python standard library; no installation needed)

In [1]:
import pandas as pd
import os

In [2]:
# Source workbook and destination CSV; both are relative paths, so they are
# resolved against the directory the notebook is run from.
in_path = '../food-data/PFPC_data_files/Just Harvest - Fresh Corners Stores.xlsx'
out_path = '../food-data/Cleaned_data_files/just_harvest_fresh_corners.csv'

# Column order of the cleaned output. Columns that the transform step never
# fills are still created (as empty columns) when the frame is reindexed.
final_cols = [
    # identity / provenance
    'id', 'source_org', 'source_file', 'original_id',
    # what and where
    'type', 'name', 'address', 'city', 'state', 'zip_code', 'county',
    'location_description', 'phone', 'url',
    # geocoding
    'latitude', 'longitude', 'latlng_source',
    # validity window
    'date_from', 'date_to',
    # program flags
    'SNAP', 'WIC', 'FMNP', 'fresh_produce', 'food_bucks',
    'free_distribution', 'open_to_spec_group',
    # free-text notes on data quality
    'data_issues',
]

In [3]:
# Load the raw workbook; with no sheet_name given, pandas reads the first sheet.
df = pd.read_excel(io=in_path)

# Show the raw table so the source column names are visible before mapping.
df

Unnamed: 0,Area,Corner Store,Address,City,Zip,Notes,Participates in Food Bucks SNAP Incentive Program
0,Carrick,Juba Grocery,2721 Brownsville Rd,Pittsburgh,15227,,
1,Rankin,Carl's Café,337 5th Ave,Rankin,15104,,yes
2,East Liberty,Farm Fresh Foods,226 N. Negley Ave,Pittsburgh,15206,,
3,Larimer,L.A. Grocery,511 Larimer Ave,Pittsburgh,15206,,
4,McKeesport,Bailey Food Mart,2316 Bailie Ave,McKeesport,15132,,
5,McKees Rocks,In & Out Corner Market,300 Helen St.,McKees Rocks,15136,,yes
6,McKees Rocks,Rocks Express,700 Frederick Street,McKees Rocks,15136,,yes
7,Mt. Oliver,Deann's Groceria,207 Brownsville Rd.,Mount Oliver,15210,,
8,Uptown,\nSchwartz Market,1901 5th Ave,Pittsburgh,15219,,
9,Clairton,Produce Marketplace,519 St Clair Ave,Clairton,15025,,yes


In [4]:
# Map the raw Fresh Corners sheet onto the output schema (final_cols), record
# which handled fields are missing per row, and write the result to out_path.
# NOTE: this cell overwrites `df`. The reindex below drops the raw columns
# (e.g. 'Corner Store'), so re-run the load cell before re-running this one.

# Assign some columns to schema fields
# Strip all surrounding whitespace (not only '\n') so names with stray spaces
# or tabs around an embedded line break are cleaned too.
df['name'] = df['Corner Store'].str.strip()
df['address'] = df['Address']
df['city'] = df['City']
df['zip_code'] = df['Zip']

# Set some fields directly
df['source_org'] = 'Just Harvest'
df['source_file'] = os.path.basename(in_path)
df['type'] = "convenience store" # debatable!
df['state'] = 'PA'
df['county'] = 'Allegheny'
df['FMNP'] = 1 # per rules from Cat
df['fresh_produce'] = 1 # the whole point of the program

# Food Bucks participation: compare case- and whitespace-insensitively so that
# entries such as 'Yes' or 'yes ' are not silently recorded as 0. Blank cells
# become the string 'nan' after astype(str) and therefore do not match.
food_bucks_col = 'Participates in Food Bucks SNAP Incentive Program'
in_food_bucks = df[food_bucks_col].astype(str).str.strip().str.lower() == 'yes'
df['food_bucks'] = 0 # start with 0 and set relevant sites to 1
df.loc[in_food_bucks, 'food_bucks'] = 1
# Food Bucks sites are marked SNAP = 1. SNAP is left empty (NaN) for all other
# rows, which the missingness check below reports as 'SNAP missing;'.
df.loc[in_food_bucks, 'SNAP'] = 1
df['free_distribution'] = 0
df['data_issues'] = '' # start with blank field, to populate later

# Reorder and add any missing columns (raw columns not in final_cols are dropped)
df = df.reindex(columns = final_cols)

# Identify which columns we have handled
handled_cols = df.columns[~df.isna().all()] # i.e. columns that aren't all NA

# Detect and document missingness in handled columns
for col in handled_cols:
    df.loc[df[col].isna(), 'data_issues'] += '{} missing;'.format(col)

# Write out to CSV (without the row index)
df.to_csv(out_path, index = False)

df

Unnamed: 0,id,source_org,source_file,original_id,type,name,address,city,state,zip_code,...,date_from,date_to,SNAP,WIC,FMNP,fresh_produce,food_bucks,free_distribution,open_to_spec_group,data_issues
0,,Just Harvest,Just Harvest - Fresh Corners Stores.xlsx,,convenience store,Juba Grocery,2721 Brownsville Rd,Pittsburgh,PA,15227,...,,,,,1,1,0,0,,SNAP missing;
1,,Just Harvest,Just Harvest - Fresh Corners Stores.xlsx,,convenience store,Carl's Café,337 5th Ave,Rankin,PA,15104,...,,,1.0,,1,1,1,0,,
2,,Just Harvest,Just Harvest - Fresh Corners Stores.xlsx,,convenience store,Farm Fresh Foods,226 N. Negley Ave,Pittsburgh,PA,15206,...,,,,,1,1,0,0,,SNAP missing;
3,,Just Harvest,Just Harvest - Fresh Corners Stores.xlsx,,convenience store,L.A. Grocery,511 Larimer Ave,Pittsburgh,PA,15206,...,,,,,1,1,0,0,,SNAP missing;
4,,Just Harvest,Just Harvest - Fresh Corners Stores.xlsx,,convenience store,Bailey Food Mart,2316 Bailie Ave,McKeesport,PA,15132,...,,,,,1,1,0,0,,SNAP missing;
5,,Just Harvest,Just Harvest - Fresh Corners Stores.xlsx,,convenience store,In & Out Corner Market,300 Helen St.,McKees Rocks,PA,15136,...,,,1.0,,1,1,1,0,,
6,,Just Harvest,Just Harvest - Fresh Corners Stores.xlsx,,convenience store,Rocks Express,700 Frederick Street,McKees Rocks,PA,15136,...,,,1.0,,1,1,1,0,,
7,,Just Harvest,Just Harvest - Fresh Corners Stores.xlsx,,convenience store,Deann's Groceria,207 Brownsville Rd.,Mount Oliver,PA,15210,...,,,,,1,1,0,0,,SNAP missing;
8,,Just Harvest,Just Harvest - Fresh Corners Stores.xlsx,,convenience store,Schwartz Market,1901 5th Ave,Pittsburgh,PA,15219,...,,,,,1,1,0,0,,SNAP missing;
9,,Just Harvest,Just Harvest - Fresh Corners Stores.xlsx,,convenience store,Produce Marketplace,519 St Clair Ave,Clairton,PA,15025,...,,,1.0,,1,1,1,0,,
