# Initial Exploration
*June 16th, 2017*  
*by Alan Leggitt (leggitta3@gmail.com)*  

- Initial exploration of [VA PTSD Statistics](https://catalog.data.gov/dataset/va-ptsd-statistics) from [data.gov](https://www.data.gov/)

In [None]:
import json
import os
import pprint
from urllib import request
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

In [None]:
# URLs of the remote resources used throughout this notebook.

# data.gov catalog record for the dataset (fetched and printed as metadata)
metadata_url = 'https://catalog.data.gov/harvest/object/417d155b-3332-4fa9-b206-809da7cd02f8'

# NEPEC PTSD JSON files (FY14 / FY15), oldest first
patient_2014_url = 'https://raw.githubusercontent.com/vacobrydsk/VHA-Files/master/NEPEC_Overview_PTSD_FY14.json'
patient_2015_url = 'https://raw.githubusercontent.com/vacobrydsk/VHA-Files/master/NEPEC_Overview_PTSD_FY15.json'
center_2015_url = 'https://raw.githubusercontent.com/vacobrydsk/VHA-Files/master/NEPEC_AnnualDataSheet_PTSD_FY15.json'

# VA facility location listing
va_location_url = 'https://raw.githubusercontent.com/department-of-veterans-affairs/VHA-Facilities/master/VAFacilityLocation.json'

In [None]:
# download the catalog record, parse it as JSON, and pretty-print the result
with request.urlopen(metadata_url) as response:
    raw_body = response.read()

# bytes.decode() with no argument uses UTF-8
metadata = json.loads(raw_body.decode())
pprint.pprint(metadata)

In [None]:
# load each remote JSON file into its own DataFrame (same order as before:
# 2014 patients, 2015 patients, 2015 centers)
patient_2014, patient_2015, center_2015 = (
    pd.read_json(source_url)
    for source_url in (patient_2014_url, patient_2015_url, center_2015_url)
)

# create database connection
# Credentials are read from the environment so they are not stored in the
# notebook itself.
db_credentials = {name: os.getenv(name) for name in ("MYSQL_USER", "MYSQL_PASS")}
missing = [name for name, value in db_credentials.items() if value is None]
if missing:
    # Without this check the URL would silently contain the literal text
    # "None" and the failure would only surface later, on first connection.
    raise RuntimeError(
        "missing required environment variable(s): %s" % ", ".join(missing))

# Percent-encode both parts so characters such as '@', ':' or '/' in the
# credentials cannot be misread as URL delimiters.
engine = create_engine(
    "mysql://%s:%s@localhost/va_open?charset=utf8" % (
        quote_plus(db_credentials["MYSQL_USER"]),
        quote_plus(db_credentials["MYSQL_PASS"])))

In [None]:
# write to mysql database
# Each DataFrame is stored in a table of the same name; an existing table is
# dropped and recreated (if_exists='replace').
# NOTE: the old `flavor='mysql'` keyword is intentionally not passed. It was
# deprecated and later removed from pandas, and with an SQLAlchemy engine the
# SQL dialect is taken from the engine itself.
for table_name, frame in (
        ('patient_2014', patient_2014),
        ('patient_2015', patient_2015),
        ('center_2015', center_2015),
):
    frame.to_sql(table_name, engine, if_exists='replace')

In [None]:
# download the VA facility listing and parse it as UTF-8 encoded JSON
with request.urlopen(va_location_url) as url:
    va_location_dict = json.loads(url.read().decode('utf8'))

# the records of interest live under the top-level 'VAFacilityData' key
location_data = pd.DataFrame(va_location_dict['VAFacilityData'])

# store in the `location` table, replacing any existing table.
# NOTE: the old `flavor='mysql'` keyword is intentionally not passed. It was
# deprecated and later removed from pandas, and with an SQLAlchemy engine the
# SQL dialect is taken from the engine itself.
location_data.to_sql('location', engine, if_exists='replace')