# Initial Exploration
*June 16th, 2017*  
*by Alan Leggitt (leggitta3@gmail.com)*  

- Initial exploration of [VA PTSD Statistics](https://catalog.data.gov/dataset/va-ptsd-statistics) from [data.gov](https://www.data.gov/)

In [1]:
import json
import os
import pprint
from urllib import request
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

In [5]:
# Source URLs: the data.gov catalog record plus the raw JSON data files.
metadata_url = 'https://catalog.data.gov/harvest/object/417d155b-3332-4fa9-b206-809da7cd02f8'

# The three NEPEC files share a single GitHub directory prefix.
nepec_base_url = 'https://raw.githubusercontent.com/vacobrydsk/VHA-Files/master/'
patient_2015_url = nepec_base_url + 'NEPEC_Overview_PTSD_FY15.json'
center_2015_url = nepec_base_url + 'NEPEC_AnnualDataSheet_PTSD_FY15.json'
patient_2014_url = nepec_base_url + 'NEPEC_Overview_PTSD_FY14.json'

# VA facility locations come from a separate repository.
va_location_url = 'https://raw.githubusercontent.com/department-of-veterans-affairs/VHA-Facilities/master/VAFacilityLocation.json'

In [3]:
# Fetch the catalog record for the dataset and pretty-print it.
with request.urlopen(metadata_url) as response:
    raw_metadata = response.read()

# The payload is JSON text; decode the bytes before parsing.
metadata = json.loads(raw_metadata.decode())
pprint.pprint(metadata)

{'@type': 'dcat:Dataset',
 'accessLevel': 'public',
 'bureauCode': ['029:15'],
 'contactPoint': {'@type': 'vcard:Contact',
                  'fn': 'VHA Open Data',
                  'hasEmail': 'mailto:vhaopendata@va.gov'},
 'dataQuality': True,
 'description': 'National-level, VISN-level, and/or VAMC-level statistics on '
                'the numbers and percentages of users of VHA care form the '
                'Northeast Program Evaluation Center (NEPEC).  Some datasets '
                'focus on PTSD others on mental health.  There is no '
                'record-level data.',
 'identifier': 'VA-VHA-10N-014',
 'issued': '2014-03-17',
 'keyword': ['Health',
             'Mental Health',
             'NEPEC',
             'PTSD',
             'VA',
             'VHA',
             'Veteran',
             'disorder',
             'post',
             'stress',
             'traumatic'],
 'landingPage': 'https://www.ptsd.va.gov/about/divisions/evaluation',
 'language': ['en-US'],
 'l

In [6]:
# Load each remote JSON file into its own DataFrame
# (fetched in order: 2014 patients, 2015 patients, 2015 centers).
(patient_2014,
 patient_2015,
 center_2015) = [
    pd.read_json(source_url)
    for source_url in (patient_2014_url, patient_2015_url, center_2015_url)
]

# create database connection
# Read the credentials from the environment and fail early with a clear
# message: os.getenv returns None for an unset variable, which would
# otherwise be formatted into the URL as the literal user/password "None".
mysql_user = os.getenv("MYSQL_USER")
mysql_pass = os.getenv("MYSQL_PASS")
if mysql_user is None or mysql_pass is None:
    raise RuntimeError(
        "Set the MYSQL_USER and MYSQL_PASS environment variables "
        "before creating the database engine")

# URL-quote the credentials so characters such as '@', ':' or '/' in a
# password cannot corrupt the connection URL.
# NOTE: the bare "mysql://" scheme makes SQLAlchemy load its default MySQL
# driver, the MySQLdb module (mysqlclient); that package must be installed
# or engine creation fails with "No module named 'MySQLdb'".
engine = create_engine(
    "mysql://%s:%s@localhost/va_open?charset=utf8" %
    (quote_plus(mysql_user), quote_plus(mysql_pass)))

ImportError: No module named 'MySQLdb'

In [5]:
# write to mysql database
# `flavor` is intentionally not passed: pandas deprecated the argument in 0.19
# and removed it in 0.23, where supplying it raises a TypeError. The SQL
# dialect is taken from the SQLAlchemy engine.
# if_exists='replace' recreates each table, so this cell can be re-run.
patient_2014.to_sql('patient_2014', engine, if_exists='replace')
patient_2015.to_sql('patient_2015', engine, if_exists='replace')
center_2015.to_sql('center_2015', engine, if_exists='replace')

In [6]:
# Download the VA facility location file and parse it as JSON.
with request.urlopen(va_location_url) as response:
    va_location_dict = json.loads(response.read().decode('utf8'))

# The facility records are read from the top-level 'VAFacilityData' key.
location_data = pd.DataFrame(va_location_dict['VAFacilityData'])

# `flavor` is intentionally not passed: pandas deprecated the argument in 0.19
# and removed it in 0.23, where supplying it raises a TypeError. The SQL
# dialect is taken from the SQLAlchemy engine.
location_data.to_sql('location', engine, if_exists='replace')