In [1]:
##############
#Dependencies#
##############

import pandas as pd                              ### import pandas ###
import os                                        ### import operating system ###
import xml.etree.ElementTree as ET               ### xml.etree is a flexible container object,
                                                 #   designed to store hierarchical data structures in memory.###
import gzip                                      ### compress and decompress gzip files ###
import time                                      ### import time libraries ###
import shutil                                    ### Higher level copying and archiving ###
import requests                                  ### Libraries to support HTML requests in python ###



In [2]:
####################################################################
# Definition to pull incident reports, detector samples, and station samples from MnDOT
####################################################################
# Request incident information - xml.gz file
# Open, decompress, and decode
# Request traffic detector information - xml.gz file
# Open, decompress, and decode

def _fetch_gzipped_xml(url, destination, encoding, timeout=30):
    """Download one gzip-compressed XML feed and save it decompressed as text.

    Parameters
    ----------
    url : str
        Address of the ``.xml.gz`` resource.
    destination : str
        Path of the text file to (over)write with the decompressed XML.
    encoding : str
        Codec used to decode the decompressed bytes.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on HTTP errors here rather than handing an error page to gzip.
    response.raise_for_status()
    # Decompress before opening the target so a bad payload does not
    # truncate the file saved by a previous successful download.
    text = gzip.decompress(response.content).decode(encoding)
    folder = os.path.dirname(destination)
    if folder:
        os.makedirs(folder, exist_ok=True)
    with open(destination, 'w') as handle:
        handle.write(text)


def download():
    """Fetch the incident, detector-sample and station-sample feeds.

    Each feed is requested as ``.xml.gz``, decompressed, decoded and written
    under ``data/XMLs/`` (the directory is created when missing).
    """
    base_url = 'http://data.dot.state.mn.us/iris_xml/'
    feeds = (
        ('incident.xml.gz', 'data/XMLs/incidents.xml', 'utf-8'),
        ('det_sample.xml.gz', 'data/XMLs/det_sample.xml', 'ISO-8859-1'),
        ('stat_sample.xml.gz', 'data/XMLs/station_sample.xml', 'ISO-8859-1'),
    )
    for remote_name, destination, encoding in feeds:
        _fetch_gzipped_xml(base_url + remote_name, destination, encoding)

In [11]:
###################################################
# Definition to convert the downloaded information into DataFrames
###################################################
# Identify crash information, try to open csv file and convert to DF, save updated DF as csv
# Identify detector information, try to open as csv and convert to DF, save updated DF as csv



def _ensure_csv(path, columns):
    """Create ``path`` containing only a header row if it does not exist yet."""
    if os.path.exists(path):
        return
    with open(path, 'w') as f:
        pd.DataFrame(columns=columns).to_csv(f, header=True)


def data_check():
    """Make sure each output CSV exists, then append the freshly parsed data.

    For the crash, detector and station files in turn: write a header-only
    CSV when the file is missing, then call the matching parser
    (``incidents``, ``detectors``, ``stations``), which appends its rows.
    """
    # The header file is fully written and closed before the parser opens it
    # in append mode, so the header is on disk before any rows are added.
    # Column order matches the frame built by incidents().
    _ensure_csv('data/crash_data.csv',
                ['Name', 'Date', 'Direction', 'Road', 'Location', 'Event'])
    incidents()

    _ensure_csv('data/detector_data.csv',
                ['Sensor', 'Time', 'Occupancy', 'Speed', 'Flow'])
    detectors()

    _ensure_csv('data/station_data.csv',
                ['Station', 'Time', 'Occupancy', 'Speed', 'Flow'])
    stations()

In [5]:
###################################################
# Parse incident information and save into csv
###################################################

## Create lists, append lists if data exists otherwise enter NA, combine data as DF, save as csv

def incidents():
    """Parse ``data/XMLs/incidents.xml`` and append its rows to the crash CSV.

    One row is produced per child of the XML root with the columns
    Name, Date, Direction, Road, Location and Event. A missing attribute is
    recorded as the string ``"NA"``. ``Event`` is the part of ``event_type``
    after the first underscore; when there is no underscore the whole value
    is kept instead of raising.

    The rows (with their index, without a header) are appended to
    ``data/crash_data.csv``.
    """
    XMLfile = "data/XMLs/incidents.xml"
    root = ET.parse(XMLfile).getroot()

    records = []
    for child in root:
        event_type = child.get('event_type')
        records.append({
            "Name": str(child.get('name', "NA")),
            "Date": child.get('event_date', "NA"),
            "Direction": child.get('dir', "NA"),
            "Road": child.get('road', "NA"),
            "Location": child.get('location', "NA"),
            # split(...)[-1] is the suffix after the first "_" when present,
            # otherwise the unsplit value.
            "Event": "NA" if event_type is None else event_type.split("_", 1)[-1],
        })

    # Explicit columns keep the layout stable even when the feed is empty.
    DF = pd.DataFrame(records, columns=["Name", "Date", "Direction",
                                        "Road", "Location", "Event"])

    print("Incident Data Parsed")

    with open('data/crash_data.csv', 'a') as f:
        DF.to_csv(f, header=False)

In [6]:
###################################################
# Parse detector information and save into csv
###################################################

## Create lists, append lists if data exists otherwise enter NA, combine data as DF, save as csv

def detectors():
    """Parse ``data/XMLs/det_sample.xml`` and append its rows to the detector CSV.

    Every child of the XML root becomes one row with the columns Sensor,
    Time, Occupancy, Speed and Flow. ``Time`` is the root element's
    ``time_stamp`` attribute; any attribute that is absent is stored as the
    string ``"NA"``. Rows are appended, with index and without a header, to
    ``data/detector_data.csv``.
    """
    missing = "NA"
    tree_root = ET.parse("data/XMLs/det_sample.xml").getroot()
    samples = list(tree_root)

    # The timestamp lives on the root, so it is the same for every sample.
    if 'time_stamp' in tree_root.attrib:
        stamp = str(tree_root.attrib['time_stamp'])
    else:
        stamp = missing

    def column(attribute):
        # One value per sample, falling back to the placeholder.
        return [sample.attrib.get(attribute, missing) for sample in samples]

    frame = pd.DataFrame({
        "Sensor": column('sensor'),
        "Time": [stamp for _ in samples],
        "Occupancy": column('occ'),
        "Speed": column('speed'),
        "Flow": column('flow'),
    })

    print("Detector Data Parsed")

    with open('data/detector_data.csv', 'a') as out:
        frame.to_csv(out, header=False)

In [19]:
###################################################
# Parse station information and save as csv
###################################################

## Create lists, append lists if data exists otherwise enter NA, combine data as DF, save as csv
def stations():
    """Parse ``data/XMLs/station_sample.xml`` and append its rows to the station CSV.

    Each child of the XML root yields one row: Station (the ``sensor``
    attribute), Time (the root's ``time_stamp``), Occupancy (``occ``),
    Speed and Flow. Absent attributes are written as the string ``"NA"``.
    Rows are appended, with index and without a header, to
    ``data/station_data.csv``.
    """
    source = "data/XMLs/station_sample.xml"
    top = ET.parse(source).getroot()

    station_ids, stamps, occ_values, speed_values, flow_values = [], [], [], [], []
    for node in top:
        attrs = node.attrib
        station_ids.append(attrs['sensor'] if 'sensor' in attrs else "NA")
        # The timestamp is read from the root element, not from the sample.
        stamps.append(str(top.attrib['time_stamp']) if 'time_stamp' in top.attrib else "NA")
        occ_values.append(attrs['occ'] if 'occ' in attrs else 'NA')
        speed_values.append(attrs['speed'] if 'speed' in attrs else "NA")
        flow_values.append(attrs['flow'] if 'flow' in attrs else "NA")

    table = pd.DataFrame({"Station": station_ids,
                          "Time": stamps,
                          "Occupancy": occ_values,
                          "Speed": speed_values,
                          "Flow": flow_values})

    print("Station Data Parsed")

    with open('data/station_data.csv', 'a') as sink:
        table.to_csv(sink, header=False)

In [20]:
##################################
# Adjust and parse time format
##################################

def time_xml2dt(time_xml):
    """Convert a feed timestamp string into a naive ``datetime``.

    Parameters
    ----------
    time_xml : str
        Timestamp such as ``'Wed Oct 03 10:13:27 CDT 2018'``: weekday, month,
        day, time, zone abbreviation, four-digit year. The zone token may be
        omitted.

    Returns
    -------
    datetime.datetime
        The wall-clock time exactly as written; the zone abbreviation is
        discarded, so the result carries no tzinfo.

    Raises
    ------
    ValueError
        If the remaining text does not match the expected layout.
    """
    from datetime import datetime

    tokens = time_xml.split()
    if len(tokens) == 6:
        # Drop the zone abbreviation (e.g. "CDT"): strptime's %Z only accepts
        # UTC/GMT and the local machine's zone names.
        del tokens[4]
    # Parse directly with a four-digit year. No epoch round trip is involved,
    # so the result does not depend on the machine's local timezone.
    return datetime.strptime(' '.join(tokens), "%a %b %d %H:%M:%S %Y")
# Example: convert a sample timestamp; as the last expression in the cell,
# the returned datetime is what the notebook displays below.
time_xml='Wed Oct 03 10:13:27 CDT 2018'
time_xml2dt(time_xml)


                
                

datetime.datetime(2018, 10, 3, 10, 13, 27)

In [21]:
def config():
    """Parse ``data/XMLs/station_config.xml`` and append node rows to the config CSV.

    Only children of the XML root that carry BOTH ``lat`` and ``lon`` are
    kept; each becomes a row with the columns Label (``description``),
    Sensor (``name``), Lat and Lng. A missing description or name is stored
    as the string ``"error"``. Rows are appended, with index and without a
    header, to ``data/station_config.csv``.
    """
    XMLfile = "data/XMLs/station_config.xml"
    root = ET.parse(XMLfile).getroot()

    records = []
    for child in root:
        lat = child.get('lat')
        lon = child.get('lon')
        # Check both coordinates before recording anything, so a node with
        # only one of them cannot leave the columns with unequal lengths.
        if lat is None or lon is None:
            continue
        records.append({
            "Label": child.get('description', "error"),
            "Sensor": child.get('name', "error"),
            "Lat": lat,
            "Lng": lon,
        })

        ### NODE NAMES ARE FOUND IN CHILD[0][X]#####

    # Every kept record has all four fields as strings, so no NaN filtering
    # is needed. Explicit columns keep the layout stable for an empty feed.
    DF = pd.DataFrame(records, columns=["Label", "Sensor", "Lat", "Lng"])

    with open('data/station_config.csv', 'a') as f:
        DF.to_csv(f, header=False)

In [16]:
############################################
# Identify metro sensor configurations
############################################
# Request xml.gz file, decompress, decode
# with the stat_config.xml, look for a matching station. If not found, write the new station ID to stat_config.csv

# Fetch the gzip-compressed metro configuration and save it decompressed.
# The timeout stops the cell from hanging on an unresponsive server, and
# raise_for_status() fails on an HTTP error before gzip sees the payload.
c = requests.get('http://data.dot.state.mn.us/iris_xml/metro_config.xml.gz', timeout=30)
c.raise_for_status()
with open('data/XMLs/station_config.xml', 'w') as handle:
    handle.write(gzip.decompress(c.content).decode('utf-8'))

In [22]:
def Route_Summary(route=None):
    """Print the stored sample for the first station of a route.

    Ensures ``data/station_config.csv`` exists (writing a header-only file
    when it is missing), calls ``config()`` to append the parsed
    configuration, then loads ``data/station_data.csv`` and looks up every
    station of the route by its ``'S<number>'`` label.

    Parameters
    ----------
    route : sequence of int, optional
        Station numbers to look up, in order. Defaults to the route that was
        previously hard-coded in this function.

    Raises
    ------
    KeyError
        If a station of the route is absent from ``data/station_data.csv``.
    """
    config_csv = 'data/station_config.csv'
    # Check the same path that is written below. The header file is closed
    # before config() opens it in append mode. Header names match the
    # columns config() writes.
    if not os.path.exists(config_csv):
        with open(config_csv, 'w') as f:
            pd.DataFrame(columns=['Label', 'Sensor', 'Lat', 'Lng']).to_csv(f, header=True)
    # NOTE(review): config() appends on every call, so repeated runs add
    # duplicate rows to the config CSV - confirm whether that is wanted.
    config()

    station_samples = pd.read_csv('data/station_data.csv')
    station_samples = station_samples[["Station", "Time", "Occupancy", "Speed", "Flow"]]
    station_samples = station_samples.set_index('Station')

    # Route_Name = input("  Name Your Route")
    if route is None:
        route = [584, 567, 583, 568, 582, 569, 570, 581, 580, 571, 579, 572, 578, 573, 577, 587]

    route_rows = [
        station_samples.loc['S' + str(station), ['Time', 'Occupancy', 'Speed', 'Flow']]
        for station in route
    ]
    # TODO: decide what to do with the remaining entries of route_rows;
    # only the first one is shown for now.
    print(route_rows[0])

In [23]:
# Run the pipeline once: fetch the XML feeds, append the parsed rows to the
# CSV files, then print the sample for the first station of the route.
download()
data_check()
Route_Summary()

Incident Data Parsed
Detector Data Parsed
Station Data Parsed


Time         Thu Oct 04 21:19:59 CDT 2018
Occupancy                            4.09
Speed                                  54
Flow                                  440
Name: S584, dtype: object


In [8]:
####################################
# Polling loop (currently disabled):
#   download the feeds and print that the download is complete,
#   parse them and print that parsing is complete,
#   then sleep for 30 seconds and repeat
####################################

# while True:
#     download()
#     print("download complete")
#     data_check()
#     print("Parsing Complete, sleeping 30s")
#     time.sleep(30)