In [1]:
##############
#Dependencies#
##############

import pandas as pd                              ### import pandas ###
import os                                        ### import operating system ###
import xml.etree.ElementTree as ET               ### xml.etree is a flexible container object,
import gzip                                      ### compress and decompress gzip files ###
import time                                      ### import time libraries ###
import shutil                                    ### Higher level copying and archiving ###
import requests                                  ### Libraries to support HTML requests in python ###



In [2]:
####################################################################
# Defenition to pull Incident Reports and Traffic Detectors from MN DOT
####################################################################
# Request incident information - xml.gz file
# Open, decompress, and decode
# Request traffic detector information - xml.gz file
# Open, decompress, and decode

def download():
    i = requests.get('http://data.dot.state.mn.us/iris_xml/incident.xml.gz')
    with open('data/XMLs/incidents.xml', 'w') as handle:
        handle.write(gzip.decompress(i.content).decode('utf-8'))

    s = requests.get('http://data.dot.state.mn.us/iris_xml/stat_sample.xml.gz')
    with open('data/XMLs/station_sample.xml', 'w') as handle:
        handle.write(gzip.decompress(s.content).decode('ISO-8859-1'))

In [3]:
###################################################
# Defenition to convert information in DataFrames
###################################################
# Identify crash information, try to open csv file and convert to DF, save updated DF as csv
# Identify detector information, try to open as csv and convert to DF, save updated DF as csv



def data_check():

        try:
            with open('data/crash_data.csv', 'r') as CD:
                incidents()
        except FileNotFoundError:
                All_Crash_Data = pd.DataFrame(columns=['Name', 'Date', 'DirectionLocation', 'Road', 'Event'])
                with open('data/crash_data.csv', 'w') as f:
                    All_Crash_Data.to_csv(f, header=True)
                    incidents()

        try:
            with open('data/station_data.csv', 'r') as CD:
                stations()
        except FileNotFoundError:
                station_data = pd.DataFrame(columns=  ["Station","Heading", "Time","Order","Speed","Flow","Lat","Lng"])
                with open('data/station_data.csv', 'w') as f:
                    station_data.to_csv(f, header=True)
                    stations()

In [4]:
###################################################
# Parse incident information and save into csv
###################################################

## Create lists, append lists if data exists otherwise enter NA, combine data as DF, save as csv

def incidents():
    dates = []
    incident_dirs = []
    roads = []
    locations = []
    names = []
    events = []

    XMLfile = "data/XMLs/incidents.xml"
    parsedXML = ET.parse(XMLfile)
    root = parsedXML.getroot()
    for child in root:
        try:
            dates.append(child.attrib['event_date'])
        except KeyError:
            dates.append("NA")
        try:
            names.append(str(child.attrib['name']))
        except KeyError:
            name.append("NA")
        try:
            incident_dirs.append(child.attrib['dir'])
        except KeyError:
            incident_dir.append("NA")
        try:
            roads.append(child.attrib['road'])
        except KeyError:
            roads.append('NA')
        try:
            locations.append(child.attrib['location'])
        except KeyError:
            locations.append("NA")
        try: 
            event = child.attrib['event_type'].split("_", 1)
            events.append(event[1])
        except KeyError:
            events.append("NA")


    DF = pd.DataFrame({"Name" : names,
                       "Date" : dates,
                       "Direction": incident_dirs,
                       "Road" : roads,
                       "Location" : locations,
                       "Event" : events})


    print("Incident Data Parsed")

    with open('data/crash_data.csv', 'a') as f:
        DF.to_csv(f, header=False)

In [5]:
###################################################
# Parse station information and save as csv
###################################################

## Create lists, append lists if data exists otherwise enter NA, combine data as DF, save as csv
def stations():
    stations = []
    times = []
    flows = []
    speeds = []
    order = []
    headings = []
    lats = []
    lngs = []
    with open('station_keys/Northbound_35W_StationNames.csv', 'r') as NB:
        
        NB_DF = pd.read_csv(NB)
    with open('station_keys/Southbound_35W_StationNames.csv', 'r') as SB:
        SB_DF = pd.read_csv(SB)
    
    XMLfile = "data/XMLs/station_sample.xml"
    parsedXML = ET.parse(XMLfile)
    root = parsedXML.getroot()
    for child in root:
    
        if child.attrib['sensor'] in NB_DF["1"].values :
            lats.append(NB_DF.loc[NB_DF['1'] == child.attrib['sensor']]['Lat'].values[0])
            lngs.append(NB_DF.loc[NB_DF['1'] == child.attrib['sensor']]['Lng'].values[0])
        
            headings.append("NB")
            order.append(NB_DF.loc[NB_DF['1'] == child.attrib['sensor']]['Order'].values[0])
            try:
                stations.append(child.attrib['sensor'])
            except KeyError:
                stations.append("NA")

            try:
                times.append(str(root.attrib['time_stamp']))
            except KeyError:
                times.append("NA")
            try:
                flows.append(child.attrib['flow'])
            except KeyError:
                flows.append("NA")

            try:
                speeds.append(child.attrib['speed'])
            except KeyError:
                speeds.append("NA")
           
        if child.attrib['sensor'] in SB_DF["1"].values:
            lats.append(SB_DF.loc[SB_DF['1'] == child.attrib['sensor']]['Lat'].values[0])
            lngs.append(SB_DF.loc[SB_DF['1'] == child.attrib['sensor']]['Lng'].values[0])
            headings.append("SB")
            order.append(SB_DF.loc[SB_DF['1'] == child.attrib['sensor']]['Order'].values[0])
            try:
                stations.append(child.attrib['sensor'])
            except KeyError:
                stations.append("NA")

            try:
                times.append(str(root.attrib['time_stamp']))
            except KeyError:
                times.append("NA")
            try:
                flows.append(child.attrib['flow'])
            except KeyError:
                flows.append("NA")

            try:
                speeds.append(child.attrib['speed'])
            except KeyError:
                speeds.append("NA")
            

    DF = pd.DataFrame({"Station" : stations,
                       "Heading": headings,
                        "Time" : times,
                       "Order" : order,
                       "Speed" : speeds,
                       "Flow" : flows,
                      "Lat": lats,
                      "Lng" : lngs })
    with open(f'data/station_data.csv', 'w') as f:
           DF.to_csv(f, header=True)
    print("Station Data Parsed")
    
   

In [6]:
##################################
# Adjust and parse time format
##################################

def time_xml2dt(time_xml):
    from time import mktime
    import time
    from datetime import datetime
    #time_xml='Wed Oct 03 10:13:27 CDT 2018'
    B=time_xml.split()
    B.pop(4)
    B[4]=B[4][2:]
    B_struct=time.strptime(' '.join(B), "%a %b %d  %H:%M:%S %y")
    time_dt=datetime.fromtimestamp(mktime(B_struct))
    return time_dt
time_xml='Wed Oct 03 10:13:27 CDT 2018'
time_xml2dt(time_xml)


                
                

datetime.datetime(2018, 10, 3, 10, 13, 27)

In [7]:
def Route_Summary():
    try:
        Summary = pd.read_csv('data/Route_Summary.csv')
    except FileNotFoundError:
        Summary = pd.DataFrame(columns=["Heading", "Time","Order","Speed","Flow","Lat","Lng"])


        
    All_Station_Data = pd.read_csv('data/station_data.csv')
#     All_Station_Data = All_Station_Data.set_index('Station')
    

    route = All_Station_Data.groupby('Station').head(1).index.values

    for station in route:
            Summary_partial = All_Station_Data.loc[station, 
                                                       ["Station","Heading", "Time","Order","Speed","Flow","Lat","Lng"]]
            Summary = Summary.append(Summary_partial,sort=True)
            Summary = Summary.replace("UNKNOWN",0)
            
 
    Summary = Summary.sort_values(['Station', 'Time'])
    with open('data/Route_Summary.csv', 'w') as f:
        Summary.to_csv(f,header=True, columns=["Station","Heading", "Time","Order","Speed","Flow","Lat","Lng"])
                       
    print("Summary Saved at data/Route_Summary.csv")
     


In [8]:
def config():
    lats = []
    lngs = []
    station_list = []
        
    XMLfile = "data/XMLs/station_config.xml"
    parsedXML = ET.parse(XMLfile)
    root = parsedXML.getroot()
      

    for i in root.iter('corridor'):
        for child in i:
            try:
                station_list.append(child.attrib['station_id'])

            except KeyError:
                station_list.append("no ID")
            try:
                lats.append(child.attrib['lat'])
            except KeyError:
                 lats.append("no ID")
            try:
                lngs.append(child.attrib['lon'])
            except KeyError:
                lngs.append("no ID")



    DF = pd.DataFrame({ "Station":station_list,
    #                        "Label":decription,
                       "Lat":lats, "Lng":lngs,})




    with open('data/station_config.csv', 'w') as f:
        DF.to_csv(f, header=True)
 

In [9]:
############################################
# Identify metro sensor configurations
############################################
# Request xml.gz file, decompress, decode
# with the stat_config.xml, look for a matching station. If not found, write the new station ID to stat_config.csv
try:
        config()
except FileNotFoundError:
    c = requests.get('http://data.dot.state.mn.us/iris_xml/metro_config.xml.gz')
    with open('data/XMLs/station_config.xml', 'w') as handle:
        handle.write(gzip.decompress(c.content).decode('utf-8'))
    Station_Config = pd.DataFrame(columns=['Station', 'Lat', 'Lng'])
    with open('data/station_config.csv', 'w') as f:
        Station_Config.to_csv(f, header=True)
        config()

In [13]:
####################################
#If the program is still running, 
# Print the download is complete
# Print the Parsing is Complete
# Program sleep for 30 seconds
# ####################################

while True:
    download()
    print("download complete")
    data_check()
    Route_Summary()
    print("Parsing Complete, sleeping 30s")
    time.sleep(30)

download complete
Incident Data Parsed
Station Data Parsed
Summary Saved at data/Route_Summary.csv
Parsing Complete, sleeping 30s
download complete
Incident Data Parsed
Station Data Parsed
Summary Saved at data/Route_Summary.csv
Parsing Complete, sleeping 30s
download complete
Incident Data Parsed
Station Data Parsed
Summary Saved at data/Route_Summary.csv
Parsing Complete, sleeping 30s
download complete
Incident Data Parsed
Station Data Parsed
Summary Saved at data/Route_Summary.csv
Parsing Complete, sleeping 30s
download complete
Incident Data Parsed
Station Data Parsed
Summary Saved at data/Route_Summary.csv
Parsing Complete, sleeping 30s
download complete
Incident Data Parsed
Station Data Parsed
Summary Saved at data/Route_Summary.csv
Parsing Complete, sleeping 30s
download complete
Incident Data Parsed
Station Data Parsed
Summary Saved at data/Route_Summary.csv
Parsing Complete, sleeping 30s


KeyboardInterrupt: 