In [1]:
##############
#Dependencies#
##############

import pandas as pd                              ### import pandas ###
import os                                        ### import operating system ###
import xml.etree.ElementTree as ET               ### xml.etree is a flexible container object,
                                                 #   designed to store hierarchical data structures in memory.###
import gzip                                      ### compress and decompress gzip files ###
import time                                      ### import time libraries ###
import shutil                                    ### Higher level copying and archiving ###
import requests                                  ### Libraries to support HTML requests in python ###



In [2]:
####################################################################
# Defenition to pull Incident Reports and Traffic Detectors from MN DOT
####################################################################
# Request incident information - xml.gz file
# Open, decompress, and decode
# Request traffic detector information - xml.gz file
# Open, decompress, and decode

def download():
    i = requests.get('http://data.dot.state.mn.us/iris_xml/incident.xml.gz')
    with open('data/XMLs/incidents.xml', 'w') as handle:
        handle.write(gzip.decompress(i.content).decode('utf-8'))
    d = requests.get('http://data.dot.state.mn.us/iris_xml/det_sample.xml.gz')
    with open('data/XMLs/det_sample.xml', 'w') as handle:
        handle.write(gzip.decompress(d.content).decode('ISO-8859-1'))
    s = requests.get('http://data.dot.state.mn.us/iris_xml/stat_sample.xml.gz')
    with open('data/XMLs/station_sample.xml', 'w') as handle:
        handle.write(gzip.decompress(s.content).decode('ISO-8859-1'))

In [3]:
###################################################
# Defenition to convert information in DataFrames
###################################################
# Identify crash information, try to open csv file and convert to DF, save updated DF as csv
# Identify detector information, try to open as csv and convert to DF, save updated DF as csv



def data_check():

        try:
            with open('data/crash_data.csv', 'r') as CD:
                incidents()
        except FileNotFoundError:
                All_Crash_Data = pd.DataFrame(columns=['Name', 'Date', 'DirectionLocation', 'Road', '', 'Event'])
                with open('data/crash_data.csv', 'w') as f:
                    All_Crash_Data.to_csv(f, header=True)
                    incidents()
        try:
            with open('data/detector_data.csv', 'r') as CD:
                detectors()
        except FileNotFoundError:
                Detector_Data = pd.DataFrame(columns=['Sensor', 'Time', 'Occupancy', 'Speed', 'Flow'])
                with open('data/detector_data.csv', 'w') as f:
                    Detector_Data.to_csv(f, header=True)
                    detectors()
        try:
            with open('data/station_data.csv', 'r') as CD:
                stations()
        except FileNotFoundError:
                station_data = pd.DataFrame(columns=['Station', 'Time', 'Occupancy', 'Speed', 'Flow'])
                with open('data/station_data.csv', 'w') as f:
                    station_data.to_csv(f, header=True)
                    stations()

In [4]:
###################################################
# Parse incident information and save into csv
###################################################

## Create lists, append lists if data exists otherwise enter NA, combine data as DF, save as csv

def incidents():
    dates = []
    incident_dirs = []
    roads = []
    locations = []
    names = []
    events = []

    XMLfile = "data/XMLs/incidents.xml"
    parsedXML = ET.parse(XMLfile)
    root = parsedXML.getroot()
    for child in root:
        try:
            dates.append(child.attrib['event_date'])
        except KeyError:
            dates.append("NA")
        try:
            names.append(str(child.attrib['name']))
        except KeyError:
            name.append("NA")
        try:
            incident_dirs.append(child.attrib['dir'])
        except KeyError:
            incident_dir.append("NA")
        try:
            roads.append(child.attrib['road'])
        except KeyError:
            roads.append('NA')
        try:
            locations.append(child.attrib['location'])
        except KeyError:
            locations.append("NA")
        try: 
            event = child.attrib['event_type'].split("_", 1)
            events.append(event[1])
        except KeyError:
            events.append("NA")


    DF = pd.DataFrame({"Name" : names,
                       "Date" : dates,
                       "Direction": incident_dirs,
                       "Road" : roads,
                       "Location" : locations,
                       "Event" : events})


    print("Incident Data Parsed")

    with open('data/crash_data.csv', 'a') as f:
        DF.to_csv(f, header=False)

In [5]:
###################################################
# Parse detector information and save into csv
###################################################

## Create lists, append lists if data exists otherwise enter NA, combine data as DF, save as csv

def detectors():
        
        sensors = []
        times = []
        flows = []
        occupancies = []
        speeds = []
        
        XMLfile = "data/XMLs/det_sample.xml"
        parsedXML = ET.parse(XMLfile)
        root = parsedXML.getroot()
        for child in root:
            try:
                sensors.append(child.attrib['sensor'])
            except KeyError:
                sensors.append("NA")
            try:
                times.append(str(root.attrib['time_stamp']))
            except KeyError:
                times.append("NA")
            try:
                flows.append(child.attrib['flow'])
            except KeyError:
                flows.append("NA")
            try:
                occupancies.append(child.attrib['occ'])
            except KeyError:
                occupancies.append('NA')
            try:
                speeds.append(child.attrib['speed'])
            except KeyError:
                speeds.append("NA")



        DF = pd.DataFrame({"Sensor" : sensors,
                            "Time" : times,
                           "Occupancy": occupancies,
                           "Speed" : speeds,
                           "Flow" : flows})

        print("Detector Data Parsed")

        with open('data/detector_data.csv', 'a') as f:
            DF.to_csv(f, header=False)

In [6]:
###################################################
# Parse station information and save as csv
###################################################

## Create lists, append lists if data exists otherwise enter NA, combine data as DF, save as csv
def stations():
    stations = []
    times = []
    flows = []
    occupancies = []
    speeds = []

    XMLfile = "data/XMLs/station_sample.xml"
    parsedXML = ET.parse(XMLfile)
    root = parsedXML.getroot()
    for child in root:
        try:
            stations.append(child.attrib['sensor'])
        except KeyError:
            stations.append("NA")
        try:
            times.append(str(root.attrib['time_stamp']))
        except KeyError:
            times.append("NA")
        try:
            flows.append(child.attrib['flow'])
        except KeyError:
            flows.append("NA")
        try:
            occupancies.append(child.attrib['occ'])
        except KeyError:
            occupancies.append('NA')
        try:
            speeds.append(child.attrib['speed'])
        except KeyError:
            speeds.append("NA")


    DF = pd.DataFrame({"Station" : stations,
                        "Time" : times,
                       "Occupancy": occupancies,
                       "Speed" : speeds,
                       "Flow" : flows})

    print("Station Data Parsed")

    with open('data/station_data.csv', 'a') as f:
        DF.to_csv(f, header=False)

In [7]:
##################################
# Adjust and parse time format
##################################

def time_xml2dt(time_xml):
    from time import mktime
    import time
    from datetime import datetime
    #time_xml='Wed Oct 03 10:13:27 CDT 2018'
    B=time_xml.split()
    B.pop(4)
    B[4]=B[4][2:]
    B_struct=time.strptime(' '.join(B), "%a %b %d  %H:%M:%S %y")
    time_dt=datetime.fromtimestamp(mktime(B_struct))
    return time_dt
time_xml='Wed Oct 03 10:13:27 CDT 2018'
time_xml2dt(time_xml)


                
                

datetime.datetime(2018, 10, 3, 10, 13, 27)

In [8]:
def config():
#     decription = []
    lats = []
    lngs = []
    station_list = []
        
    XMLfile = "data/XMLs/station_config.xml"
    parsedXML = ET.parse(XMLfile)
    root = parsedXML.getroot()
      

    for i in root.iter('corridor'):
        for child in i:
            try:
                station_list.append(child.attrib['station_id'])

            except KeyError:
                station_list.append("no ID")
            try:
                lats.append(child.attrib['lat'])
            except KeyError:
                 lats.append("no ID")
            try:
                lngs.append(child.attrib['lon'])
            except KeyError:
                lngs.append("no ID")

#             try:
#                 decription.append(child.attrib['description'])
#             except KeyError:
#                 decription.append("error")



    DF = pd.DataFrame({ "Station":station_list,
    #                        "Label":decription,
                       "Lat":lats, "Lng":lngs,})




    with open('data/station_config.csv', 'w') as f:
        DF.to_csv(f, header=True)
 

In [9]:
############################################
# Identify metro sensor configurations
############################################
# Request xml.gz file, decompress, decode
# with the stat_config.xml, look for a matching station. If not found, write the new station ID to stat_config.csv
try:
        config()
except FileNotFoundError:
    c = requests.get('http://data.dot.state.mn.us/iris_xml/metro_config.xml.gz')
    with open('data/XMLs/station_config.xml', 'w') as handle:
        handle.write(gzip.decompress(c.content).decode('utf-8'))
    Station_Config = pd.DataFrame(columns=['Station', 'Lat', 'Lng'])
    with open('data/station_config.csv', 'w') as f:
        Station_Config.to_csv(f, header=True)
        config()

In [10]:
def Route_Summary():
    Route = ['S567','S583','S568','S582','S569','S570','S581',
'S580','S571','S579','S572','S578','S573',"S577","S587"]
    try:
        Summary = pd.read_csv('data/Route_Summary.csv')
    except FileNotFoundError:
        Summary = pd.DataFrame(columns=["Station","Time","Occupancy","Speed","Flow"])



        
    All_Station_Data = pd.read_csv('data/station_data.csv')
    All_Station_Data = All_Station_Data[["Station", "Time", "Occupancy", "Speed", "Flow"]]
    All_Station_Data = All_Station_Data.set_index('Station')
    Summary = Summary.set_index('Station')
    print(Summary)

    


    for station in Route:
    #             Route_Summary.append(Station_Config.loc[station, ['Lat', 'Lng']])
    # we can grab it here, but repeats. better to grab as needed when graphing
            Summary_partial = All_Station_Data.loc[station, 
                                                       ['Station','Time', 'Occupancy', 'Speed', 'Flow']]
            Summary = Summary.append(Summary_partial)

#     Summary.sort_values(['Station','Time'])



    with open('data/Route_Summary.csv', 'w') as f:
        Summary.to_csv(f,header=True, columns=['Station',"Time", "Occupancy", "Speed", "Flow"])
                       
    # for Summary in Route_Summary:
        ## WHAT ARE WE DOING WITH THESE?##
     


In [11]:
download()
data_check()
Route_Summary()

Incident Data Parsed
Detector Data Parsed
Station Data Parsed
Empty DataFrame
Columns: [Time, Occupancy, Speed, Flow]
Index: []


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return getattr(section, self.name)[new_key]


In [12]:
####################################
#If the program is still running, 
# Print the download is complete
# Print the Parsing is Complete
# Program sleep for 30 seconds
# ####################################

while True:
    download()
    print("download complete")
    data_check()
    print("Parsing Complete, sleeping 30s")
    Route_Summary()
    time.sleep(30)

download complete
Incident Data Parsed
Detector Data Parsed
Station Data Parsed
Parsing Complete, sleeping 30s
         Station.1                          Time Occupancy    Speed     Flow
Station                                                                     
S567           NaN  Sat Oct 06 22:09:29 CDT 2018      3.54       64      520
S583           NaN  Sat Oct 06 22:09:29 CDT 2018      4.67       56      570
S568           NaN  Sat Oct 06 22:09:29 CDT 2018      3.61       48      480
S582           NaN  Sat Oct 06 22:09:29 CDT 2018      4.31       74      760
S569           NaN  Sat Oct 06 22:09:29 CDT 2018      3.86       78      780
S570           NaN  Sat Oct 06 22:09:29 CDT 2018      4.81       76      840
S581           NaN  Sat Oct 06 22:09:29 CDT 2018       2.8       74      480
S580           NaN  Sat Oct 06 22:09:29 CDT 2018   UNKNOWN  UNKNOWN  UNKNOWN
S571           NaN  Sat Oct 06 22:09:29 CDT 2018      8.65       60      680
S579           NaN  Sat Oct 06 22:09:29 CD

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  sort=sort)


download complete
Incident Data Parsed
Detector Data Parsed
Station Data Parsed
Parsing Complete, sleeping 30s
         Station.1                          Time Occupancy    Speed     Flow
Station                                                                     
S567           NaN  Sat Oct 06 22:09:29 CDT 2018      3.54       64      520
S583           NaN  Sat Oct 06 22:09:29 CDT 2018      4.67       56      570
S568           NaN  Sat Oct 06 22:09:29 CDT 2018      3.61       48      480
S582           NaN  Sat Oct 06 22:09:29 CDT 2018      4.31       74      760
S569           NaN  Sat Oct 06 22:09:29 CDT 2018      3.86       78      780
S570           NaN  Sat Oct 06 22:09:29 CDT 2018      4.81       76      840
S581           NaN  Sat Oct 06 22:09:29 CDT 2018       2.8       74      480
S580           NaN  Sat Oct 06 22:09:29 CDT 2018   UNKNOWN  UNKNOWN  UNKNOWN
S571           NaN  Sat Oct 06 22:09:29 CDT 2018      8.65       60      680
S579           NaN  Sat Oct 06 22:09:29 CD

download complete
Incident Data Parsed
Detector Data Parsed
Station Data Parsed
Parsing Complete, sleeping 30s
         Station.1                          Time Occupancy    Speed     Flow
Station                                                                     
S567           NaN  Sat Oct 06 22:09:29 CDT 2018      3.54       64      520
S583           NaN  Sat Oct 06 22:09:29 CDT 2018      4.67       56      570
S568           NaN  Sat Oct 06 22:09:29 CDT 2018      3.61       48      480
S582           NaN  Sat Oct 06 22:09:29 CDT 2018      4.31       74      760
S569           NaN  Sat Oct 06 22:09:29 CDT 2018      3.86       78      780
S570           NaN  Sat Oct 06 22:09:29 CDT 2018      4.81       76      840
S581           NaN  Sat Oct 06 22:09:29 CDT 2018       2.8       74      480
S580           NaN  Sat Oct 06 22:09:29 CDT 2018   UNKNOWN  UNKNOWN  UNKNOWN
S571           NaN  Sat Oct 06 22:09:29 CDT 2018      8.65       60      680
S579           NaN  Sat Oct 06 22:09:29 CD

download complete
Incident Data Parsed
Detector Data Parsed
Station Data Parsed
Parsing Complete, sleeping 30s
         Station.1                          Time Occupancy    Speed     Flow
Station                                                                     
S567           NaN  Sat Oct 06 22:09:29 CDT 2018      3.54       64      520
S583           NaN  Sat Oct 06 22:09:29 CDT 2018      4.67       56      570
S568           NaN  Sat Oct 06 22:09:29 CDT 2018      3.61       48      480
S582           NaN  Sat Oct 06 22:09:29 CDT 2018      4.31       74      760
S569           NaN  Sat Oct 06 22:09:29 CDT 2018      3.86       78      780
S570           NaN  Sat Oct 06 22:09:29 CDT 2018      4.81       76      840
S581           NaN  Sat Oct 06 22:09:29 CDT 2018       2.8       74      480
S580           NaN  Sat Oct 06 22:09:29 CDT 2018   UNKNOWN  UNKNOWN  UNKNOWN
S571           NaN  Sat Oct 06 22:09:29 CDT 2018      8.65       60      680
S579           NaN  Sat Oct 06 22:09:29 CD

download complete
Incident Data Parsed
Detector Data Parsed
Station Data Parsed
Parsing Complete, sleeping 30s
         Station.1                          Time Occupancy    Speed     Flow
Station                                                                     
S567           NaN  Sat Oct 06 22:09:29 CDT 2018      3.54       64      520
S583           NaN  Sat Oct 06 22:09:29 CDT 2018      4.67       56      570
S568           NaN  Sat Oct 06 22:09:29 CDT 2018      3.61       48      480
S582           NaN  Sat Oct 06 22:09:29 CDT 2018      4.31       74      760
S569           NaN  Sat Oct 06 22:09:29 CDT 2018      3.86       78      780
S570           NaN  Sat Oct 06 22:09:29 CDT 2018      4.81       76      840
S581           NaN  Sat Oct 06 22:09:29 CDT 2018       2.8       74      480
S580           NaN  Sat Oct 06 22:09:29 CDT 2018   UNKNOWN  UNKNOWN  UNKNOWN
S571           NaN  Sat Oct 06 22:09:29 CDT 2018      8.65       60      680
S579           NaN  Sat Oct 06 22:09:29 CD

download complete
Incident Data Parsed
Detector Data Parsed
Station Data Parsed
Parsing Complete, sleeping 30s
         Station.1                          Time Occupancy    Speed     Flow
Station                                                                     
S567           NaN  Sat Oct 06 22:09:29 CDT 2018      3.54       64      520
S583           NaN  Sat Oct 06 22:09:29 CDT 2018      4.67       56      570
S568           NaN  Sat Oct 06 22:09:29 CDT 2018      3.61       48      480
S582           NaN  Sat Oct 06 22:09:29 CDT 2018      4.31       74      760
S569           NaN  Sat Oct 06 22:09:29 CDT 2018      3.86       78      780
S570           NaN  Sat Oct 06 22:09:29 CDT 2018      4.81       76      840
S581           NaN  Sat Oct 06 22:09:29 CDT 2018       2.8       74      480
S580           NaN  Sat Oct 06 22:09:29 CDT 2018   UNKNOWN  UNKNOWN  UNKNOWN
S571           NaN  Sat Oct 06 22:09:29 CDT 2018      8.65       60      680
S579           NaN  Sat Oct 06 22:09:29 CD

PermissionError: [Errno 13] Permission denied: 'data/Route_Summary.csv'