In [27]:
import pandas as pd
import os
import xml.etree.ElementTree as ET
import gzip
import time
import shutil
import requests


In [28]:
# Ordered list of numeric ids making up the route of interest.
# NOTE(review): not referenced by any cell visible here; presumably station or
# detector ids along a corridor -- confirm against the code that consumes it.
Route = [
    584, 567,
    583, 568,
    582, 569,
    570, 581,
    580, 571,
    579, 572,
    578, 573,
    577, 587,
]

In [29]:
def download():
    """Download the three gzip-compressed XML feeds and store them under ``data/``.

    Feeds fetched from ``data.dot.state.mn.us/iris_xml``:

    * ``incident.xml.gz``    -> ``data/incidents.xml``   (decoded as UTF-8)
    * ``det_sample.xml.gz``  -> ``data/det_sample.xml``  (decoded as ISO-8859-1)
    * ``stat_sample.xml.gz`` -> ``data/stat_sample.xml`` (decoded as ISO-8859-1)

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-2xx HTTP status (via ``raise_for_status``).
        OSError / EOFError: if a payload is not valid gzip data.
    """
    # (url, destination path, encoding of the decompressed payload)
    feeds = (
        ('http://data.dot.state.mn.us/iris_xml/incident.xml.gz',
         'data/incidents.xml', 'utf-8'),
        ('http://data.dot.state.mn.us/iris_xml/det_sample.xml.gz',
         'data/det_sample.xml', 'ISO-8859-1'),
        ('http://data.dot.state.mn.us/iris_xml/stat_sample.xml.gz',
         'data/stat_sample.xml', 'ISO-8859-1'),
    )

    for url, destination, source_encoding in feeds:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, timeout=30)
        # Fail loudly on 4xx/5xx instead of trying to gunzip an error page.
        response.raise_for_status()
        text = gzip.decompress(response.content).decode(source_encoding)

        os.makedirs(os.path.dirname(destination), exist_ok=True)
        # Write with an explicit encoding so the file content does not depend
        # on the platform's default locale encoding.
        # NOTE(review): assumes the feeds carry no conflicting encoding in
        # their XML declaration -- confirm against a real payload.
        with open(destination, 'w', encoding='utf-8') as handle:
            handle.write(text)

In [30]:
def _ensure_csv(path, columns):
    """Create ``path`` holding only a header row for ``columns`` if it is missing."""
    if not os.path.exists(path):
        # Index is written too (pandas default) so the header lines up with the
        # index-prefixed rows that the parser functions append.
        pd.DataFrame(columns=columns).to_csv(path, header=True)


def data_check():
    """Make sure each output CSV exists with a header, then run its parser.

    For crash, detector and station data in turn: if the CSV under ``data/``
    does not exist yet it is created with a header row, and then the matching
    parser (``incidents``, ``detectors``, ``stations``) is called to append
    the freshly downloaded samples.

    The header file is fully written and closed *before* the parser runs.
    Calling the parser while the header handle was still open in ``'w'`` mode
    meant the header could still be sitting in that handle's buffer and be
    flushed at offset 0 after the parser had appended rows, overwriting the
    start of the appended data.
    """
    # Column names match the DataFrame built by incidents().
    _ensure_csv('data/crash_data.csv',
                ['Name', 'Date', 'Direction', 'Road', 'Location', 'Event'])
    incidents()

    _ensure_csv('data/detector_data.csv',
                ['Sensor', 'Time', 'Occupancy', 'Speed', 'Flow'])
    detectors()

    _ensure_csv('data/station_data.csv',
                ['Station', 'Time', 'Occupancy', 'Speed', 'Flow'])
    stations()


In [31]:
def stations(xml_path="data/stat_sample.xml", csv_path='data/station_data.csv'):
    """Parse the station sample XML and append one CSV row per child element.

    Args:
        xml_path: XML file to read. Defaults to the file written by the
            download step.
        csv_path: CSV file the rows are appended to (no header is written).

    Each direct child of the XML root yields a row with the columns
    ``Station, Time, Occupancy, Speed, Flow``. ``Time`` comes from the root
    element's ``time_stamp`` attribute; the rest come from the child's
    ``sensor``, ``occ``, ``speed`` and ``flow`` attributes. Any missing
    attribute is recorded as the string ``"NA"``.

    Raises:
        xml.etree.ElementTree.ParseError: if the XML is malformed.
        OSError: if either file cannot be opened.
    """
    root = ET.parse(xml_path).getroot()

    # The timestamp is an attribute of the root, so it is identical for every row.
    time_stamp = str(root.attrib.get('time_stamp', "NA"))

    records = [
        {
            "Station": child.attrib.get('sensor', "NA"),
            "Time": time_stamp,
            "Occupancy": child.attrib.get('occ', 'NA'),
            "Speed": child.attrib.get('speed', "NA"),
            # XML attribute names cannot contain a space, so the previous
            # 'sample flow' lookup could never match and Flow was always "NA".
            # NOTE(review): 'flow' is the presumed attribute name -- confirm
            # against a real stat_sample.xml payload.
            "Flow": child.attrib.get('flow', "NA"),
        }
        for child in root
    ]

    # Explicit column order so rows line up with the CSV header.
    DF = pd.DataFrame(records,
                      columns=["Station", "Time", "Occupancy", "Speed", "Flow"])

    print(DF)
    print("Station Data Parsed")

    # Let pandas open the file itself so newline handling is correct everywhere.
    DF.to_csv(csv_path, mode='a', header=False)

In [32]:
def incidents(xml_path="data/incidents.xml", csv_path='data/crash_data.csv'):
    """Parse the incident XML and append one CSV row per child element.

    Args:
        xml_path: XML file to read. Defaults to the file written by the
            download step.
        csv_path: CSV file the rows are appended to (no header is written).

    Each direct child of the XML root yields a row with the columns
    ``Name, Date, Direction, Road, Location, Event`` taken from the
    attributes ``name``, ``event_date``, ``dir``, ``road``, ``location`` and
    ``event_type``. A missing attribute is recorded as ``"NA"``.

    ``Event`` is the part of ``event_type`` after the first underscore. When
    the value contains no underscore the whole value is kept (previously this
    raised an uncaught ``IndexError``).

    Raises:
        xml.etree.ElementTree.ParseError: if the XML is malformed.
        OSError: if either file cannot be opened.
    """
    root = ET.parse(xml_path).getroot()

    records = []
    for child in root:
        attrs = child.attrib

        event_type = attrs.get('event_type')
        if event_type is None:
            event = "NA"
        else:
            _prefix, separator, suffix = event_type.partition("_")
            event = suffix if separator else event_type

        # .get() with a default replaces the per-field try/except blocks, two
        # of which appended to undefined names (`name`, `incident_dir`) and so
        # raised NameError exactly when an attribute was missing.
        records.append({
            "Name": str(attrs.get('name', "NA")),
            "Date": attrs.get('event_date', "NA"),
            "Direction": attrs.get('dir', "NA"),
            "Road": attrs.get('road', 'NA'),
            "Location": attrs.get('location', "NA"),
            "Event": event,
        })

    # Explicit column order so rows line up with the CSV header.
    DF = pd.DataFrame(
        records,
        columns=["Name", "Date", "Direction", "Road", "Location", "Event"])

    print(DF)

    print("Incident Data Parsed")

    # Let pandas open the file itself so newline handling is correct everywhere.
    DF.to_csv(csv_path, mode='a', header=False)


In [33]:
def detectors(xml_path="data/det_sample.xml", csv_path='data/detector_data.csv'):
    """Parse the detector sample XML and append one CSV row per child element.

    Args:
        xml_path: XML file to read. Defaults to the file written by the
            download step.
        csv_path: CSV file the rows are appended to (no header is written).

    Each direct child of the XML root yields a row with the columns
    ``Sensor, Time, Occupancy, Speed, Flow``. ``Time`` comes from the root
    element's ``time_stamp`` attribute; the rest come from the child's
    ``sensor``, ``occ``, ``speed`` and ``flow`` attributes. Any missing
    attribute is recorded as the string ``"NA"``.

    Raises:
        xml.etree.ElementTree.ParseError: if the XML is malformed.
        OSError: if either file cannot be opened.
    """
    root = ET.parse(xml_path).getroot()

    # The timestamp is an attribute of the root, so it is identical for every row.
    time_stamp = str(root.attrib.get('time_stamp', "NA"))

    records = []
    for child in root:
        attrs = child.attrib
        records.append({
            "Sensor": attrs.get('sensor', "NA"),
            "Time": time_stamp,
            "Occupancy": attrs.get('occ', 'NA'),
            "Speed": attrs.get('speed', "NA"),
            # XML attribute names cannot contain a space, so the previous
            # 'sample flow' lookup could never match and Flow was always "NA".
            # NOTE(review): 'flow' is the presumed attribute name -- confirm
            # against a real det_sample.xml payload.
            "Flow": attrs.get('flow', "NA"),
        })

    # Explicit column order so rows line up with the CSV header.
    DF = pd.DataFrame(records,
                      columns=["Sensor", "Time", "Occupancy", "Speed", "Flow"])

    print(DF)
    print("Detector Data Parsed")

    # Let pandas open the file itself so newline handling is correct everywhere.
    DF.to_csv(csv_path, mode='a', header=False)

In [34]:
def config(xml_path="data/stat_config.xml", csv_path='data/stat_config.csv'):
    """Extract located entries from the configuration XML and append them to a CSV.

    Args:
        xml_path: configuration XML file to read.
        csv_path: CSV file the rows are appended to (no header is written).

    Only direct children of the XML root that carry *both* a ``lat`` and a
    ``lon`` attribute produce a row. Columns are ``Label, Sensor, Lat, Lng``,
    taken from ``description``, ``name``, ``lat`` and ``lon``; a missing
    ``description`` or ``name`` is recorded as the string ``"error"``.

    Raises:
        xml.etree.ElementTree.ParseError: if the XML is malformed.
        OSError: if either file cannot be opened.
    """
    root = ET.parse(xml_path).getroot()

    records = []
    for child in root:
        attrs = child.attrib
        # Check both coordinates before recording anything. Appending `lat`
        # first and then skipping on a missing `lon` left the column lists
        # with different lengths, which breaks DataFrame construction.
        if 'lat' not in attrs or 'lon' not in attrs:
            continue
        records.append({
            "Label": attrs.get('description', "error"),
            "Sensor": attrs.get('name', "error"),
            "Lat": attrs['lat'],
            "Lng": attrs['lon'],
        })

    # Every cell is a non-null string, so no NaN filtering is needed here.
    DF = pd.DataFrame(records, columns=["Label", "Sensor", "Lat", "Lng"])
    print(DF)

    # Let pandas open the file itself so newline handling is correct everywhere.
    DF.to_csv(csv_path, mode='a', header=False)


In [35]:
# Fetch the metro configuration XML, make sure the output CSV exists with a
# header, then parse the XML into it.
c = requests.get('http://data.dot.state.mn.us/iris_xml/metro_config.xml.gz', timeout=30)
# Fail loudly on 4xx/5xx instead of trying to gunzip an error page.
c.raise_for_status()

os.makedirs('data', exist_ok=True)
# Written to the exact path config() reads ("data/stat_config.xml"); the
# previous "data/XMLs/" location was never read by config().
with open('data/stat_config.xml', 'w', encoding='utf-8') as handle:
    handle.write(gzip.decompress(c.content).decode('utf-8'))

# Check the same path that is written below. Probing "stat_config.csv" without
# the "data/" prefix made the check fail on every run, truncating the CSV.
if not os.path.exists('data/stat_config.csv'):
    # Column names match the DataFrame built by config().
    Station_Data = pd.DataFrame(columns=['Label', 'Sensor', 'Lat', 'Lng'])
    Station_Data.to_csv('data/stat_config.csv', header=True)

# Run the parser only after the header file is closed, so a still-buffered
# header cannot be flushed over the rows config() appends.
config()

                                 Label     Sensor       Lat        Lng
0                 I-35 NB @ 40th Ave W    C35-121  46.74828  -92.14647
1                   I-35 NB @ Oredocks    C35-123  46.75367  -92.13626
2                 I-35 NB @ 29th Ave W    C35-125  46.75718   -92.1305
3             T.H.55 WB @ Winnetka Ave      C5530  44.98319  -93.38063
4               T.H.55 WB @ Douglas Dr      C5533  44.98517  -93.36002
5                 T.H.10 WB (Royalton)     C30079  45.83383  -94.29713
6                 I-35 NB @ 22nd Ave W    C35-132  46.76284  -92.12503
7                     I-35 SB @ T.H.53    C35-136  46.76536  -92.12406
8                 I-35 SB @ 22nd Ave W    C35-131  46.76302  -92.12559
9               T.H.169 NB N of T.H.41     C30061  44.77227  -93.56966
10                 I-35 SB @ Lemond Rd    D6-C130  44.07152  -93.25606
11                    I-35 NB @ U.S.14    D6-C131  44.05876  -93.24958
12               I-35 NB @ Central Ave    C35-100  46.73577  -92.16522
13    

In [None]:
# Poll the feeds forever: download, parse/append, then wait 30 seconds.
# Interrupt the kernel to stop.
while True:
    try:
        download()
        print("download complete")
        data_check()
        print("Parsing Complete, sleeping 30s")
    except (requests.RequestException, OSError, EOFError, ET.ParseError) as error:
        # A transient network failure, a bad gzip payload or malformed XML
        # should cost one cycle, not end a long-running collection.
        print("Cycle failed ({}: {}), retrying in 30s".format(type(error).__name__, error))
    time.sleep(30)

download complete
                Name                          Date Direction    Road  \
0       L004_9946513  Tue Oct 02 23:42:42 CDT 2018        EB   I-694   
1   2018100217261325  Tue Oct 02 17:26:13 CDT 2018        SB   I-35W   
2   2018100218151677  Tue Oct 02 18:15:16 CDT 2018        SB   I-35W   
3   2018100218161579  Tue Oct 02 18:16:15 CDT 2018        NB   I-35W   
4   2018100219595578  Tue Oct 02 19:59:55 CDT 2018        SB   I-35W   
5   2018100219492669  Tue Oct 02 19:49:26 CDT 2018        EB    I-94   
6   2018100301051236  Wed Oct 03 01:05:12 CDT 2018        SB    I-35   
7       L004_9946476  Tue Oct 02 23:01:32 CDT 2018        SB  T.H.77   
8   2018100221062434  Tue Oct 02 21:06:24 CDT 2018        NB   I-35W   
9       L004_9946465  Tue Oct 02 22:39:42 CDT 2018        NB  T.H.47   
10  2018100218424618  Tue Oct 02 18:42:46 CDT 2018        NB   I-35W   
11  2018100222201222  Tue Oct 02 22:20:12 CDT 2018        WB  T.H.10   
12  2018100223180226  Tue Oct 02 23:18:02 CDT 

download complete
                Name                          Date Direction    Road  \
0       L004_9946513  Tue Oct 02 23:42:42 CDT 2018        EB   I-694   
1   2018100217261325  Tue Oct 02 17:26:13 CDT 2018        SB   I-35W   
2   2018100218151677  Tue Oct 02 18:15:16 CDT 2018        SB   I-35W   
3   2018100218161579  Tue Oct 02 18:16:15 CDT 2018        NB   I-35W   
4   2018100219595578  Tue Oct 02 19:59:55 CDT 2018        SB   I-35W   
5   2018100219492669  Tue Oct 02 19:49:26 CDT 2018        EB    I-94   
6   2018100301051236  Wed Oct 03 01:05:12 CDT 2018        SB    I-35   
7       L004_9946476  Tue Oct 02 23:01:32 CDT 2018        SB  T.H.77   
8   2018100221062434  Tue Oct 02 21:06:24 CDT 2018        NB   I-35W   
9       L004_9946465  Tue Oct 02 22:39:42 CDT 2018        NB  T.H.47   
10  2018100218424618  Tue Oct 02 18:42:46 CDT 2018        NB   I-35W   
11  2018100222201222  Tue Oct 02 22:20:12 CDT 2018        WB  T.H.10   
12  2018100223180226  Tue Oct 02 23:18:02 CDT 

download complete
                Name                          Date Direction    Road  \
0       L004_9946513  Tue Oct 02 23:42:42 CDT 2018        EB   I-694   
1   2018100217261325  Tue Oct 02 17:26:13 CDT 2018        SB   I-35W   
2   2018100218151677  Tue Oct 02 18:15:16 CDT 2018        SB   I-35W   
3   2018100218161579  Tue Oct 02 18:16:15 CDT 2018        NB   I-35W   
4   2018100219595578  Tue Oct 02 19:59:55 CDT 2018        SB   I-35W   
5   2018100219492669  Tue Oct 02 19:49:26 CDT 2018        EB    I-94   
6   2018100301051236  Wed Oct 03 01:05:12 CDT 2018        SB    I-35   
7       L004_9946476  Tue Oct 02 23:01:32 CDT 2018        SB  T.H.77   
8   2018100221062434  Tue Oct 02 21:06:24 CDT 2018        NB   I-35W   
9       L004_9946465  Tue Oct 02 22:39:42 CDT 2018        NB  T.H.47   
10  2018100218424618  Tue Oct 02 18:42:46 CDT 2018        NB   I-35W   
11  2018100222201222  Tue Oct 02 22:20:12 CDT 2018        WB  T.H.10   
12  2018100223180226  Tue Oct 02 23:18:02 CDT 