# Project 5: Optimizing Evacuation Routes using Real-Time Traffic Information

Michael Daugherty, Kelly Slatery, Song May | US-DSI-10 | 02.21.2020

## Get Coordinates

In [1]:
# Imports
import pandas as pd
import numpy as np
import re
import spacy

In [2]:
# Set view options
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

# Import Data

In [3]:
full_df = pd.read_csv('./data/final_data_with_predictions.csv')
full_df.shape

(3087, 4)

In [4]:
full_df.head()

Unnamed: 0,dates&time,user,tweet,class
0,2017-08-31 21:34:27+00:00,DallasPD,"Reminder: Texas State Law prohibiting texting and driving goes into effect September 1, 2017",0
1,2017-08-31 21:03:18+00:00,DallasPD,Brotherhood for the Fallen Charity Event,0
2,2017-08-31 17:16:43+00:00,DallasPD,"FREE #TRINITY EVENT - Artisan Fair Market - September 2, 2017 #LaborDayWeekend",0
3,2017-08-31 17:13:36+00:00,DallasPD,Our thoughts and prayers are with the Sacramento County Sheriff's Department and the family of Deputy Sheriff Robert French.,0
4,2017-08-31 17:10:47+00:00,DallasPD,How can Dallas residents help animals affected by #HurricaneHarvey? http://fb.me/90nmndc7A,0


# Filter Data for Useful Tweets

In [5]:
# Filter df for useful tweets
df = full_df[full_df['class'] == 1]
df.shape

(103, 4)

In [6]:
df.head()

Unnamed: 0,dates&time,user,tweet,class
239,2017-09-01 04:36:40+00:00,EPPOLICE,"I-10 East at Airway, scheduled road work, all lanes closed, follow detour, clearing time 5 am.",1
240,2017-09-01 04:09:53+00:00,EPPOLICE,"I-10 East at Mesa, schedule road work, all lanes closed, follow detour, clearing time 6 am.",1
242,2017-09-01 00:39:43+00:00,EPPOLICE,"I-10 East @US-54, Collision, Right Lane Closed, Backup To Porfirio Diaz, Clearing Time 1 Hour.",1
247,2017-08-31 23:10:23+00:00,EPPOLICE,"I-10 East @Schuster, Collision, Right Shoulder Closed, Backup To Executive, Clearing Time 1 Hour.",1
248,2017-08-31 22:51:34+00:00,EPPOLICE,"I-10 West @Trowbridge, Collision, Right Lane Closed, Backup To Hawkins, Clearing Time 1 Hour.",1


In [7]:
df.tail()

Unnamed: 0,dates&time,user,tweet,class
2914,2017-08-25 13:00:53+00:00,I35travel,Waxahachie | US 287 Business WB@I-35 E | Various lanes closed today 9AM to 3PM http://i35-maps.tti.tamu.edu/?id=6342 #My35,1
2948,2017-08-31 18:31:58+00:00,TxDOTLufkin,"ALERT: SL 287 East remains closed in Lufkin. Alternate routes are: SL 287 West, BU 59. From Nacogdoches, SH7 W to US 69 S to SL 287 West.",1
3040,2017-08-30 02:10:33+00:00,my290Houston,US 290 EB closed from Fry to Barker Cypress is closed. Only high clearance emergency vehicles can get through.,1
3053,2017-08-29 15:48:59+00:00,my290Houston,"No, if you do not need to travel, please stay put. US 290 EB is closed at Fry. Visit http://drivetexas.org to see updates.",1
3069,2017-08-28 13:59:02+00:00,my290Houston,US 290 mainlanes & frontage are closed in both directions from Barker Cypress to Fry Road due to high water.,1


# Create Location List

In [8]:
# Clean tweets: Map common abbreviations to full-length word
abbrevs = {
    'SB': 'southbound',
    'NB': 'northbound', 
    'EB': 'eastbound', 
    'WB': 'westbound', 
    'St': 'Street', 
    'Rd': 'Road', 
    'E.': 'east',
    'S.': 'south',
    'W.': 'west',
    'N.': 'north',
    'Blvd': 'boulevard',
    'Pkwy': 'parkway',
    'Hwy': 'highway',
    '@': ' at '  
}

# Map 
clean_tweets = []
for tweet in df['tweet']:
    for abbrev, word in abbrevs.items():
        tweet = tweet.replace(abbrev, word)
    tweet = re.sub(r'\b(http|https):\/\/.*[^ alt]\b', '', tweet)
    clean_tweets.append(tweet)
    
clean_tweets[:4]


['I-10 East at Airway, scheduled road work, all lanes closed, follow detour, clearing time 5 am.',
 'I-10 East at Mesa, schedule road work, all lanes closed, follow detour, clearing time 6 am.',
 'I-10 East  at US-54, Collision, Right Lane Closed, Backup To Porfirio Diaz, Clearing Time 1 Hour.',
 'I-10 East  at Schuster, Collision, Right Shoulder Closed, Backup To Executive, Clearing Time 1 Hour.']

In [10]:
# Regex to get good locations from filtered locations above
locations = []

for ad in clean_tweets:
    match_1 = re.search("I\-[0-9]+\s[0-9a-zA-Z]+\s[0-9a-zA-Z]+\s[0-9a-zA-Z]+", ad)
    match_2 = re.search("[0-9a-zA-Z]+\sand+\s[0-9a-zA-Z]+\s[0-9a-zA-Z]+", ad)
    match_3 = re.search("I[0-9]+\s[0-9a-zA-Z]+\s[0-9a-zA-Z]+\s[0-9a-zA-Z]+", ad)
    match_4 = re.search("[a-zA-Z]+\sat+\s[0-9a-zA-Z]+\s[0-9a-zA-Z]+", ad)
    match_5 = re.search("I\-[0-9]+[0-9a-zA-Z]+\s@+[0-9a-zA-Z]+\s[0-9a-zA-Z]+", ad)
    
    if match_1: locations.append(match_1.group())
    elif match_2: locations.append(match_2.group())
    elif match_3: locations.append(match_3.group())
    elif match_4: locations.append(match_4.group())
    elif match_5: locations.append(match_5.group())
        
print(len(locations))
locations[:10]

43


['I-10 East at Airway',
 'I-10 East at Mesa',
 'I-10 East at Asarco',
 'I-10 West at Lomaland',
 'I-10 East at Sunland',
 'I-10 East at Sunland',
 'I-10 West at US',
 'I-10 West at Hawkins',
 'I-10 West at Sunland',
 'I-10 East at Mesa']

In [15]:
# Add ', Texas' to each location
texas_locations = []
for loc in locations:
    loc += ', Texas'
    texas_locations.append(loc)
texas_locations

['I-10 East at Airway, Texas',
 'I-10 East at Mesa, Texas',
 'I-10 East at Asarco, Texas',
 'I-10 West at Lomaland, Texas',
 'I-10 East at Sunland, Texas',
 'I-10 East at Sunland, Texas',
 'I-10 West at US, Texas',
 'I-10 West at Hawkins, Texas',
 'I-10 West at Sunland, Texas',
 'I-10 East at Mesa, Texas',
 'South at Spur 601, Texas',
 'I-10 West at Harnose, Texas',
 'I-10 West at Sunland, Texas',
 'I-10 East at Mesa, Texas',
 'custody at 1750 Lee, Texas',
 'I-10 West at Executive, Texas',
 'I-10 East at Mesa, Texas',
 'I-10 West at Porfirio, Texas',
 'I-10 East at Americas, Texas',
 'closed at Lakeshore boulevard, Texas',
 'crossing at 5003 Wasson, Texas',
 'I10 westbound from Neches, Texas',
 'Beaumont and Winnie is, Texas',
 'Beaumont and Winnie is, Texas',
 'closed and the water, Texas',
 'north and west on, Texas',
 'water at the foot, Texas',
 'Beaumont and Winnie now, Texas',
 'I10 westbound near SH146, Texas',
 'Magnolia at Twin City, Texas',
 'closed at Beason Creek, Texas',
 

# Create and Export Locations

In [16]:
# Create a dataframe of the locations
locations_df = pd.DataFrame(texas_locations, columns=['location'])
locations_df.shape

(43, 1)

In [17]:
locations_df.head()

Unnamed: 0,location
0,"I-10 East at Airway, Texas"
1,"I-10 East at Mesa, Texas"
2,"I-10 East at Asarco, Texas"
3,"I-10 West at Lomaland, Texas"
4,"I-10 East at Sunland, Texas"


In [18]:
# Export locations
locations_df.to_csv('./data/locations.csv', index=False)