In [247]:
import os
import re
from datetime import datetime, timedelta

import eventful
import pandas as pd

In [248]:
# Get the api key from http://api.eventful.com/
# The key is read from the EVENTFUL_API_KEY environment variable so that it never has
# to be typed into (and saved with) the notebook. The placeholder is only used when
# the variable is not set.

api = eventful.API(os.environ.get('EVENTFUL_API_KEY', '<api key goes here>'))

#### For training, we want the events between the start and end dates of the data provided to us

In [249]:
# Load the daily counts and parse the 'Time' column into datetimes.
data = pd.read_csv("daily_counts_10-25-19.csv")
data['Time'] = pd.to_datetime(data['Time'])

# First and last timestamp present in the data. Series.min()/max() skip missing
# values (NaT), whereas the builtin min()/max() give an order-dependent answer
# when a NaT is present.
min_ts = data['Time'].min()
max_ts = data['Time'].max()

In [246]:
# Show the first and last timestamp found in the 'Time' column.
min_ts, max_ts

(Timestamp('2016-02-17 00:00:00'), Timestamp('2019-10-24 00:00:00'))

In [250]:
# Create a list of dates from start time till end time

# First and last day of the data (these match min_ts / max_ts shown above).
start_date = datetime(2016, 2, 17)
end_date = datetime(2019, 10, 24)

def datespan(startDate, endDate, delta=timedelta(days=1), inclusive=False):
    """Yield datetimes from startDate towards endDate in steps of delta.

    Parameters
    ----------
    startDate : datetime
        First value yielded (if it lies within the span).
    endDate : datetime
        Upper bound of the span.
    delta : timedelta, optional
        Step between consecutive values; must be positive. Defaults to one day.
    inclusive : bool, optional
        When False (the default) endDate itself is not yielded; when True it is
        yielded if a step lands exactly on it.

    Raises
    ------
    ValueError
        If delta is zero or negative (the loop would never terminate). Because
        this is a generator, the error surfaces on the first iteration.
    """
    if delta <= timedelta(0):
        raise ValueError("delta must be a positive timedelta")
    currentDate = startDate
    while currentDate < endDate or (inclusive and currentDate == endDate):
        yield currentDate
        currentDate += delta

# end_date is itself a day present in the data, so it is included here; with the
# default exclusive span the last day would never be queried for events.
# Paths use un-padded "year/month/day", e.g. "2016/2/17".
path_list = [
    "{0.year}/{0.month}/{0.day}".format(single_date)
    for single_date in datespan(start_date, end_date, inclusive=True)
]

In [251]:
# Parallel accumulators, one entry per event: the fetch loop appends to all four.
title, venue_name, stop_time, start_time = [], [], [], []

In [72]:
# Query the events API once per date and collect the fields of every event returned.
for dt in path_list:
    events = api.call('/events/search', l='78701', t=dt, within='5', units='miles')
    try:
        day_events = events['events']['event']
    except (KeyError, TypeError):
        # enters here when we did not find any events for a particular date
        continue

    # NOTE(review): if a lone result comes back as a single mapping instead of a
    # list, iterating it would walk its keys; wrap it so it is treated as one event.
    # Confirm the response shape against the API.
    if isinstance(day_events, dict):
        day_events = [day_events]

    for event in day_events or []:
        try:
            # Read all four fields before appending anything, so the four lists
            # always stay the same length even when an event is missing a field.
            record = (event['title'], event['venue_name'],
                      event['stop_time'], event['start_time'])
        except (KeyError, TypeError):
            # Skip only the malformed event, not the rest of the day.
            continue
        title.append(record[0])
        venue_name.append(record[1])
        stop_time.append(record[2])
        start_time.append(record[3])

In [74]:
# Number of events collected (one venue_name entry is appended per event).
len(venue_name)

3935

In [201]:
# Assemble the collected event fields into a single frame, one row per event.
events_df = pd.DataFrame({
    'title': title,
    'venue_name': venue_name,
    'start_time': start_time,
    'stop_time': stop_time,
})

# Parse the start times into datetimes so the .dt accessor can be used on them.
parsed_start = pd.to_datetime(events_df['start_time'])
events_df['start_time'] = parsed_start

events_df.head(100)

In [206]:
# Render each event's start as a month/day/year string, the date format used by the trail data.

trail_date_format = '%m/%d/%Y'
events_df['start_date'] = events_df['start_time'].dt.strftime(trail_date_format)

#### Event weighting: events whose titles contain a word from the word list are more likely to take place around the trail, so they are given a higher weight

In [207]:
# Keywords that mark an event title as trail-related.
word_list = ['bike','run','running','music','tour','hike','camp','camping','bird','yoga','kayak','kayaking', 'trail', 'walk','biking','hiking','walking']
def type_of_event(x):
    """Return the weight of an event given its title.

    Parameters
    ----------
    x : str
        Event title. Any non-string value (e.g. None or NaN for a missing
        title) is treated as having no keywords.

    Returns
    -------
    int
        2 if at least one word of the title is in ``word_list``, otherwise 1.
    """
    if not isinstance(x, str):
        return 1
    # Tokenise on runs of letters rather than on whitespace so that punctuation
    # does not hide a keyword ("Tour!" -> "tour", "Run/Walk" -> "run", "walk").
    words = re.findall(r"[^\W\d_]+", x.lower())
    if set(words).intersection(word_list):
        return 2
    return 1

In [208]:
# Score every event from its title: 2 when it contains a keyword, otherwise 1.
title_weights = events_df['title'].apply(type_of_event)
events_df['event_weight'] = title_weights

In [223]:
# Display the events frame, now including the start_date and event_weight columns.
events_df

Unnamed: 0,title,venue_name,start_time,stop_time,start_date,event_weight
0,SAFe® Advanced Scrum Master with SASM Certific...,Sheraton Austin Hotel @ the Capitol,2019-10-19 09:00:00,,10/19/2019,1
1,Guide a Bike Tour!,"Postal code 78741, US",2019-10-19 11:53:00,,10/19/2019,2
2,Early Bird Brunch,Cafe Blue,2019-10-19 09:00:00,,10/19/2019,2
3,Guide a Bike Tour!,"Postal code 78741, US",2019-10-19 11:53:00,,10/19/2019,2
4,Energetic S@x Practicum & Playshop w/ Reid Mih...,Austin,2019-10-19 10:30:00,,10/19/2019,1
...,...,...,...,...,...,...
3930,Jennifer Fuentes,Parker Jazz Club,2019-10-23 22:00:00,,10/23/2019,1
3931,Urban Roots Annual Seed to Harvest Kick-Off,Austin Children's Museum,2019-10-23 18:00:00,,10/23/2019,1
3932,Preschool Storytime,Twin Oaks Branch,2019-10-23 00:00:00,,10/23/2019,1
3933,All Ages Storytime,Howson Branch Library,2019-10-23 00:00:00,,10/23/2019,1


In [210]:
# Total event weight per day; the result is indexed by the start_date string.
daily_weight_input = events_df[['start_date', 'event_weight']]
events_grouped = daily_weight_input.groupby('start_date').sum()

In [211]:
# NOTE(review): 'start_date' is the groupby key, so it is the index of events_grouped
# rather than one of its columns; this rename therefore appears to change nothing.
# Later cells keep using the 'event_weight' column name, so it is left as is.
events_grouped = events_grouped.rename(columns={'start_date' : 'weight'})

In [218]:
# Display the per-day event weights.
# NOTE(review): the saved output already shows a 'date' column, which is only
# created by the following cell; the cells were apparently run out of order.
events_grouped

Unnamed: 0_level_0,event_weight,date
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1
01/01/2019,2,2019-01-01
01/02/2019,5,2019-01-02
01/03/2019,2,2019-01-03
01/04/2019,2,2019-01-04
01/05/2019,2,2019-01-05
...,...,...
12/27/2018,4,2018-12-27
12/28/2018,3,2018-12-28
12/29/2018,4,2018-12-29
12/30/2018,3,2018-12-30


In [213]:
# Expose the start_date index as a datetime column so it can serve as a merge key.
events_grouped['date'] = pd.to_datetime(events_grouped.index)

#### Joining the events dataframe with the weather and trail count data:

In [227]:
# Load the weather observations.
weather = pd.read_csv("weather_10-22-19.csv")

# Make sure both join keys are datetimes before merging.
weather['DATE'] = pd.to_datetime(weather['DATE'])
data['Time'] = pd.to_datetime(data['Time'])

# Right join: keep every row of the count data and attach the weather columns where the dates match.
weather_features = weather[['PRCP','TMAX','TMIN','DATE']]
final_df = pd.merge(weather_features, data, how='right', left_on='DATE', right_on='Time')

In [236]:
# Right join: keep every row of final_df and attach the daily event weight where the dates match.
temp = pd.merge(events_grouped, final_df, how='right', left_on='date', right_on='Time')

In [240]:
# The two left-hand merge keys duplicate 'Time', so remove them (modifies temp in place).
merge_key_columns = ['date', 'DATE']
temp.drop(columns=merge_key_columns, inplace=True)

In [241]:
# Rows with a missing event weight after the merge are given a weight of zero.
filled_weight = temp['event_weight'].fillna(0)
temp['event_weight'] = filled_weight

In [243]:
# List the columns of the merged frame.
temp.columns

Index(['event_weight', 'PRCP', 'TMAX', 'TMIN', 'Time',
       'Butler Trail : Crenshaw Bridge - Urban Trail',
       'Butler Trail: South Lamar - Urban Trail',
       'Butler Trail: North Congress - Urban Trail',
       'Butler Trail : Longhorn Dam - Urban Trail',
       'Shoal Creek Trail & 24th St- Urban Trail'],
      dtype='object')

In [234]:
# Write the merged frame to disk.
output_csv = "events_df.csv"
temp.to_csv(output_csv)