# Amsterdam Event Data

## Imports <a name="imports"></a>

In [2]:
#Read JSON files
#Imports
import pandas as pd
import datetime
import json

## Import Events file <a name="importFile"></a>
Import the open data file 

In [6]:
#Open explicitly as UTF-8 (the encoding JSON is specified in) instead of the platform default encoding
with open("../../../Data_thesis/Open_Data/Evenementen.json", encoding="utf-8") as event_data:
    #Parse the whole file into Python objects
    events = json.load(event_data)

### Example contents <a name="exampleContents"></a>
Below is an example snippet of an event in the file

In [7]:
#Use the first event in the file as the example
example_event = events[0]
example_location = example_event["location"]

#Name of the event
print("Event: ", example_event["title"])

#Location of the event in human terms
print("City: ", example_location["city"])
print("Adress: ", example_location["adress"])
print("Zipcode: ", example_location["zipcode"])

#Coordinates of the event (latitude, longitude)
print("Coordinates: ", example_location["latitude"], example_location["longitude"])

#Date information of the event
print("Date: ", example_event["dates"])

Event:  Springsnow Festival
City:  AMSTERDAM
Adress:  Diverse locaties / Various locations
Zipcode:  1012 JS
Coordinates:  52,3726380 4,8941060
Date:  {'startdate': '20-04-2018', 'enddate': '20-05-2018'}


## Full Dataset <a name="fullDF"></a>
As not all the variables are usable, we make a subset of the following variables:
- *Event Name*: Show the event name
- *Coordinates*: Show the coordinates of the event
- *Date*: Show the event date(s)

### Functions

In [3]:
def transformData(events, lat_low, lat_high, lon_low, lon_high, start_date, end_date):
    """
    Build a DataFrame with the dates of all events that lie inside a coordinate box.

    For every event whose coordinates fall strictly inside the given latitude and
    longitude borders, each listed date that lies strictly between start_date and
    end_date yields one row with:
    - Date (pd.Timestamp): date
    - is_event (float): always 1.0, there is an event on the given date

    Events that store their dates as a range ({'startdate': ..., 'enddate': ...})
    contribute only those two boundary dates, not the days in between. Events that
    store their dates as {'singles': [...]} contribute every listed date. Events
    with neither key contribute nothing.

    Parameters:
    - events (list of dict): parsed JSON events; each event needs
      event["location"]["latitude"], event["location"]["longitude"] (strings that
      may use a decimal comma) and event["dates"]
    - Coordinate borders (exclusive):
        - lat_low: min value latitude
        - lat_high: max value latitude
        - lon_low: min value longitude
        - lon_high: max value longitude
    - start_date: only dates after this timestamp are kept (exclusive)
    - end_date: only dates before this timestamp are kept (exclusive)

    Returns: DataFrame with the columns "Date" and "is_event", one row per relevant
    event date. The columns are present even when no date qualifies.

    Raises:
    - ValueError: if a coordinate cannot be converted to float or a date does not
      match the format dd-mm-yyyy
    """

    #Format in which the dates are stored in the file
    date_format = "%d-%m-%Y"

    #One dict per relevant event date; converted to a DataFrame in one go at the end
    rows = []

    #Loop over all events
    for event in events:

        #Coordinates are stored as strings with a decimal comma, e.g. "52,3726380"
        lat = float(event["location"]["latitude"].replace(",", "."))
        lon = float(event["location"]["longitude"].replace(",", "."))

        #Skip events outside the specified area
        if not (lon_low < lon < lon_high and lat_low < lat < lat_high):
            continue

        event_dates = event["dates"]

        #Format one --> {'startdate': 'dd-mm-yyyy', 'enddate': 'dd-mm-yyyy'}
        if "startdate" in event_dates:
            raw_dates = [event_dates["startdate"], event_dates["enddate"]]

        #Format two --> {'singles': ['dd-mm-yyyy',..., 'dd-mm-yyyy']}
        elif "singles" in event_dates:
            raw_dates = event_dates["singles"]

        #Unknown format --> no dates to add
        else:
            raw_dates = []

        #Change type from 'str' to 'Timestamp'. pd.Timestamp.strptime is not
        #implemented in current pandas versions, so pd.to_datetime is used instead
        dates = [pd.to_datetime(raw_date, format=date_format) for raw_date in raw_dates]

        #Save present date with confirmation that there is an event
        for date in dates:
            if start_date < date < end_date:
                rows.append({"Date": date, "is_event": 1.0})

    #Passing the columns explicitly keeps them present when there are no rows
    return pd.DataFrame(rows, columns=["Date", "is_event"])

### Variables

In [8]:
#Location of the events JSON file
#NOTE(review): this path goes up four directories, while the other file paths in this notebook go up three - confirm which is correct
json_events_path = "../../../../Data_thesis/Open_Data/Evenementen.json"

#Borders of the area to search in: (min, max) for latitude and longitude
lat_low, lat_high = 52.36, 52.39
lon_low, lon_high = 4.88, 4.92

#Only events dated between these two timestamps are relevant
start_date = pd.Timestamp(year=2018, month=3, day=11)
end_date = pd.Timestamp(year=2019, month=4, day=30)

### Construct DF

In [15]:
#Open explicitly as UTF-8 (the encoding JSON is specified in) instead of the platform default encoding
with open(json_events_path, encoding="utf-8") as file_data:
    #Save data as JSON object
    events = json.load(file_data)

#Transform data to desired format
event_df = transformData(events, lat_low, lat_high, lon_low, lon_high, start_date, end_date)

In [16]:
#Preview the first five rows of the constructed DataFrame
event_df.head(n=5)

Unnamed: 0,Date,is_event
0,2018-04-20,1.0
1,2018-05-20,1.0
2,2018-06-02,1.0
3,2018-06-03,1.0
4,2018-06-04,1.0


## Save to file <a name="CSV"></a>
Save the list of subset events to file

In [12]:
#Write the event dates to CSV, leaving out the DataFrame index
event_df.to_csv(path_or_buf="../../../Data_thesis/Full_Datasets/Events.csv", index=False)