In [127]:
import numpy as np
import pandas as pd

In [None]:
# Load the hourly weather observations and keep only the columns used below.
# index_col=False tells pandas not to use the first CSV column as the index,
# so the frame gets a default 0..n-1 integer index.
weather_columns = [
    'HourlyStationPressure', 'HourlyPressureChange', 'HourlyPressureTendency',
    'HourlyVisibility', 'HourlyWindSpeed', 'HourlySkyConditions',
    'HourlyPrecipitation', 'HourlyWetBulbTemperature', 'HourlyWindDirection',
    'HourlyRelativeHumidity',
]
# .copy() detaches the column selection from the full CSV frame, so the column
# assignments that follow modify an independent frame (and do not raise
# SettingWithCopyWarning on pandas versions without copy-on-write).
weather = pd.read_csv('weather.csv', index_col=False)[weather_columns].copy()


# Coerce the measurement columns to numeric dtype. Any entry that cannot be
# parsed as a number becomes NaN (errors='coerce'); those gaps are
# forward-filled further down.
for column in ('HourlyStationPressure', 'HourlyVisibility',
               'HourlyPrecipitation', 'HourlyWindDirection'):
    weather[column] = pd.to_numeric(weather[column], errors='coerce')

# Missing pressure-change / pressure-tendency readings are treated as 0
# (no pressure change).
# Series.fillna is used instead of np.nan_to_num: nan_to_num returns non-float
# input (e.g. an object-dtype column) unchanged, silently leaving its NaNs in
# place, and it also rewrites +/-inf to large finite numbers.
weather['HourlyPressureChange'] = weather['HourlyPressureChange'].fillna(0)
weather['HourlyPressureTendency'] = weather['HourlyPressureTendency'].fillna(0)

# Encode sky conditions as two complementary dummy columns:
# cloud_1 == 1 for an overcast sky, cloud_0 == 1 otherwise (clear sky).
# A row counts as overcast when its sky-condition string contains one of the
# codes ':08 ', ':09 ' or ':10 ' (colon, two-digit code, trailing space).
sky_conditions = weather['HourlySkyConditions'].ffill()
weather['HourlySkyConditions'] = sky_conditions

# Vectorised match instead of a per-row loop with chained assignment
# (weather.cloud_1[i] = 1), which is not guaranteed to write back to the frame.
# na=False: rows still missing after the forward fill (i.e. leading gaps) are
# treated as not overcast instead of raising a TypeError.
is_overcast = sky_conditions.str.contains(r':(?:08|09|10) ', regex=True, na=False)
weather['cloud_1'] = is_overcast.astype('int64')
weather['cloud_0'] = (~is_overcast).astype('int64')


# Encode wind direction (degrees) as four dummy columns:
# wind_north, wind_east, wind_south, wind_west.
#   north: d > 315 or d <= 45
#   east:  45  < d <= 135
#   south: 135 < d <= 225
#   west:  225 < d <= 315
wind_direction = weather['HourlyWindDirection'].ffill()
weather['HourlyWindDirection'] = wind_direction

# Vectorised comparisons instead of a per-row loop with chained assignment
# (weather.wind_north[i] = 1), which is not guaranteed to write back to the frame.
# Comparisons against NaN are False, so rows whose direction is still missing
# after the forward fill (leading gaps) get 0 in all four columns rather than
# falling through to "east".
weather['wind_north'] = ((wind_direction > 315) | (wind_direction <= 45)).astype('int64')
weather['wind_east'] = ((wind_direction > 45) & (wind_direction <= 135)).astype('int64')
weather['wind_south'] = ((wind_direction > 135) & (wind_direction <= 225)).astype('int64')
weather['wind_west'] = ((wind_direction > 225) & (wind_direction <= 315)).astype('int64')

# Forward-fill the remaining gaps in every column, assuming the weather does
# not change within an hour. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling and fills the same values.
weather = weather.ffill()