In [2]:
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
import re
import numpy as np
from sklearn.utils.extmath import weighted_mode

In [3]:
# Load the PGA Tour round data and add a `city_date` key column, built by
# concatenating the course city code with the round date.

pga_tour_data = (
    pd.read_csv('tour_data_pga.csv')
    .assign(city_date=lambda rounds: rounds['course_city_code'] + rounds['round_date'])
)

In [7]:
# Load the weather observations and add a `city_date` key column, built by
# concatenating the observation's City with its Date.

pga_weather = (
    pd.read_csv('pga_weather.csv')
    .assign(city_date=lambda observations: observations['City'] + observations['Date'])
)

In [8]:
# Reduce the round data to one row per distinct combination of
# city/date key, tee time, event name and par average time.

pga_tour_unique_event_tee_round = (
    pga_tour_data
    .loc[:, ['city_date', 'teetimeinmin', 'event_name', 'par_avg_time']]
    .drop_duplicates()
)

In [None]:
# weighted average function

def weighted_average(dataframe, value, weight):
    """Return the weighted mean of one column of ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding both the value column and the weight column.
    value : str
        Name of the column to average.
    weight : str
        Name of the column holding each row's weight.

    Returns
    -------
    float
        ``sum(value * weight) / sum(weight)`` computed over the rows where
        both the value and the weight are present. NaN is returned when
        those rows carry no weight at all (including an empty frame).
    """
    val = dataframe[value]
    wt = dataframe[weight]
    # A missing value is skipped by ``sum`` in the numerator, so its weight
    # must be left out of the denominator too; otherwise the result is
    # biased toward zero whenever the value column has gaps.
    usable = val.notna() & wt.notna()
    total_weight = wt[usable].sum()
    if total_weight == 0:
        return float('nan')
    return (val[usable] * wt[usable]).sum() / total_weight

In [51]:
# Build one row of weather measures for each unique event-round tee time.
#
# For every tee time, the weather observations with the same city_date whose
# TimeinMin lies in [tee time, tee time + par_avg_time) are selected. Each
# observation is weighted by the minutes it covers inside that window: half
# the gap to the previous observation plus half the gap to the next one, with
# the first observation stretched back to the tee time and the last one
# stretched forward to the end of the window.

# Output column -> weather column that is averaged with the minute weights.
WEIGHTED_AVERAGE_COLUMNS = {
    'avg_temperature': 'Temperature',
    'avg_dew_point': 'Dew Point',
    'avg_wind_speed': 'Wind Speed',
    'avg_wind_gust': 'Wind Gust',
    'avg_pressure': 'Pressure',
    'avg_humidity': 'Humidity',
}

round_weather = []

for index, row in pga_tour_unique_event_tee_round.iterrows():
    city_date = str(row['city_date'])
    try:
        tee_time = int(row['teetimeinmin'])
        event_name = str(row['event_name'])
        par_avg_time = float(row['par_avg_time'])
        round_end = tee_time + par_avg_time

        # Observations for this city/date, in time order, inside the window.
        # .copy() makes the window an independent frame so the column
        # assignments below do not raise SettingWithCopyWarning.
        weather_df = pga_weather[pga_weather['city_date'] == city_date].sort_values(by=['TimeinMin'])
        in_round = (weather_df['TimeinMin'] >= tee_time) & (weather_df['TimeinMin'] < round_end)
        weather_df = weather_df[in_round].copy()

        # Nothing to aggregate: report the tee time and move on instead of
        # relying on max() of an empty column raising ValueError.
        if weather_df.empty:
            print(f'{city_date}: no weather observations during the round - skipped')
            continue

        weather_df['row_num'] = np.arange(len(weather_df))
        is_first = weather_df['row_num'] == 0
        is_last = weather_df['row_num'] == len(weather_df) - 1
        gap_to_previous = weather_df['TimeinMin'].diff()
        gap_to_next = weather_df['TimeinMin'].diff(periods=-1).abs()

        # Minutes covered before and after each observation.
        weather_df['before_mins'] = np.where(is_first,
                                             weather_df['TimeinMin'] - tee_time,
                                             gap_to_previous / 2)
        weather_df['after_mins'] = np.where(is_last,
                                            round_end - weather_df['TimeinMin'],
                                            gap_to_next / 2)
        weather_df['total_mins'] = weather_df['before_mins'] + weather_df['after_mins']

        # Insertion order of this dict is the column order of the output row.
        weather_measures = {
            'city_date': city_date,
            'tee_time': tee_time,
            'event_name': event_name,
            'par_avg_time': par_avg_time,
        }
        for output_column, weather_column in WEIGHTED_AVERAGE_COLUMNS.items():
            weather_measures[output_column] = weighted_average(weather_df, weather_column, 'total_mins')
        weather_measures['sum_precipitation'] = sum(weather_df['Precip.'])
        weather_measures['mode_condition'] = weighted_mode(weather_df['Condition'], weather_df['total_mins'])[0][0]
        weather_measures['mode_wind_direction'] = weighted_mode(weather_df['Wind'], weather_df['total_mins'])[0][0]

        # Kept as a one-row DataFrame: the list is combined with pd.concat
        # when the final frame is written.
        weather_round = pd.DataFrame([weather_measures])
        round_weather.append(weather_round)

    except ValueError as error:
        # Best effort: a tee time that cannot be processed is reported with
        # the reason and skipped, so the remaining tee times still run.
        print(f'{city_date}: {error}')
    

In [54]:
# Combine the one-row frames into the final table and write it to CSV.
# Each one-row frame carries index 0, so a plain concat would give every row
# the same index label; ignore_index=True assigns a unique 0..n-1 index, and
# the index column written to the CSV then identifies each row.

round_weather_df = pd.concat(round_weather, ignore_index=True)
round_weather_df.to_csv('weather_measures_tee_times.csv')