In [1]:
#Import dependencies.
from datetime import datetime
from meteostat import Daily
from meteostat import Point
import pandas as pd
import requests
import json
import itertools

In [2]:
#Imported cfb_attendance_editted.csv, which has manually cleaned data in "Site" column.
df = pd.read_csv("cfb_attendance_editted.csv", encoding = "cp1252")

#Cleaning data (copy and pasted from data_clean.ipynb).
df = df.astype({'Result': 'string'})
df = df.astype({'Fill Rate': 'float64'})
df = df.astype({'Conference': 'string'})

df = df[df["Result"].str.contains("Blue") == False]
df = df[df["Result"].str.contains("White") == False]
df = df[df["Result"].str.contains("NC") == False]
df = df[df["Result"].str.contains("OT") == False]
df = df[df["Result"].str.contains("vacated") == False]
df = df[df["Result"].str.contains('\[') == False]
df = df[df["Result"].str.contains('\(') == False]
df = df[df["Result"].str.contains('\‡') == False]
df = df[df["Result"].str.contains('A') == False]

df['W/L'] = df['Result'].str.split(' ', expand=True)[0]
df['Score'] = df['Result'].str.split(' ', expand=True)[1]
df['Home Score'] = df['Score'].str.split('-', expand=True)[0]
df['Away Score'] = df['Score'].str.split('-', expand=True)[1]

df = df.astype({'Home Score': 'int64'})
df = df.astype({'Away Score': 'int64'})

tscore = df["Home Score"] + df["Away Score"]
df["Total Score"] = tscore

#Splitting "Site" column to put City, State in new column.
#Four different words (Stadium, Bowl, Field, Dome) appear where split needs to occur. Created four separate dataframes filtering for all but one of the words, 
#then splitting the data on that word. Did this for all four words, then concatenated the four dataframes.
dfStadium = df[df["Site"].str.contains("Stadium")]
dfStadium["City, State"] = dfStadium["Site"].str.split("Stadium", expand=True)[1]
dfBowl = df[df["Site"].str.contains("Bowl")]
dfBowl["City, State"] = dfBowl["Site"].str.split("Bowl", expand=True)[1]
dfField = df[df["Site"].str.contains("Field")]
dfField["City, State"] = dfField["Site"].str.split("Field", expand=True)[1]
dfDome = df[df["Site"].str.contains("Dome")]
dfDome["City, State"] = dfDome["Site"].str.split("Dome", expand=True)[1]

merged_df = pd.concat([dfStadium, dfField, dfDome, dfBowl])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [4]:
#Create list of City, State data to iterate over.
city_state_list = merged_df["City, State"].tolist()
#Create empty lists to store lat, lng data for each City, State.
lat = []
lng = []
#Loop through all City, State data and retrieve lat, lng data through Google Maps API.
for i in range(len(city_state_list)):
    target_city_state = city_state_list[i]
    target_url = f"https://maps.googleapis.com/maps/api/geocode/json?address={target_city_state}&key=AIzaSyC3eqFAAPDCogh9CSV4audVKKTFN3H9X2g"
    geo_data = requests.get(target_url).json()
    lat.append(geo_data["results"][0]["geometry"]["location"]["lat"])
    lng.append(geo_data["results"][0]["geometry"]["location"]["lng"])
#Add lat, lng lists to dataframe.    
merged_df["lat"] = lat
merged_df["lng"] = lng

In [6]:
#Create lists of month, day, year data to iterate over.
month_list = merged_df["Month"].tolist()
day_list = merged_df["Day"].tolist()
year_list = merged_df["Year"].tolist()
#Create lists to store weather data for each City, State on the particular month, day, year.
tavg = []
tmin = []
tmax = []
prcp = []
snow = []
wdir = []
wspd = []
wpgt = []
pres = []
tsun = []
#Loop through all City, State data and retrieve weather data from Meteostat API.
for i in range(len(city_state_list)):
    start = datetime(year_list[i], month_list[i], day_list[i])
    end = datetime(year_list[i], month_list[i], day_list[i])
    location = Point(lat[i], lng[i])
    data = Daily(location, start, end)
    data = data.fetch()
    tavg.append(data["tavg"].tolist())
    tmin.append(data["tmin"].tolist())
    tmax.append(data["tmax"].tolist())
    prcp.append(data["prcp"].tolist())
    snow.append(data["snow"].tolist())
    wdir.append(data["wdir"].tolist())
    wspd.append(data["wspd"].tolist())
    wpgt.append(data["wpgt"].tolist())
    pres.append(data["pres"].tolist())
    tsun.append(data["tsun"].tolist())

In [7]:
#Not all data is available for every month, day, year, City, State.
#Loop through weather data (currently a list of lists) and change all empty list values within the weather data lists to None valued lists.
for i in range(len(tavg)):
    if tavg[i] == []:
        tavg[i] = [None]
    if tmin[i] == []:
        tmin[i] = [None]
    if tmax[i] == []:
        tmax[i] = [None]
    if prcp[i] == []:
        prcp[i] = [None]
    if snow[i] == []:
        snow[i] = [None]
    if wdir[i] == []:
        wdir[i] = [None]
    if wspd[i] == []:
        wspd[i] = [None]
    if wpgt[i] == []:
        wpgt[i] = [None]
    if pres[i] == []:
        pres[i] = [None]
    if tsun[i] == []:
        tsun[i] = [None]

#Change list of lists to list of values using itertools.
tavg = list(itertools.chain(*tavg))
tmin = list(itertools.chain(*tmin))
tmax = list(itertools.chain(*tmax))
prcp = list(itertools.chain(*prcp))
snow = list(itertools.chain(*snow))
wdir = list(itertools.chain(*wdir))
wspd = list(itertools.chain(*wspd))
wpgt = list(itertools.chain(*wpgt))
pres = list(itertools.chain(*pres))
tsun = list(itertools.chain(*tsun))

#Add weather data lists to dataframe.
merged_df["tavg"] = tavg
merged_df["tmin"] = tmin
merged_df["tmax"] = tmax
merged_df["prcp"] = prcp
merged_df["snow"] = snow
merged_df["wdir"] = wdir
merged_df["wspd"] = wspd
merged_df["wpgt"] = wpgt
merged_df["pres"] = pres
merged_df["tsun"] = tsun

In [9]:
#Export dataframe to csv file.
merged_df.to_csv("final.csv", index=False)