In [4]:
import pandas as pd
import os
import csv
from contextlib import nullcontext
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap

In [5]:
# READ THE CSV FILE / IMPORT THE DATA

absolute_path = os.path.abspath('')
relative_path = "data/historical_field_goal_data.csv"
full_path = os.path.join(absolute_path, relative_path)
df = pd.read_csv(full_path)

In [6]:
# PRINT A SAMPLE OF THE DATA

'''
print("HERE IS A SAMPLE OF THE DATA:")
print()
print(df.head())
'''

'\nprint("HERE IS A SAMPLE OF THE DATA:")\nprint()\nprint(df.head())\n'

In [7]:
# PRINT THE FIRST ROW IN A DOME IN THE CURRENT DF

'''
print()
print()
print()
print("HERE IS THE FIRST ROW IN A DOME")
print()
print(df.iloc[19,:])
'''

'\nprint()\nprint()\nprint()\nprint("HERE IS THE FIRST ROW IN A DOME")\nprint()\nprint(df.iloc[19,:])\n'

In [8]:
# CONVERTS THE NULL VALUES IN COLUMN 'temperature' TO 0
df['temperature'] = df['temperature'].fillna(0)

In [9]:
# PRINT ROW 20 (FIRST ROW IN A DOME) SHOWING NEW VALUE, TEMPERATURE SHOULD NOW BE 0
'''
print()
print()
print()
print("HERE IS THE FIRST ROW IN A DOME WITHOUT NULL TEMP")
print(df.iloc[19,:])
'''

'\nprint()\nprint()\nprint()\nprint("HERE IS THE FIRST ROW IN A DOME WITHOUT NULL TEMP")\nprint(df.iloc[19,:])\n'

In [10]:
# CREATE A LIST OF COLUMNS BY NAME BY USING .COLUMNS
'''
list_of_column_names = list(df.columns)

# displaying the list of column names
print()
print("LIST OF COLUMN NAMES:")
print(list_of_column_names)
print()
print()
'''

'\nlist_of_column_names = list(df.columns)\n\n# displaying the list of column names\nprint()\nprint("LIST OF COLUMN NAMES:")\nprint(list_of_column_names)\nprint()\nprint()\n'

In [11]:
# CREATING NEW DATAFRAME TO HOLD RESULTS FROM OUR ANALYSIS

results_df = pd.DataFrame(list())

# WRITING EMPTY DATA FRAME TO THE NEW CSV FILE

results_df.to_csv('data/results_df.csv')

# DEFINING A HEADER ROW

first_row = ["criteria", "attempts_per_game", "fg_percent", "expected_fgs"]

with open('data/results_df.csv', 'w', newline='') as f:

    # create the csv writer

    writer = csv.writer(f)

    # write a row to the csv file
    
    writer.writerow(first_row)

In [12]:
# ADDING ADDITIONAL TEST ROWS
'''
test_row1 = ["x", "y", "z"]
test_row2 = ["apple", "banana", "orange"]

with open('data/results_df.csv', 'a', newline='') as f:
    # create the csv writer
    writer = csv.writer(f)

    # write a row to the csv file
    writer.writerow(test_row1)
    writer.writerow(test_row2)
'''

'\ntest_row1 = ["x", "y", "z"]\ntest_row2 = ["apple", "banana", "orange"]\n\nwith open(\'data/results_df.csv\', \'a\', newline=\'\') as f:\n    # create the csv writer\n    writer = csv.writer(f)\n\n    # write a row to the csv file\n    writer.writerow(test_row1)\n    writer.writerow(test_row2)\n'

In [13]:
# PRINT THE results_df.csv

absolute_path_results = os.path.abspath('')
relative_path_results = "data/results_df.csv"
full_path_results = os.path.join(absolute_path_results, relative_path_results)

print_results_df = pd.read_csv(full_path_results)
print(print_results_df.head())

Empty DataFrame
Columns: [criteria, attempts_per_game, fg_percent, expected_fgs]
Index: []


In [14]:
# METHOD TO ADD RESULTS TO results_df

def add_results_data(data):

    with open('data/results_df.csv', 'a', newline='', encoding='utf8') as f:

        # create the csv writer

        writer = csv.writer(f)

        # write row to the csv file
        
        writer.writerow(data)

In [15]:
# ADDING A TEST ROW TO results.df
'''
results_data_to_add = ["cri", "att", "per", "exp"]
add_results_data(results_data_to_add)

print_results_df = pd.read_csv(full_path_results)
print(print_results_df.head())
'''

'\nresults_data_to_add = ["cri", "att", "per", "exp"]\nadd_results_data(results_data_to_add)\n\nprint_results_df = pd.read_csv(full_path_results)\nprint(print_results_df.head())\n'

In [16]:
# ADDING TOTAL GAME DATA

total_games = 2405

# CALCULATING GAMES WITH NO FG'S

games_with_fgs = df[['season', 'week', 'stadium']].drop_duplicates().shape[0]
games_with_no_fgs = total_games - games_with_fgs

# PRINTING TOTAL NUMBER OF GAMES

print()
print("There were {} games from 2012-2020.".format(total_games))
print()
print()


There were 2405 games from 2012-2020.




In [17]:
# PROVIDING USER WITH INFORMATION ABOUT OUR DATA SET

print()
print("There are", df[['season', 'week', 'stadium']].drop_duplicates().shape[0], "games in our data set.")
print()
print()
print()

# Alternative code using len() instead of .shape()
    #print("There are", len(df[['season', 'week', 'stadium']].drop_duplicates()), "games in our data set.")

print("This means there were {} games with no field goals or field goal attempts.".format(games_with_no_fgs))
print()
print()
print()
print("There were field goal attempts in {} percentage of the games and no field goal attempts in {} percentage of the games.".format(games_with_fgs/total_games, games_with_no_fgs/total_games))
print()
print()
print()


There are 2352 games in our data set.



This means there were 53 games with no field goals or field goal attempts.



There were field goal attempts in 0.977962577962578 percentage of the games and no field goal attempts in 0.02203742203742204 percentage of the games.





In [18]:
# DECLARING VARIABLES

number_of_fg_attempts = len(df[df.made < 3])
number_of_fg_made = len(df[df.made == 1])
number_of_fg_missed = len(df[df.made == 0])
fg_percent = (number_of_fg_made / number_of_fg_attempts)

# Variables above could also be written using .value_counts() but it causes issues later in this program
    #number_of_fg_attempts = df.made.value_counts()
    #fg_percent = df.made.value_counts(1)

attempts_per_game = number_of_fg_attempts / total_games
attempts_per_game_per_team = attempts_per_game / 2
fg_per_game = number_of_fg_made / total_games
years = df['season'].unique()
weeks = df['week'].unique()
stadiums = df['stadium'].unique()
kickers = df['kicker'].unique()
teams = df['offense'].unique()

In [19]:
# ADDING BASE DATA TO results_df

results_data_to_add = ["Avg per team, per game", attempts_per_game_per_team, fg_percent, (attempts_per_game_per_team * fg_percent)]
add_results_data(results_data_to_add)

print_results_df = pd.read_csv(full_path_results)
print(print_results_df.head())

                 criteria  attempts_per_game  fg_percent  expected_fgs
0  Avg per team, per game           1.930561    0.844066      1.629522


In [20]:
# METHOD TO PRINT SPECIFIC DATA FROM THE DATA FILE

def print_df_info(description, variable):
    print()
    print(description + ":")
    print()
    print(variable)
    print()

In [21]:
# PRINT SELECTED DATE FROM THE DATA FILE

print_df_info("NUMBER OF MADE FIELD GOALS:", number_of_fg_attempts)
print_df_info("PERCENTAGE OF MADE FIELD GOALS:", fg_percent)
print_df_info("NUMBER OF FIELD GOALS PER GAME:", fg_per_game)
print_df_info("Here is a list of years where game data was recorded:", years)
print_df_info("Here is a list of weeks where game data was recorded:", weeks)
print_df_info("Here is a list of Stadiums where games have been played:", stadiums)
print_df_info("Here is a list of Kickers who attempted a field goal:", kickers)


NUMBER OF MADE FIELD GOALS::

9286


PERCENTAGE OF MADE FIELD GOALS::

0.8440663364204178


NUMBER OF FIELD GOALS PER GAME::

3.259043659043659


Here is a list of years where game data was recorded::

[2012 2013 2014 2015 2016 2017 2018 2019 2020]


Here is a list of weeks where game data was recorded::

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21]


Here is a list of Stadiums where games have been played::

['MetLife Stadium' 'Soldier Field' 'FirstEnergy Stadium'
 'Arrowhead Stadium' 'Ford Field' 'Mercedes-Benz Superdome' 'NRG Stadium'
 'Nissan Stadium' 'Mall of America Field at HHH Metrodome'
 'Raymond James Stadium' 'University of Phoenix Stadium' 'Lambeau Field'
 'Mile High' 'M&T Bank Stadium' 'O.co Coliseum Oakland'
 'Paul Brown Stadium' 'New Era Stadium' 'Lucas Oil Stadium'
 'Lincoln Financial Field' 'EverBank Field' 'Bank of America Stadium'
 'Gillette Stadium' 'Edward Jones Dome' 'CenturyLink Field'
 'Qualcomm Stadium' 'Sun Life Stadium' 'Heinz Field' 'Can

In [22]:
# METHOD TO ANALYZE FG DATA WITH A SINGLE CRITERIA SEARCH

def compare_single_criteria(column, operator, value, description):

    cop = column + operator + value

    fg_att = df.query(cop).shape[0]
    fg_made = df.query(cop + "& made == 1").shape[0]
    fg_missed = df.query(cop + "& made == 0").shape[0]
    p_made = df.query(cop + "& made == 1").shape[0] / fg_att
    p_missed = df.query(cop + "& made == 0").shape[0] / fg_att

    # IF STATEMENT TO DETERMINE THE CORRECT OPERATOR

    if(value.isnumeric()):
        if(operator == "=="):
            temp_df = df.loc[df[column] == float(value)].copy()
        elif(operator == ">"):
            temp_df = df.loc[df[column] > float(value)].copy()
        elif(operator == ">="):
            temp_df = df.loc[df[column] >= float(value)].copy()
        elif(operator == "<"):
            temp_df = df.loc[df[column] < float(value)].copy()
        elif(operator == "<="):
            temp_df = df.loc[df[column] <= float(value)].copy()

    else:
        if(operator == "=="):
            temp_df = df.loc[df[column] == "{}".format(value)].copy()
        elif(operator == "!="):
            temp_df = df.loc[df[column] != value].copy()

    # GTMC IS GAMES THAT MEET CRITERIA
    
    gtmc = temp_df[['season', 'week', 'stadium']].drop_duplicates().shape[0]
    fg_per_game = fg_att / gtmc / 2
    expected_made_per_game = fg_per_game * p_made

    print()
    print("Number of field goals attempted, ", description, ":", fg_att)
    print("Number of field goals made, ", description, ":", fg_made)
    print("Number of field goals missed, ", description, ":", fg_missed)
    print("Percentage of field goals made, ", description, ":", p_made)
    print("Percentage of field goals missed, ", description, ":", p_missed)
    print("Games ", description, ":", gtmc)
    print("Field Goals attempted per game ", description, ":", fg_per_game)
    print("Number of expected field goals made per game, ", description, ":", expected_made_per_game)
    print()
    print()

    results_data_to_add = [description, fg_per_game, p_made, expected_made_per_game]
    add_results_data(results_data_to_add)

In [23]:
# METHOD TO ANALYZE FG DATA WITH A DOUBLE CRITERIA SEARCH
# THIS METHOD MUST USE INTEGERS WITH > AS THE FIRST OPERATOR AND < AS THE SECOND OPERATOR

def compare_double_criteria(column, operator, value, operator2, value2, description):

    cop = column + operator + value
    cop2 = column + operator2 + value2

    fg_att = df.query(cop + "&" + cop2).shape[0]
    print("Number of field goals attempted, ", description, ":", fg_att)

    fg_made = df.query(cop + "&" + cop2 + "& made == 1").shape[0]
    print("Number of field goals made, ", description, ":", fg_made)

    fg_missed = df.query(cop + "&" + cop2 + "& made == 0").shape[0]
    print("Number of field goals missed, ", description, ":", fg_missed)

    p_made = df.query(cop + "&"+  cop2 + "& made == 1").shape[0] / df.query(cop + "&" + cop2).shape[0]
    print("Percentage of field goals made, ", description, ":", p_made)

    p_missed = df.query(cop + "&" + cop2 + "& made == 0").shape[0] / df.query(cop + "&" + cop2).shape[0]
    print("Percentage of field goals missed, ", description, ":", p_missed)

    # THIS METHOD MUST USE INTEGERS WITH > AS THE FIRST OPERATOR AND < AS THE SECOND OPERATOR
 
    temp_df = df.loc[(df[column] > int(value)) & (df[column] < int(value2))].copy()
    print(temp_df.head())

    # GTMC IS GAMES THAT MEET CRITERIA
    
    gtmc = temp_df[['season', 'week', 'stadium']].drop_duplicates().shape[0]
    print("Games ", description, ":", gtmc)

    fg_per_game = fg_att / gtmc / 2
    print("Field Goals attempted per game ", description, ":", fg_per_game)

    expected_made_per_game = fg_per_game * p_made
    print("Number of expected field goals made per game, ", description, ":", expected_made_per_game)

    '''
    print()
    print("Number of field goals attempted, ", description, ":", fg_att)
    print("Number of field goals made, ", description, ":", fg_made)
    print("Number of field goals missed, ", description, ":", fg_missed)
    print("Percentage of field goals made, ", description, ":", p_made)
    print("Percentage of field goals missed, ", description, ":", p_missed)
    print("Games ", description, ":", gtmc)
    print("Field Goals attempted per game ", description, ":", fg_per_game)
    print("Expected Field Goals made per game ", description, ":", fg_per_game)
    '''

    print()
    print()

    results_data_to_add = [description, fg_per_game, p_made, expected_made_per_game]
    add_results_data(results_data_to_add)

In [24]:
# USING THE COMPARE METHODS TO LOOK AT HOW TEMPERATURE EFFECTS FIELD GOALS

compare_single_criteria("temperature", "==", "0", "in a dome")
compare_double_criteria("temperature", ">", "0", "<", "32", "in below freezing temperature")
compare_double_criteria("temperature", ">", "32", "<", "55", "in the cold (32f-55f)")
compare_single_criteria("temperature",  ">", "80", "in the heat (over 80f)")


Number of field goals attempted,  in a dome : 2252
Number of field goals made,  in a dome : 1904
Number of field goals missed,  in a dome : 348
Percentage of field goals made,  in a dome : 0.8454706927175843
Percentage of field goals missed,  in a dome : 0.15452930728241562
Games  in a dome : 561
Field Goals attempted per game  in a dome : 2.0071301247771838
Number of expected field goals made per game,  in a dome : 1.696969696969697


Number of field goals attempted,  in below freezing temperature : 339
Number of field goals made,  in below freezing temperature : 283
Number of field goals missed,  in below freezing temperature : 56
Percentage of field goals made,  in below freezing temperature : 0.8348082595870207
Percentage of field goals missed,  in below freezing temperature : 0.16519174041297935
     season  week away home            stadium  temperature  humidity  \
901    2012    16  IND   KC  Arrowhead Stadium         25.0      74.0   
902    2012    16  IND   KC  Arrowhead St

In [25]:
# USING THE COMPARE METHODS TO LOOK AT HOW WIND SPEED EFFECTS FIELD GOALS

compare_single_criteria("wind_speed", "<", "5", "in low wind(under 5 mph)")
compare_double_criteria("wind_speed", ">", "5", "<", "10", "in medium wind (5-10 mph)")
compare_single_criteria("wind_speed", ">", "10", "in high wind (over 10mph)")


Number of field goals attempted,  in low wind(under 5 mph) : 1483
Number of field goals made,  in low wind(under 5 mph) : 1293
Number of field goals missed,  in low wind(under 5 mph) : 190
Percentage of field goals made,  in low wind(under 5 mph) : 0.8718813216453135
Percentage of field goals missed,  in low wind(under 5 mph) : 0.12811867835468643
Games  in low wind(under 5 mph) : 389
Field Goals attempted per game  in low wind(under 5 mph) : 1.9061696658097687
Number of expected field goals made per game,  in low wind(under 5 mph) : 1.6619537275064267


Number of field goals attempted,  in medium wind (5-10 mph) : 2386
Number of field goals made,  in medium wind (5-10 mph) : 2009
Number of field goals missed,  in medium wind (5-10 mph) : 377
Percentage of field goals made,  in medium wind (5-10 mph) : 0.8419949706621962
Percentage of field goals missed,  in medium wind (5-10 mph) : 0.15800502933780386
   season  week away home              stadium  temperature  humidity  \
5    2012 

In [26]:
# USING THE COMPARE METHODS TO LOOK AT HOW OVER/UNDER EFFECTS FIELD GOALS

compare_single_criteria("ou", "<", "40", "with over/under below 40")
compare_double_criteria("ou", ">", "40", "<", "45", "with over/under between 40 and 45")
compare_double_criteria("ou", ">", "45", "<", "50", "with over/under between 45 and 50")
compare_single_criteria("ou", ">", "50", "with over/under over 50")


Number of field goals attempted,  with over/under below 40 : 627
Number of field goals made,  with over/under below 40 : 523
Number of field goals missed,  with over/under below 40 : 104
Percentage of field goals made,  with over/under below 40 : 0.8341307814992025
Percentage of field goals missed,  with over/under below 40 : 0.16586921850079744
Games  with over/under below 40 : 162
Field Goals attempted per game  with over/under below 40 : 1.9351851851851851
Number of expected field goals made per game,  with over/under below 40 : 1.6141975308641974


Number of field goals attempted,  with over/under between 40 and 45 : 3394
Number of field goals made,  with over/under between 40 and 45 : 2872
Number of field goals missed,  with over/under between 40 and 45 : 522
Percentage of field goals made,  with over/under between 40 and 45 : 0.8461991750147319
Percentage of field goals missed,  with over/under between 40 and 45 : 0.15380082498526812
   season  week away home          stadium  t

In [27]:
# METHOD TO ALANYZE FG DATA WITH A STRING CRITERIA SEARCH

def compare_home_away(column, operator, value, description):

    cop = column + operator + value

    fg_att = df.query(cop).shape[0]
    fg_made = df.query(cop + "& made == 1").shape[0]
    fg_missed = df.query(cop + "& made == 0").shape[0]
    p_made = df.query(cop + "& made == 1").shape[0] / fg_att
    p_missed = df.query(cop + "& made == 0").shape[0] / fg_att
    
    # GTMC IS GAMES THAT MEET CRITERIA
    gtmc = total_games
    fg_per_game = fg_att / gtmc
    expected_made_per_game = fg_per_game * p_made

    print()
    print("Number of field goals attempted, ", description, ":", fg_att)
    print("Number of field goals made, ", description, ":", fg_made)
    print("Number of field goals missed, ", description, ":", fg_missed)
    print("Percentage of field goals made, ", description, ":", p_made)
    print("Percentage of field goals missed, ", description, ":", p_missed)
    print("Games ", description, ":", gtmc)
    print("Field Goals attempted per game ", description, ":", fg_per_game)
    print("Number of expected field goals made per game, ", description, ":", expected_made_per_game)
    
    print()
    print()

    results_data_to_add = [description, fg_per_game, p_made, expected_made_per_game]
    add_results_data(results_data_to_add)

In [28]:
# COMPARING HOME AND AWAY GAMES

compare_home_away("home_away", "==", '"home"', "in home games")
compare_home_away("home_away", "==", '"away"', "in away games")


Number of field goals attempted,  in home games : 4655
Number of field goals made,  in home games : 3948
Number of field goals missed,  in home games : 707
Percentage of field goals made,  in home games : 0.8481203007518797
Percentage of field goals missed,  in home games : 0.1518796992481203
Games  in home games : 2405
Field Goals attempted per game  in home games : 1.9355509355509355
Number of expected field goals made per game,  in home games : 1.6415800415800417



Number of field goals attempted,  in away games : 4631
Number of field goals made,  in away games : 3890
Number of field goals missed,  in away games : 741
Percentage of field goals made,  in away games : 0.8399913625566833
Percentage of field goals missed,  in away games : 0.16000863744331678
Games  in away games : 2405
Field Goals attempted per game  in away games : 1.9255717255717255
Number of expected field goals made per game,  in away games : 1.6174636174636174




In [29]:
# COMPARING FIELD GOALS BY STADIUM

# FIRST WE RMOVE STADIUMS WHERE LESS THAN 20 FIELD GOALS WERE ATTEMPTED, THEIR DATA SETS ARE TOO SMALL AND WILL SKEW RESULTS
remove_stadiums = []

for i in stadiums:
    #print("there were {} field goal attempts at {}.".format(len(df[df['stadium'] == i]), i))
    
    if len(df[df['stadium'] == i]) < 20:
        remove_stadiums.append(i)
    
#print(remove_stadiums)

for r in remove_stadiums:
    df_stadiums = df.loc[df.stadium != r]
    stadiums_measured = df_stadiums['stadium'].unique()

#print(df_stadiums)

for s in stadiums_measured:
    stadium_df = df.loc[df['stadium'] == s].copy()

    cop = 'stadium == "{}"'.format(s)
    description = 'in all games at "{}"'.format(s)
    
    fg_att = df.query(cop).shape[0]
    fg_made = df.query(cop + "& made == 1").shape[0]
    fg_missed = df.query(cop + "& made == 0").shape[0]
    p_made = df.query(cop + "& made == 1").shape[0] / df.query(cop).shape[0]
    p_missed = df.query(cop + "& made == 0").shape[0] / df.query(cop).shape[0]

    # GTMC IS GAMES THAT MEET CRITERIA
    gtmc = stadium_df[['season', 'week', 'stadium']].drop_duplicates().shape[0]
    fg_per_game = fg_att / gtmc / 2
    expected_made_per_game = fg_per_game * p_made

    print()
    print("Number of field goals attempted, ", description, ":", fg_att)
    print("Number of field goals made, ", description, ":", fg_made)
    print("Number of field goals missed, ", description, ":", fg_missed)
    print("Percentage of field goals made, ", description, ":", p_made)
    print("Percentage of field goals missed, ", description, ":", p_missed)
    print("Games ", description, ":", gtmc)
    print("Field Goals attempted per game ", description, ":", fg_per_game)
    print("Number of expected field goals made per game, ", description, ":", expected_made_per_game)
    
    print()
    print()

    results_data_to_add = [description, fg_per_game, p_made, expected_made_per_game]
    add_results_data(results_data_to_add)


Number of field goals attempted,  in all games at "MetLife Stadium" : 592
Number of field goals made,  in all games at "MetLife Stadium" : 516
Number of field goals missed,  in all games at "MetLife Stadium" : 76
Percentage of field goals made,  in all games at "MetLife Stadium" : 0.8716216216216216
Percentage of field goals missed,  in all games at "MetLife Stadium" : 0.12837837837837837
Games  in all games at "MetLife Stadium" : 131
Field Goals attempted per game  in all games at "MetLife Stadium" : 2.2595419847328246
Number of expected field goals made per game,  in all games at "MetLife Stadium" : 1.969465648854962



Number of field goals attempted,  in all games at "Soldier Field" : 271
Number of field goals made,  in all games at "Soldier Field" : 224
Number of field goals missed,  in all games at "Soldier Field" : 47
Percentage of field goals made,  in all games at "Soldier Field" : 0.8265682656826568
Percentage of field goals missed,  in all games at "Soldier Field" : 0.17343

In [30]:
# METHOD TO ANALYZE FG DATA WITH A STRING CRITERIA SEARCH, PER STADIUM

def compare_home_away_per_stadium(column, operator, value, description, temporary_df):

    cop = column + operator + value
    
    temp_s_df = temporary_df
    fg_att2 = temp_s_df.query(cop).shape[0]  
    fg_made = temp_s_df.query(cop + "& made == 1").shape[0]
    fg_missed = temp_s_df.query(cop + "& made == 0").shape[0]
    p_made = temp_s_df.query(cop + "& made == 1").shape[0] / fg_att2
    p_missed = temp_s_df.query(cop + "& made == 0").shape[0] / fg_att2

    # GTMC IS GAMES THAT MEET CRITERIA
    gtmc = stadium_df[['season', 'week', 'stadium']].drop_duplicates().shape[0]
    fg_per_game = fg_att2 / gtmc
    expected_made_per_game = fg_per_game * p_made

    print()
    print("Number of field goals attempted, ", description, ":", fg_att2)
    print("Number of field goals made, ", description, ":", fg_made)
    print("Number of field goals missed, ", description, ":", fg_missed)
    print("Percentage of field goals made, ", description, ":", p_made)
    print("Percentage of field goals missed, ", description, ":", p_missed)
    print("Games ", description, ":", gtmc)
    print("Field Goals attempted per game ", description, ":", fg_per_game)
    print("Number of expected field goals made per game, ", description, ":", expected_made_per_game)
    
    print()
    print()

    results_data_to_add = [description, fg_per_game, p_made, expected_made_per_game]
    add_results_data(results_data_to_add)

In [31]:
# COMPARING ONLY HOME GAMES IN EACH STADIUM

for s in stadiums_measured:
    stadium_df = df.loc[df['stadium'] == s].copy()
    compare_home_away_per_stadium("home_away", "==", '"home"', "in home games at {}".format(s), stadium_df)


Number of field goals attempted,  in home games at MetLife Stadium : 267
Number of field goals made,  in home games at MetLife Stadium : 232
Number of field goals missed,  in home games at MetLife Stadium : 35
Percentage of field goals made,  in home games at MetLife Stadium : 0.8689138576779026
Percentage of field goals missed,  in home games at MetLife Stadium : 0.13108614232209737
Games  in home games at MetLife Stadium : 131
Field Goals attempted per game  in home games at MetLife Stadium : 2.0381679389312977
Number of expected field goals made per game,  in home games at MetLife Stadium : 1.7709923664122138



Number of field goals attempted,  in home games at Soldier Field : 130
Number of field goals made,  in home games at Soldier Field : 106
Number of field goals missed,  in home games at Soldier Field : 24
Percentage of field goals made,  in home games at Soldier Field : 0.8153846153846154
Percentage of field goals missed,  in home games at Soldier Field : 0.18461538461538463

In [32]:
# COMPARING ONLY AWAY GAMES IN EACH STADIUM
 
for s in stadiums_measured:
    stadium_df = df.loc[df['stadium'] == s].copy()
    compare_home_away_per_stadium("home_away", "==", '"away"', "in away games at {}".format(s), stadium_df)


Number of field goals attempted,  in away games at MetLife Stadium : 325
Number of field goals made,  in away games at MetLife Stadium : 284
Number of field goals missed,  in away games at MetLife Stadium : 41
Percentage of field goals made,  in away games at MetLife Stadium : 0.8738461538461538
Percentage of field goals missed,  in away games at MetLife Stadium : 0.12615384615384614
Games  in away games at MetLife Stadium : 131
Field Goals attempted per game  in away games at MetLife Stadium : 2.480916030534351
Number of expected field goals made per game,  in away games at MetLife Stadium : 2.16793893129771



Number of field goals attempted,  in away games at Soldier Field : 141
Number of field goals made,  in away games at Soldier Field : 118
Number of field goals missed,  in away games at Soldier Field : 23
Percentage of field goals made,  in away games at Soldier Field : 0.8368794326241135
Percentage of field goals missed,  in away games at Soldier Field : 0.16312056737588654
Ga

In [35]:
# COMPARING BY KICKER



# FIRST WE RMOVE KICKERS WHOT TOOK THAN 20 FIELD GOALS ATTEMPTs, THEIR DATA SETS ARE TOO SMALL AND WILL SKEW RESULTS
remove_kickers = []

for i in kickers:
    #print("there were {} field goal attempts at {}.".format(len(df[df['stadium'] == i]), i))
    
    if len(df[df['kicker'] == i]) < 20:
        remove_kickers.append(i)
    
#print(remove_stadiums)

for r in remove_kickers:
    df_kickers = df.loc[df.kicker != r]
    kickers_measured = df_kickers['kicker'].unique()

#print(df_stadiums)

for k in kickers_measured:
    kicker_df = df.loc[df['kicker'] == k].copy()

    cop = 'kicker == "{}"'.format(k)
    description = '"{}"'.format(k)
    
    #cop = 'kicker == "{}"'.format(k)
    #description = '"{}"'.format(k)
    #kicker_df = df.loc[df['kicker'] == k].copy()

    fg_att = df.query(cop).shape[0]
    fg_made = df.query(cop + "& made == 1").shape[0]
    fg_missed = df.query(cop + "& made == 0").shape[0]
    p_made = df.query(cop + "& made == 1").shape[0] / fg_att
    p_missed = df.query(cop + "& made == 0").shape[0] / fg_att
    
    # GTMC IS GAMES THAT MEET CRITERIA
    gtmc = kicker_df[['season', 'week', 'stadium']].drop_duplicates().shape[0]
    fg_per_game = fg_att / gtmc
    expected_made_per_game = fg_per_game * p_made

    print()
    print("Number of field goals attempted, ", description, ":", fg_att)
    print("Number of field goals made, ", description, ":", fg_made)
    print("Number of field goals missed, ", description, ":", fg_missed)
    print("Percentage of field goals made, ", description, ":", p_made)
    print("Percentage of field goals missed, ", description, ":", p_missed)
    print("Games ", description, ":", gtmc)
    print("Field Goals attempted per game ", description, ":", fg_per_game)
    print("Number of expected field goals made per game, ", description, ":", expected_made_per_game)
    
    print()
    print()

    results_data_to_add = [description, fg_per_game, p_made, expected_made_per_game]
    add_results_data(results_data_to_add)


Number of field goals attempted,  "Lawrence Tynes" : 39
Number of field goals made,  "Lawrence Tynes" : 33
Number of field goals missed,  "Lawrence Tynes" : 6
Percentage of field goals made,  "Lawrence Tynes" : 0.8461538461538461
Percentage of field goals missed,  "Lawrence Tynes" : 0.15384615384615385
Games  "Lawrence Tynes" : 14
Field Goals attempted per game  "Lawrence Tynes" : 2.7857142857142856
Number of expected field goals made per game,  "Lawrence Tynes" : 2.357142857142857



Number of field goals attempted,  "Dan Bailey" : 262
Number of field goals made,  "Dan Bailey" : 224
Number of field goals missed,  "Dan Bailey" : 38
Percentage of field goals made,  "Dan Bailey" : 0.8549618320610687
Percentage of field goals missed,  "Dan Bailey" : 0.1450381679389313
Games  "Dan Bailey" : 122
Field Goals attempted per game  "Dan Bailey" : 2.1475409836065573
Number of expected field goals made per game,  "Dan Bailey" : 1.8360655737704916



Number of field goals attempted,  "Robbie Gould

In [None]:
# METHOD TO ANALYZE FG DATA WITHIN A SMALLER DATA FRAME

def compare_temp_df(column, operator, value, description, df):

    cop = column + operator + value

    fg_att = df.query(cop).shape[0]
    fg_made = df.query(cop + "& made == 1").shape[0]
    fg_missed = df.query(cop + "& made == 0").shape[0]
    p_made = df.query(cop + "& made == 1").shape[0] / df.query(cop).shape[0]
    p_missed = df.query(cop + "& made == 0").shape[0] / df.query(cop).shape[0]
    
    if(value.isnumeric()):
        if(operator == "=="):
            temp_df = df.loc[df[column] == float(value)].copy()
        elif(operator == ">"):
            temp_df = df.loc[df[column] > float(value)].copy()
        elif(operator == ">="):
            temp_df = df.loc[df[column] >= float(value)].copy()
        elif(operator == "<"):
            temp_df = df.loc[df[column] < float(value)].copy()
        elif(operator == "<="):
            temp_df = df.loc[df[column] <= float(value)].copy()

    else:
        if(operator == "=="):
            temp_df = df.loc[df[column] == "{}".format(value)].copy()
        elif(operator == "!="):
            temp_df = df.loc[df[column] != value].copy()

    # GTMC IS GAMES THAT MEET CRITERIA
    gtmc = temp_df[['season', 'week', 'stadium']].drop_duplicates().shape[0]
    fg_per_game = fg_att / gtmc
    expected_made_per_game = fg_per_game * p_made

    print()
    print("Number of field goals attempted, ", description, ":", fg_att)
    print("Number of field goals made, ", description, ":", fg_made)
    print("Number of field goals missed, ", description, ":", fg_missed)
    print("Percentage of field goals made, ", description, ":", p_made)
    print("Percentage of field goals missed, ", description, ":", p_missed)
    print("Games ", description, ":", gtmc)
    print("Field Goals attempted per game ", description, ":", fg_per_game)
    print("Number of expected field goals made per game, ", description, ":", expected_made_per_game)
    
    print()
    print()

    results_data_to_add = [description, fg_per_game, p_made, expected_made_per_game]
    add_results_data(results_data_to_add)

In [None]:
# READING THE RESULTS DATA FRAME
absolute_path = os.path.abspath('')
relative_results_path = "data/results_df.csv"
full_path = os.path.join(absolute_path, relative_results_path)
results_df = pd.read_csv(full_path)

results_df.sort_values(by=['expected_fgs'], inplace=True)

# DISPLAYING THE LIST OF COLUMN NAMES

print(results_df.head(20))

#results_df.set_index(df.iloc[0].values)
print()
print("LIST OF COLUMN NAMES:")
print(results_df.columns)
print()
print()

In [None]:
# VISUALIZATION OF THE MOST FAVORABLE CRITERIA

sns.set(rc={'figure.figsize':(75, 25)})
sns.scatterplot(x=results_df['criteria'], y=results_df['expected_fgs'], s=400, color="blue")
plt.title('Criteria that produce the HIGHEST expected number of field goals', fontsize=100)
plt.xlabel('criteria', fontsize=50)
plt.xticks(rotation=45, fontsize=30)
plt.ylabel('expected field goals per game', fontsize=50)
plt.yticks(fontsize=30)
plt.show()

In [None]:
# VISUALIZATION OF THE LEAST FAVORABLE CRITERIA

sns.set(rc={'figure.figsize':(75, 25)})
sns.scatterplot(x=results_df['criteria'], y=results_df['expected_fgs'], s=500, color="blue")
plt.title('Criteria that produce the LOWEST expected number of field goals', fontsize=100)
plt.xlabel('criteria', fontsize=50)
plt.xticks(rotation=90, fontsize=30)
plt.ylabel('expected field goals per game', fontsize=50, color="blue")
plt.yticks(fontsize=30)
#ax3 = sns.scatterplot(x=results_df['criteria'], y=results_df['attempts_per_game'], s=250, color="green")
ax2 = plt.twinx()
sns.scatterplot(x=results_df['criteria'], y=results_df['fg_percent'], ax=ax2,  s=250, color="red")
plt.ylabel('Percentage Made', fontsize=50, color="red")
plt.yticks(fontsize=30)
plt.show()

In [44]:
# CREATING A DICTIONARY THAT HOLDS SEPARATE DATA FRAMES FOR EACH YEAR

for y in years:
    year_df = df[(df.season == y)].copy()

    for t in teams:
    
        cop = 'defense == "{}"'.format(t)
        description = 'against "{}" "{}" defense'.format(y, t)
        team_df =year_df.loc[year_df['defense'] == t].copy()

        fg_att = df.query(cop).shape[0]
        fg_made = df.query(cop + "& made == 1").shape[0]
        fg_missed = df.query(cop + "& made == 0").shape[0]
        p_made = df.query(cop + "& made == 1").shape[0] / fg_att
        p_missed = df.query(cop + "& made == 0").shape[0] / fg_att
    
        # GTMC IS GAMES THAT MEET CRITERIA
        #sb_df = team_df.loc(team_df['week'] == 21).copy()
        #div_champ_df = team_df.loc(team_df['week'] == 20).copy()
        #second_round_df = team_df.loc(team_df['week'] == 19).copy()
        #wild_card_df = team_df.loc(team_df['week'] == 18).copy()

        #gtmc = team_df[['season', 'week', 'stadium']].drop_duplicates().shape[0]
        if(team_df[['season', 'week', 'stadium']].drop_duplicates().shape[0] > 16):
            gtmc_formula = team_df[['season', 'week', 'stadium']].drop_duplicates().shape[0]
        else:
            gtmc_formula = 16
            
        gtmc = gtmc_formula
        fg_per_game = fg_att / gtmc
        expected_made_per_game = fg_per_game * p_made

        print()
        print("Number of field goals attempted, ", description, ":", fg_att)
        print("Number of field goals made, ", description, ":", fg_made)
        print("Number of field goals missed, ", description, ":", fg_missed)
        print("Percentage of field goals made, ", description, ":", p_made)
        print("Percentage of field goals missed, ", description, ":", p_missed)
        print("Games ", description, ":", gtmc)
        print("Field Goals attempted per game ", description, ":", fg_per_game)
        print("Number of expected field goals made per game, ", description, ":", expected_made_per_game)
    
        print()
        print()

        results_data_to_add = [description, fg_per_game, p_made, expected_made_per_game]
        add_results_data(results_data_to_add)



Number of field goals attempted,  against "2012" "NYG" defense : 306
Number of field goals made,  against "2012" "NYG" defense : 266
Number of field goals missed,  against "2012" "NYG" defense : 40
Percentage of field goals made,  against "2012" "NYG" defense : 0.869281045751634
Percentage of field goals missed,  against "2012" "NYG" defense : 0.13071895424836602
Games  against "2012" "NYG" defense : 16
Field Goals attempted per game  against "2012" "NYG" defense : 19.125
Number of expected field goals made per game,  against "2012" "NYG" defense : 16.625



Number of field goals attempted,  against "2012" "DAL" defense : 275
Number of field goals made,  against "2012" "DAL" defense : 232
Number of field goals missed,  against "2012" "DAL" defense : 43
Percentage of field goals made,  against "2012" "DAL" defense : 0.8436363636363636
Percentage of field goals missed,  against "2012" "DAL" defense : 0.15636363636363637
Games  against "2012" "DAL" defense : 16
Field Goals attempted per 

In [None]:
# PRINTING SELECT SECTIONS OF THE dict_for_years
print("THIS IS 2012")
print(df_year_2012)


In [None]:
print("THIS IS 2016")
print(df_year_2016)
#print(dict_for_years)

In [36]:
#COMPARING BY DEFENSE PLAYED AGAINST

for t in teams:
    
    cop = 'defense == "{}"'.format(t)
    description = 'against "{}" defense'.format(t)
    team_df = df.loc[df['defense'] == t].copy()

    fg_att = df.query(cop).shape[0]
    fg_made = df.query(cop + "& made == 1").shape[0]
    fg_missed = df.query(cop + "& made == 0").shape[0]
    p_made = df.query(cop + "& made == 1").shape[0] / fg_att
    p_missed = df.query(cop + "& made == 0").shape[0] / fg_att
    
    # GTMC IS GAMES THAT MEET CRITERIA
    gtmc = team_df[['season', 'week', 'stadium']].drop_duplicates().shape[0]
    fg_per_game = fg_att / gtmc
    expected_made_per_game = fg_per_game * p_made

    print()
    print("Number of field goals attempted, ", description, ":", fg_att)
    print("Number of field goals made, ", description, ":", fg_made)
    print("Number of field goals missed, ", description, ":", fg_missed)
    print("Percentage of field goals made, ", description, ":", p_made)
    print("Percentage of field goals missed, ", description, ":", p_missed)
    print("Games ", description, ":", gtmc)
    print("Field Goals attempted per game ", description, ":", fg_per_game)
    print("Number of expected field goals made per game, ", description, ":", expected_made_per_game)
    
    print()
    print()

    results_data_to_add = [description, fg_per_game, p_made, expected_made_per_game]
    add_results_data(results_data_to_add)


Number of field goals attempted,  against "NYG" defense : 306
Number of field goals made,  against "NYG" defense : 266
Number of field goals missed,  against "NYG" defense : 40
Percentage of field goals made,  against "NYG" defense : 0.869281045751634
Percentage of field goals missed,  against "NYG" defense : 0.13071895424836602
Games  against "NYG" defense : 134
Field Goals attempted per game  against "NYG" defense : 2.283582089552239
Number of expected field goals made per game,  against "NYG" defense : 1.9850746268656718



Number of field goals attempted,  against "DAL" defense : 275
Number of field goals made,  against "DAL" defense : 232
Number of field goals missed,  against "DAL" defense : 43
Percentage of field goals made,  against "DAL" defense : 0.8436363636363636
Percentage of field goals missed,  against "DAL" defense : 0.15636363636363637
Games  against "DAL" defense : 127
Field Goals attempted per game  against "DAL" defense : 2.1653543307086616
Number of expected field

In [None]:
for i in kickers:
    compare_single_criteria("kicker", "==", i, i)
    #break down by year - fix grammar

for i in teams:
    compare_single_criteria('offense == "{}"'.format(i), i + " on offense")
    #break down by year - fix grammar

for i in teams:
    compare_single_criteria('defense == "{}"'.format(i), "against " + i)
    #break down by year - fix grammar

In [None]:
# SORTING THE RESULTS
results_df.sort_values(by=['expected_fgs'])

best_df = results_df.head(5)
worst_df = results_df.tail(5)


In [None]:
sns.lmplot('criteria', 'expected_fgs', data=results_df, fit_reg=False, scatter_kws={"marker": "D", "s": 20}) 

plt.title('Scatter Plot of Data without Regression Line')
plt.xlabel('X Axis')
plt.ylabel('Y Axis')
plt.show()



In [None]:
#
## STILL TO DO
#
#df_kicker_by_year = df[["season", "kicker"]].copy()
#print(df_kicker_by_year.head())

# BY OPOSING DEF

#
# remove outliers
# create graphs