In [3]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

# Load the raw NFL game log; the preview shows one game per row with the
# date, home/away teams, both scores, betting lines, and weather fields.
df = pd.read_csv(filepath_or_buffer="nfl_data.csv")
df.head(n=5)

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
0,9/2/1966,1966,1,False,Miami Dolphins,14.0,23.0,Oakland Raiders,,,,Orange Bowl,False,83.0,6.0,71,
1,9/3/1966,1966,1,False,Houston Oilers,45.0,7.0,Denver Broncos,,,,Rice Stadium,False,81.0,7.0,70,
2,9/4/1966,1966,1,False,San Diego Chargers,27.0,7.0,Buffalo Bills,,,,Balboa Stadium,False,70.0,7.0,82,
3,9/9/1966,1966,2,False,Miami Dolphins,14.0,19.0,New York Jets,,,,Orange Bowl,False,82.0,11.0,78,
4,9/10/1966,1966,1,False,Green Bay Packers,24.0,3.0,Baltimore Colts,,,,Lambeau Field,False,64.0,8.0,62,


In [4]:
# Discard every column the win/loss analysis does not use. What remains is
# schedule_date, team_home, score_home and score_away.
unused_columns = [
    'schedule_season',
    'schedule_week',
    'schedule_playoff',
    'team_away',
    'team_favorite_id',
    'spread_favorite',
    'over_under_line',
    'stadium',
    'stadium_neutral',
    'weather_temperature',
    'weather_wind_mph',
    'weather_humidity',
    'weather_detail',
]
df_football = df.drop(columns=unused_columns)


In [5]:
# Flag each game from the home team's point of view: True when the home
# score is strictly greater than the away score.
# NOTE(review): ties and games whose scores are missing (NaN) also evaluate
# to False here, so they are indistinguishable from losses - confirm intended.
df_football['win_or_loss'] = df_football['score_home'].gt(df_football['score_away'])
df_football

Unnamed: 0,schedule_date,team_home,score_home,score_away,win_or_loss
0,9/2/1966,Miami Dolphins,14.0,23.0,False
1,9/3/1966,Houston Oilers,45.0,7.0,True
2,9/4/1966,San Diego Chargers,27.0,7.0,True
3,9/9/1966,Miami Dolphins,14.0,19.0,False
4,9/10/1966,Green Bay Packers,24.0,3.0,True
...,...,...,...,...,...
12929,1/3/2021,New England Patriots,,,False
12930,1/3/2021,New York Giants,,,False
12931,1/3/2021,Philadelphia Eagles,,,False
12932,1/3/2021,San Francisco 49ers,,,False


In [6]:
# Store the home-win flag as an integer: 1 = home win, 0 = anything else.
# The comparison is vectorized and reads the scores from df_football itself,
# rather than running a Python lambda row-by-row over the raw `df` and relying
# on index alignment to land the values in df_football. The resulting values
# are the same; the cell is also safe to re-run because it recomputes from
# the score columns instead of from a previous state of win_or_loss.
# NOTE(review): ties and games with missing scores still map to 0.
df_football['win_or_loss'] = (
    df_football['score_home'] > df_football['score_away']
).astype(int)
df_football

Unnamed: 0,schedule_date,team_home,score_home,score_away,win_or_loss
0,9/2/1966,Miami Dolphins,14.0,23.0,0
1,9/3/1966,Houston Oilers,45.0,7.0,1
2,9/4/1966,San Diego Chargers,27.0,7.0,1
3,9/9/1966,Miami Dolphins,14.0,19.0,0
4,9/10/1966,Green Bay Packers,24.0,3.0,1
...,...,...,...,...,...
12929,1/3/2021,New England Patriots,,,0
12930,1/3/2021,New York Giants,,,0
12931,1/3/2021,Philadelphia Eagles,,,0
12932,1/3/2021,San Francisco 49ers,,,0


In [7]:
# Relabel the integer flag as text: 1 -> 'Win', 0 -> 'Loss'.
# The replacement is applied to the win_or_loss column only. A frame-wide
# DataFrame.replace({1: ..., 0: ...}) matches those values in every column,
# so a score of 0 (a shutout) in score_home / score_away would be overwritten
# with 'Loss' and the score columns would stop being numeric.
df_football['win_or_loss'] = df_football['win_or_loss'].replace({1: 'Win', 0: 'Loss'})
df_football

Unnamed: 0,schedule_date,team_home,score_home,score_away,win_or_loss
0,9/2/1966,Miami Dolphins,14,23,Loss
1,9/3/1966,Houston Oilers,45,7,Win
2,9/4/1966,San Diego Chargers,27,7,Win
3,9/9/1966,Miami Dolphins,14,19,Loss
4,9/10/1966,Green Bay Packers,24,3,Win
...,...,...,...,...,...
12929,1/3/2021,New England Patriots,,,Loss
12930,1/3/2021,New York Giants,,,Loss
12931,1/3/2021,Philadelphia Eagles,,,Loss
12932,1/3/2021,San Francisco 49ers,,,Loss


In [8]:
# Restrict the table to games where the Cleveland Browns were the home team.
# NOTE(review): rows with missing scores (games not yet played) are kept and
# carry the 'Loss' label - confirm they should not be dropped here.
df_football = df_football.loc[df_football["team_home"].eq("Cleveland Browns")]
df_football

Unnamed: 0,schedule_date,team_home,score_home,score_away,win_or_loss
16,9/18/1966,Cleveland Browns,20,21,Loss
28,9/25/1966,Cleveland Browns,28,34,Loss
48,10/8/1966,Cleveland Browns,41,10,Win
72,10/23/1966,Cleveland Browns,30,21,Win
106,11/13/1966,Cleveland Browns,27,7,Win
...,...,...,...,...,...
12788,11/1/2020,Cleveland Browns,6,16,Loss
12814,11/15/2020,Cleveland Browns,,,Loss
12828,11/22/2020,Cleveland Browns,,,Loss
12885,12/14/2020,Cleveland Browns,,,Loss


In [9]:
# Load the Cleveland weather history. The preview shows the real fields
# followed by a very large number of "Unnamed: N" columns.
weather_df = pd.read_csv(filepath_or_buffer="history_data (2).csv")
weather_df.head(n=5)

Unnamed: 0,Name,Date time,Maximum Temperature,Minimum Temperature,Temperature,Wind Chill,Heat Index,Precipitation,Snow Depth,Wind Speed,...,Unnamed: 16360,Unnamed: 16361,Unnamed: 16362,Unnamed: 16363,Unnamed: 16364,Unnamed: 16365,Unnamed: 16366,Unnamed: 16367,Unnamed: 16368,Unnamed: 16369
0,"Cleveland, OH, United States",9/8/1985,91.1,71.0,78.6,,100.9,2.19,,17.2,...,,,,,,,,,,
1,"Cleveland, OH, United States",9/16/1985,70.1,43.1,58.7,,,0.0,,11.4,...,,,,,,,,,,
2,"Cleveland, OH, United States",10/6/1985,53.9,38.9,46.2,32.2,,0.0,,13.9,...,,,,,,,,,,
3,"Cleveland, OH, United States",10/20/1985,56.9,48.8,53.5,44.1,,0.06,,15.0,...,,,,,,,,,,
4,"Cleveland, OH, United States",10/27/1985,62.0,44.9,52.5,41.0,,0.0,,10.3,...,,,,,,,,,,


In [12]:
#list(weather_df.columns) 

['Name',
 'Date time',
 'Maximum Temperature',
 'Minimum Temperature',
 'Temperature',
 'Wind Chill',
 'Heat Index',
 'Precipitation',
 'Snow Depth',
 'Wind Speed',
 'Wind Gust',
 'Visibility',
 'Cloud Cover',
 'Relative Humidity',
 'Conditions',
 'Unnamed: 15',
 'Unnamed: 16',
 'Unnamed: 17',
 'Unnamed: 18',
 'Unnamed: 19',
 'Unnamed: 20',
 'Unnamed: 21',
 'Unnamed: 22',
 'Unnamed: 23',
 'Unnamed: 24',
 'Unnamed: 25',
 'Unnamed: 26',
 'Unnamed: 27',
 'Unnamed: 28',
 'Unnamed: 29',
 'Unnamed: 30',
 'Unnamed: 31',
 'Unnamed: 32',
 'Unnamed: 33',
 'Unnamed: 34',
 'Unnamed: 35',
 'Unnamed: 36',
 'Unnamed: 37',
 'Unnamed: 38',
 'Unnamed: 39',
 'Unnamed: 40',
 'Unnamed: 41',
 'Unnamed: 42',
 'Unnamed: 43',
 'Unnamed: 44',
 'Unnamed: 45',
 'Unnamed: 46',
 'Unnamed: 47',
 'Unnamed: 48',
 'Unnamed: 49',
 'Unnamed: 50',
 'Unnamed: 51',
 'Unnamed: 52',
 'Unnamed: 53',
 'Unnamed: 54',
 'Unnamed: 55',
 'Unnamed: 56',
 'Unnamed: 57',
 'Unnamed: 58',
 'Unnamed: 59',
 'Unnamed: 60',
 'Unnamed: 61',
 

In [10]:
# Keep only the fifteen named weather fields; the trailing "Unnamed: N"
# columns from the CSV are left behind.
weather_fields = [
    'Name',
    'Date time',
    'Maximum Temperature',
    'Minimum Temperature',
    'Temperature',
    'Wind Chill',
    'Heat Index',
    'Precipitation',
    'Snow Depth',
    'Wind Speed',
    'Wind Gust',
    'Visibility',
    'Cloud Cover',
    'Relative Humidity',
    'Conditions',
]
weather_df_slim = weather_df.loc[:, weather_fields]
weather_df_slim.head()

Unnamed: 0,Name,Date time,Maximum Temperature,Minimum Temperature,Temperature,Wind Chill,Heat Index,Precipitation,Snow Depth,Wind Speed,Wind Gust,Visibility,Cloud Cover,Relative Humidity,Conditions
0,"Cleveland, OH, United States",9/8/1985,91.1,71.0,78.6,,100.9,2.19,,17.2,,9.0,94.7,75.94,"Rain, Overcast"
1,"Cleveland, OH, United States",9/16/1985,70.1,43.1,58.7,,,0.0,,11.4,,15.1,12.3,64.64,Clear
2,"Cleveland, OH, United States",10/6/1985,53.9,38.9,46.2,32.2,,0.0,,13.9,,16.5,53.9,71.97,Partially cloudy
3,"Cleveland, OH, United States",10/20/1985,56.9,48.8,53.5,44.1,,0.06,,15.0,,12.7,100.0,71.57,"Rain, Overcast"
4,"Cleveland, OH, United States",10/27/1985,62.0,44.9,52.5,41.0,,0.0,,10.3,,17.9,74.3,54.68,Partially cloudy


In [13]:
# Join each weather record to the Browns home game played on the same date.
# The keys ("Date time" and "schedule_date") are compared as-is, and the join
# is an inner join, so only dates present in both frames survive.
total_data = weather_df_slim.merge(
    df_football,
    how="inner",
    left_on="Date time",
    right_on="schedule_date",
)
total_data.head()

Unnamed: 0,Name,Date time,Maximum Temperature,Minimum Temperature,Temperature,Wind Chill,Heat Index,Precipitation,Snow Depth,Wind Speed,Wind Gust,Visibility,Cloud Cover,Relative Humidity,Conditions,schedule_date,team_home,score_home,score_away,win_or_loss
0,"Cleveland, OH, United States",9/8/1985,91.1,71.0,78.6,,100.9,2.19,,17.2,,9.0,94.7,75.94,"Rain, Overcast",9/8/1985,Cleveland Browns,24,27,Loss
1,"Cleveland, OH, United States",9/16/1985,70.1,43.1,58.7,,,0.0,,11.4,,15.1,12.3,64.64,Clear,9/16/1985,Cleveland Browns,17,7,Win
2,"Cleveland, OH, United States",10/6/1985,53.9,38.9,46.2,32.2,,0.0,,13.9,,16.5,53.9,71.97,Partially cloudy,10/6/1985,Cleveland Browns,24,20,Win
3,"Cleveland, OH, United States",10/20/1985,56.9,48.8,53.5,44.1,,0.06,,15.0,,12.7,100.0,71.57,"Rain, Overcast",10/20/1985,Cleveland Browns,20,21,Loss
4,"Cleveland, OH, United States",10/27/1985,62.0,44.9,52.5,41.0,,0.0,,10.3,,17.9,74.3,54.68,Partially cloudy,10/27/1985,Cleveland Browns,7,14,Loss


In [15]:
# List the distinct weather conditions: group the merged games by
# "Conditions" and count the non-null entries of every other column per group.
data_groups = total_data.groupby(by="Conditions")
Condition_list = data_groups.count()
Condition_list

Unnamed: 0_level_0,Name,Date time,Maximum Temperature,Minimum Temperature,Temperature,Wind Chill,Heat Index,Precipitation,Snow Depth,Wind Speed,Wind Gust,Visibility,Cloud Cover,Relative Humidity,schedule_date,team_home,score_home,score_away,win_or_loss
Conditions,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Clear,5,5,5,5,5,3,0,5,0,5,0,5,5,5,5,5,5,5,5
Overcast,21,21,21,21,21,17,1,21,6,21,0,21,21,21,21,21,21,21,21
Partially cloudy,24,24,24,24,24,16,3,24,2,24,0,24,24,24,24,24,24,24,24
Rain,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,1
"Rain, Overcast",28,28,28,28,28,19,1,28,5,28,0,28,28,28,28,28,28,28,28
"Rain, Partially cloudy",4,4,4,4,4,3,0,4,2,4,0,4,4,4,4,4,4,4,4


In [17]:
# Collapse the detailed weather conditions into two categories, "Rain" or "Dry".
# A plain "Rain" value is already in its final form and is left untouched.
# The mapping is applied to the Conditions column only, and the result is
# assigned back instead of using a frame-wide inplace replace, so no other
# column can be altered by a coincidental match.
total_data["Conditions"] = total_data["Conditions"].replace(
    {
        "Clear": "Dry",
        "Overcast": "Dry",
        "Partially cloudy": "Dry",
        "Rain, Overcast": "Rain",
        "Rain, Partially cloudy": "Rain",
    }
)
total_data.head()

Unnamed: 0,Name,Date time,Maximum Temperature,Minimum Temperature,Temperature,Wind Chill,Heat Index,Precipitation,Snow Depth,Wind Speed,Wind Gust,Visibility,Cloud Cover,Relative Humidity,Conditions,schedule_date,team_home,score_home,score_away,win_or_loss
0,"Cleveland, OH, United States",9/8/1985,91.1,71.0,78.6,,100.9,2.19,,17.2,,9.0,94.7,75.94,Rain,9/8/1985,Cleveland Browns,24,27,Loss
1,"Cleveland, OH, United States",9/16/1985,70.1,43.1,58.7,,,0.0,,11.4,,15.1,12.3,64.64,Dry,9/16/1985,Cleveland Browns,17,7,Win
2,"Cleveland, OH, United States",10/6/1985,53.9,38.9,46.2,32.2,,0.0,,13.9,,16.5,53.9,71.97,Dry,10/6/1985,Cleveland Browns,24,20,Win
3,"Cleveland, OH, United States",10/20/1985,56.9,48.8,53.5,44.1,,0.06,,15.0,,12.7,100.0,71.57,Rain,10/20/1985,Cleveland Browns,20,21,Loss
4,"Cleveland, OH, United States",10/27/1985,62.0,44.9,52.5,41.0,,0.0,,10.3,,17.9,74.3,54.68,Dry,10/27/1985,Cleveland Browns,7,14,Loss


In [21]:
# Games played in rain that are labelled as a home win.
rain_win_df = total_data.loc[
    total_data["Conditions"].eq("Rain") & total_data["win_or_loss"].eq("Win")
]
rain_win_df.head()

Unnamed: 0,Name,Date time,Maximum Temperature,Minimum Temperature,Temperature,Wind Chill,Heat Index,Precipitation,Snow Depth,Wind Speed,Wind Gust,Visibility,Cloud Cover,Relative Humidity,Conditions,schedule_date,team_home,score_home,score_away,win_or_loss
10,"Cleveland, OH, United States",10/12/1986,70.1,53.9,61.6,,,0.01,,23.0,,9.6,93.9,69.64,Rain,10/12/1986,Cleveland Browns,20,7,Win
13,"Cleveland, OH, United States",11/23/1986,48.8,33.2,42.7,27.0,,0.01,,18.3,,10.3,83.5,82.39,Rain,11/23/1986,Cleveland Browns,37,31,Win
20,"Cleveland, OH, United States",11/8/1987,64.1,44.9,54.5,39.1,,0.08,,16.1,,5.5,97.0,54.35,Rain,11/8/1987,Cleveland Browns,38,3,Win
30,"Cleveland, OH, United States",11/20/1988,47.9,41.1,45.8,33.4,,1.19,,19.7,,4.3,100.0,95.86,Rain,11/20/1988,Cleveland Browns,27,7,Win
32,"Cleveland, OH, United States",12/18/1988,24.2,17.0,20.1,4.0,,0.04,4.33,18.3,,5.2,98.7,81.51,Rain,12/18/1988,Cleveland Browns,28,23,Win


In [22]:
# Games played in rain that are labelled as a home loss.
rain_loss_df = total_data.loc[
    total_data["Conditions"].eq("Rain") & total_data["win_or_loss"].eq("Loss")
]
rain_loss_df.head()

Unnamed: 0,Name,Date time,Maximum Temperature,Minimum Temperature,Temperature,Wind Chill,Heat Index,Precipitation,Snow Depth,Wind Speed,Wind Gust,Visibility,Cloud Cover,Relative Humidity,Conditions,schedule_date,team_home,score_home,score_away,win_or_loss
0,"Cleveland, OH, United States",9/8/1985,91.1,71.0,78.6,,100.9,2.19,,17.2,,9.0,94.7,75.94,Rain,9/8/1985,Cleveland Browns,24,27,Loss
3,"Cleveland, OH, United States",10/20/1985,56.9,48.8,53.5,44.1,,0.06,,15.0,,12.7,100.0,71.57,Rain,10/20/1985,Cleveland Browns,20,21,Loss
8,"Cleveland, OH, United States",9/18/1986,70.1,56.9,62.9,,,0.22,,13.9,,8.5,99.6,79.31,Rain,9/18/1986,Cleveland Browns,13,30,Loss
17,"Cleveland, OH, United States",1/11/1987,33.2,29.0,30.5,15.9,,0.02,0.39,25.3,,5.6,100.0,81.91,Rain,1/11/1987,Cleveland Browns,20,23,Loss
27,"Cleveland, OH, United States",10/9/1988,59.1,44.9,50.3,40.0,,0.01,,11.6,,12.5,96.1,67.19,Rain,10/9/1988,Cleveland Browns,10,16,Loss


In [None]:
# Games played in dry conditions that are labelled as a home win.
dry_win_df = total_data.loc[
    total_data["Conditions"].eq("Dry") & total_data["win_or_loss"].eq("Win")
]
dry_win_df.head()

In [23]:
# Games played in dry conditions that are labelled as a home loss.
# NOTE(review): tied games carry the 'Loss' label upstream, so they are
# included in this subset - confirm that is intended.
dry_loss_df = total_data.loc[
    total_data["Conditions"].eq("Dry") & total_data["win_or_loss"].eq("Loss")
]
dry_loss_df.head()

Unnamed: 0,Name,Date time,Maximum Temperature,Minimum Temperature,Temperature,Wind Chill,Heat Index,Precipitation,Snow Depth,Wind Speed,Wind Gust,Visibility,Cloud Cover,Relative Humidity,Conditions,schedule_date,team_home,score_home,score_away,win_or_loss
4,"Cleveland, OH, United States",10/27/1985,62.0,44.9,52.5,41.0,,0.0,,10.3,,17.9,74.3,54.68,Dry,10/27/1985,Cleveland Browns,7,14,Loss
22,"Cleveland, OH, United States",12/6/1987,34.1,24.2,30.7,17.1,,0.0,1.97,9.2,,15.1,87.0,78.19,Dry,12/6/1987,Cleveland Browns,7,9,Loss
25,"Cleveland, OH, United States",9/11/1988,79.1,53.9,65.2,,,0.0,,9.2,,11.7,34.3,73.34,Dry,9/11/1988,Cleveland Browns,3,23,Loss
36,"Cleveland, OH, United States",10/15/1989,80.0,56.9,68.5,,,0.0,,20.8,,9.0,63.0,63.95,Dry,10/15/1989,Cleveland Browns,7,17,Loss
39,"Cleveland, OH, United States",11/19/1989,44.9,19.1,30.4,9.1,,0.0,3.94,19.7,,13.4,90.0,64.31,Dry,11/19/1989,Cleveland Browns,10,10,Loss


In [24]:
# Narrow the merged data to the three variables used for the scoring
# comparison: temperature, precipitation, and points scored by the home team.
points_df = total_data.loc[:, ["Temperature", "Precipitation", "score_home"]]
points_df.head()

Unnamed: 0,Temperature,Precipitation,score_home
0,78.6,2.19,24
1,58.7,0.0,17
2,46.2,0.0,24
3,53.5,0.06,20
4,52.5,0.0,7
