# NFL Database Project: WebScraping & Data Cleaning
<br>
CIS 761 Database Management Systems – Term Project<br>
Kansas State University
<br><br>
Vishnu Bondalakunta<br>
Charles Zumbaugh<br>
James Chapman<br>
<br>

#### This notebook uses the public ESPN API. The URLs and endpoints were found in the following GitHub link. ESPN does not provide official documentation.

* ## [List of NFL API Endpoints](https://gist.github.com/nntrn/ee26cb2a0716de0947a0a4e9a157bc1c#event-competitions-api)

#### 10 tables are collected and saved as CSV files, which are used to populate the NFL database.
* games
* season_dates
* venues
* teams
* linescores
* rosters
* positions
* athletes
* plays
* player_plays

In [1]:
import pandas as pd
import numpy as np
import requests
import re

In [2]:
# Calendar years to scrape.
# ! Keep track of calendar year and NFL season year
years = range(2013,2024)

# ESPN endpoint templates. The trailing comment on each line lists the
# arguments expected by str.format(). Every endpoint is requested over HTTPS.
ESPN_EVENTS = 'https://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard?limit=1000&dates={}' #.format(year)
ESPN_ROSTERS = 'https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/events/{}/competitions/{}/competitors/{}/roster' #.format(game_id,game_id,team_id)
ESPN_ATHLETE_INFO = 'https://site.web.api.espn.com/apis/common/v3/sports/football/nfl/athletes/{}' #.format(player_id)
ESPN_VENUE_INFO = 'https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/venues/{}' #.format(venue_id)
ESPN_TEAM_INFO = 'https://site.api.espn.com/apis/site/v2/sports/football/nfl/teams/{}' #.format(team_id)
ESPN_POSITION_INFO = 'https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/positions/{}' #.format(position_id)
ESPN_PLAY_BY_PLAY = 'https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/events/{}/competitions/{}/plays?limit=300' #.format(game_id,game_id)
ESPN_STATISTICS = 'https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/events/{}/competitions/{}/competitors/{}/roster/{}/statistics/0' #.format(game_id,game_id,team_id,player_id)

# Games

In [3]:
# Download the scoreboard for every calendar year and flatten its events.
# The yearly frames are collected in a list and concatenated once, so the
# growing table is not re-copied on every iteration.
yearly_events = []
for year in years:
    try:
        response = requests.get(ESPN_EVENTS.format(year))
        response.raise_for_status()  # an HTTP error body is not event data
        yearly_events.append(pd.json_normalize(response.json()['events']))
    except Exception as err:
        # Best effort: report the failing year and the reason, then continue.
        print(year, err)
games = pd.concat(yearly_events, ignore_index=True) if yearly_events else pd.DataFrame()

# games['competitions'] is a list of dictionaries; only the first entry is used.
games['attendance'] = games['competitions'].apply(lambda x : x[0]['attendance']).astype('int64')
games['venue_id'] = games['competitions'].apply(lambda x : x[0]['venue']['id']).astype('int64')

# games['competitions'][0]['competitors'] is a list of 2 dictionaries (home and away).
games['competitors'] = games['competitions'].apply(lambda x : x[0]['competitors'])

# Index 0 is taken as the home team and index 1 as the away team.
games['home_team_id'] = games['competitors'].apply(lambda x : x[0]['id']).astype('int64')
games['away_team_id'] = games['competitors'].apply(lambda x : x[1]['id']).astype('int64')

# Drop pre-season, off-season, & probowl
games = games.drop(games[(games['season.type'].isin([1, 4]))].index)
games = games.drop(games[(games['home_team_id'].isin([31,32,35,36]))].index)

# Split datetime into date/time
# decompose date/week/season year/season type
games['datetime'] = pd.to_datetime(games['date'])
games['utc_time'] = games['datetime'].dt.strftime("%H:%M")
games['date'] = games['datetime'].dt.strftime("%Y-%m-%d")
season_dates = games[['date','season.year','season.type','week.number']].copy()

# Keep around for linescores
gameLinescores = games[['id','competitors']].copy()

# Keep only the columns of the games table. The explicit copy makes the
# following cast operate on an independent frame rather than a column slice.
games = games.rename(columns={'id':'game_id'})
games = games[['game_id',
               'date',
               'attendance',
               'venue_id',
               'home_team_id',
               'away_team_id',
               'utc_time']].copy()
games['game_id'] = games['game_id'].astype('int64')
games.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2976 entries, 0 to 3584
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   game_id       2976 non-null   int64 
 1   date          2976 non-null   object
 2   attendance    2976 non-null   int64 
 3   venue_id      2976 non-null   int64 
 4   home_team_id  2976 non-null   int64 
 5   away_team_id  2976 non-null   int64 
 6   utc_time      2976 non-null   object
dtypes: int64(5), object(2)
memory usage: 186.0+ KB


# Season_Dates

In [4]:
season_dates = season_dates.rename(columns={'season.year':'season_year',
                                          'season.type':'season_type',
                                          'week.number':'week'})
# season_types names.
# The codes are mapped to their names in one pass instead of assigning strings
# into the integer column with .loc, which pandas deprecates as an
# incompatible-dtype assignment. Codes without a name are left unchanged.
season_type_names = {2: 'Regular Season', 3: 'Post Season'}
season_dates['season_type'] = season_dates['season_type'].map(
    lambda code: season_type_names.get(code, code))

season_dates.drop_duplicates(inplace=True)
season_dates.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 827 entries, 0 to 3584
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   date         827 non-null    object
 1   season_year  827 non-null    int64 
 2   season_type  827 non-null    object
 3   week         827 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 32.3+ KB


# Venues

In [5]:
# Download one venue record per distinct venue id found in games.
# Frames are collected in a list and concatenated once after the loop.
venue_frames = []
for venue_id in pd.unique(games['venue_id']):
    try:
        response = requests.get(ESPN_VENUE_INFO.format(venue_id))
        response.raise_for_status()  # an HTTP error body is not a venue record
        venue_frames.append(pd.json_normalize(response.json()))
    except Exception as err:
        # Best effort: report the failing id and the reason, then continue.
        print(venue_id, err)
venues = pd.concat(venue_frames, ignore_index=True) if venue_frames else pd.DataFrame()

# Keep only the columns of the venues table.
venues = venues[['id',
                 'fullName',
                 'grass',
                 'indoor',
                 'address.city',
                 'address.state']]
venues = venues.rename(columns={'id':'venue_id',
                                'fullName':'venue_name',
                                'address.city':'city',
                                'address.state':'state'})

# capacity is filled with 0 for every venue here.
venues['capacity'] = 0
venues[['venue_id','capacity']] = venues[['venue_id','capacity']].astype('int64')
venues.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46 entries, 0 to 45
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   venue_id    46 non-null     int64 
 1   venue_name  46 non-null     object
 2   grass       46 non-null     bool  
 3   indoor      46 non-null     bool  
 4   city        46 non-null     object
 5   state       40 non-null     object
 6   capacity    46 non-null     int64 
dtypes: bool(2), int64(2), object(3)
memory usage: 2.0+ KB


# Teams

In [6]:
# Download one team record per distinct home team id found in games.
# Frames are collected in a list and concatenated once after the loop.
team_frames = []
for team_id in pd.unique(games['home_team_id']):
    try:
        response = requests.get(ESPN_TEAM_INFO.format(team_id))
        response.raise_for_status()  # an HTTP error body is not a team record
        team_frames.append(pd.json_normalize(response.json()))
    except Exception as err:
        # Best effort: report the failing id and the reason, then continue.
        print(team_id, err)
teams = pd.concat(team_frames, ignore_index=True) if team_frames else pd.DataFrame()

# Keep only the columns of the teams table.
teams = teams[['team.id',
                 'team.location',
                 'team.name',
                 'team.abbreviation',
                 'team.franchise.venue.id',
                 'team.color',
                 'team.alternateColor']]
teams = teams.rename(columns={'team.id':'team_id',
                             'team.location':'location',
                             'team.name':'team_name',
                             'team.abbreviation':'abbreviation',
                             'team.franchise.venue.id':'venue_id',
                             'team.color':'primary_color',
                             'team.alternateColor':'secondary_color'})
teams[['team_id','venue_id']] = teams[['team_id','venue_id']].astype('int64')
teams.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   team_id          32 non-null     int64 
 1   location         32 non-null     object
 2   team_name        32 non-null     object
 3   abbreviation     32 non-null     object
 4   venue_id         32 non-null     int64 
 5   primary_color    32 non-null     object
 6   secondary_color  32 non-null     object
dtypes: int64(2), object(5)
memory usage: 1.9+ KB


# Linescores 


In [7]:
linescores = pd.DataFrame(columns = ["game_id","team_id","quarter","score"])
overtimecount = 0  # not used below; kept so the name stays defined
def eachrow(game):
    """Append one row to ``linescores`` per team and period of ``game``.

    ``game`` is indexed by 'id' and 'competitors'. Each competitor supplies
    'id' and a 'linescores' list whose items carry the score under 'value'.
    Periods are numbered from 1 in list order. If any of those keys cannot be
    read, the game is reported as canceled; rows appended before the failure
    are kept.
    """
    try:
        for competitor in game['competitors']: # competitors[0] = home team
            for quarter, linescore in enumerate(competitor['linescores'], start=1):
                linescores.loc[len(linescores.index)] = [game['id'],
                                                         competitor['id'],
                                                         quarter,
                                                         linescore['value']]
    except Exception:
        # Exception (not a bare except) so Ctrl-C still interrupts the run.
        print('Game canceled-', game['id'])

# eachrow is applied to every game for its side effect of filling linescores;
# the value returned by apply is not used.
junk = gameLinescores.apply(eachrow, axis=1)

# Ids and scores are stored as integers.
int_columns = ['game_id','team_id','score']
linescores[int_columns] = linescores[int_columns].astype('int64')
linescores.info(verbose=True)

Game canceled- 400554331
Game canceled- 400951581
<class 'pandas.core.frame.DataFrame'>
Int64Index: 24140 entries, 0 to 24139
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   game_id  24140 non-null  int64
 1   team_id  24140 non-null  int64
 2   quarter  24140 non-null  int64
 3   score    24140 non-null  int64
dtypes: int64(4)
memory usage: 943.0 KB


# Rosters 

The rosters URL lists all players under 'entries', but each player's position is given only as a reference link, for example:<br>
$ref : "http://sports.core.api.espn.com/v2/sports/football/leagues/nfl/positions/46?lang=en&region=us" <br>
The code splits this link at "positions", which leaves "/46?lang=en&region=us",
and then keeps only the digits, giving the position id "46".




In [8]:
rosters = pd.DataFrame(columns = ["game_id", "team_id", "athlete_id","position_id","didNotPlay"])

def _append_roster(game_id, team_id):
    """Fetch one team's roster for one game and append a row per player to ``rosters``."""
    response = requests.get(ESPN_ROSTERS.format(game_id, game_id, team_id))
    response.raise_for_status()  # an HTTP error body is not a roster
    for player in response.json()['entries']:
        # The position is only available as a '$ref' URL: split it at
        # "positions" and keep the digits of the tail.
        split = re.split("positions", player['position']['$ref'])
        position_id = re.sub("[^0-9.]", "", split[1])
        rosters.loc[len(rosters.index)] = [game_id,
                                           team_id,
                                           player['playerId'],
                                           position_id,
                                           player['didNotPlay']]

def eachrow(game):
    """Append the home and away rosters of ``game`` to ``rosters``.

    ``game`` is indexed by 'game_id', 'home_team_id' and 'away_team_id'.
    Every player row is tagged with the id of the team whose roster it came
    from, so away players carry the away team id. If a roster cannot be
    downloaded or parsed, the game is reported and skipped; rows appended
    before the failure are kept.
    """
    try:
        _append_roster(game['game_id'], game['home_team_id'])
        _append_roster(game['game_id'], game['away_team_id'])
    except Exception:
        # Exception (not a bare except) so Ctrl-C still interrupts the run.
        print('Game canceled or Pro bowl -', game['game_id'])

# eachrow is applied to every game for its side effect of filling rosters.
games.apply(eachrow, axis=1)

# Ids are stored as integers.
id_columns = ['game_id','team_id','athlete_id','position_id']
rosters[id_columns] = rosters[id_columns].astype('int64')

# Replace the didNotPlay flag with its negation, 'played'.
rosters['played'] = ~rosters['didNotPlay'].astype('boolean')
rosters = rosters.drop(columns=['didNotPlay'])
rosters.info(verbose=True)

Game canceled or Pro bowl - 400554331
Game canceled or Pro bowl - 400874729
<class 'pandas.core.frame.DataFrame'>
Int64Index: 363404 entries, 0 to 363403
Data columns (total 5 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   game_id      363404 non-null  int64  
 1   team_id      363404 non-null  int64  
 2   athlete_id   363404 non-null  int64  
 3   position_id  363404 non-null  int64  
 4   played       363404 non-null  boolean
dtypes: boolean(1), int64(4)
memory usage: 14.6 MB


# Positions

In [9]:
# Download one position record per distinct position id found in rosters.
# Frames are collected in a list and concatenated once after the loop.
position_frames = []
for position_id in pd.unique(rosters['position_id']):
    try:
        response = requests.get(ESPN_POSITION_INFO.format(position_id))
        response.raise_for_status()  # an HTTP error body is not a position record
        position_frames.append(pd.json_normalize(response.json()))
    except Exception as err:
        # Report the id that failed (not a payload that may never have been
        # assigned) together with the reason, then continue.
        print(position_id, err)
positions = pd.concat(position_frames, ignore_index=True) if position_frames else pd.DataFrame()

# Keep only the columns of the positions table. rename() returns a new frame,
# so the platoon column is added to an independent frame, not a column slice.
positions = positions[['id', 'name', 'abbreviation']]
positions = positions.rename(columns={'id':'position_id','name':'position_name'})
positions['platoon'] = ''  # filled with an empty string for every position here
positions['position_id'] = positions['position_id'].astype('int64')
positions.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   position_id    24 non-null     int64 
 1   position_name  24 non-null     object
 2   abbreviation   24 non-null     object
 3   platoon        24 non-null     object
dtypes: int64(1), object(3)
memory usage: 896.0+ bytes


# Athletes 

In [None]:
# Prints a hard-coded number. The same number appears in the saved output of
# the athletes download and in the athlete_id exclusion list applied to rosters.
print(4568981)

In [10]:
# Download one athlete record per distinct athlete id found in rosters.
# Frames are collected in a list and concatenated once after the loop, so the
# growing table is not re-copied for every athlete.
athlete_frames = []
for athlete_id in pd.unique(rosters['athlete_id']):
    try:
        response = requests.get(ESPN_ATHLETE_INFO.format(athlete_id))
        response.raise_for_status()  # an HTTP error body is not an athlete record
        athlete_frames.append(pd.json_normalize(response.json()['athlete']))
    except Exception as err:
        # Best effort: report the failing id and the reason, then continue.
        print(athlete_id, err)
athletes = pd.concat(athlete_frames, ignore_index=True) if athlete_frames else pd.DataFrame()

def split_birthplace(s: str, index: int):
    """Return component ``index`` of a ', '-separated birthplace string.

    Returns '' when ``s`` is not a string (for example a missing value) or
    when it has no component at ``index``.
    """
    try:
        return s.split(', ')[index]
    except (AttributeError, IndexError, TypeError):
        # AttributeError/TypeError: s is not a str; IndexError: too few parts.
        return ''

# Split the birthplace text into its first and second ', '-separated parts.
birthplaces = athletes['displayBirthPlace']
athletes['birth_city'] = birthplaces.apply(split_birthplace, args=(0,))
athletes['birth_state'] = birthplaces.apply(split_birthplace, args=(1,))

# Rename to the database column names, then keep only the athletes-table columns.
athletes = athletes.rename(columns={'id':'athlete_id',
                                     'displayHeight':'height',
                                     'displayWeight':'weight',
                                     'displayDOB':'dob'})
athlete_columns = ['athlete_id',
                   'firstName',
                   'lastName',
                   'birth_city',
                   'birth_state',
                   'jersey',
                   'height',
                   'weight',
                   'dob']
athletes = athletes[athlete_columns]
# Fields considered but not kept: displayDraft (drafted_bool), debutYear,
# college.id, displayJersey, displayExperience, position.id, team.id,
# collegeTeam.id, collegeAthlete.id

4568981
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7166 entries, 0 to 7165
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   athlete_id   7166 non-null   object
 1   firstName    7166 non-null   object
 2   lastName     7166 non-null   object
 3   birth_city   7166 non-null   object
 4   birth_state  7166 non-null   object
 5   jersey       6650 non-null   object
 6   height       7158 non-null   object
 7   weight       7158 non-null   object
 8   dob          7121 non-null   object
dtypes: object(9)
memory usage: 504.0+ KB


  athletes['birth_city'] = athletes['displayBirthPlace'].apply(lambda x: split_birthplace(x, 0))
  athletes['birth_state'] = athletes['displayBirthPlace'].apply(lambda x: split_birthplace(x, 1))


In [None]:
# Print the column summary of the athletes table.
athletes.info(verbose=True)

In [11]:
def extract_number(s: str) -> str:
    """Return the run of digits at the start of ``s``.

    Returns ``pd.NA`` when ``s`` is not a string or does not start with a digit.
    """
    try:
        return re.match(r'[0-9]+', s).group(0)
    except (TypeError, AttributeError):
        # TypeError: s is not a str; AttributeError: no match (None.group).
        return pd.NA #'NULL'


def get_height_inches(height_str: str) -> int:
    """Convert a feet-and-inches display height to total inches.

    The first number found in ``height_str`` is read as feet and the second
    as inches. Returns ``pd.NA`` when ``height_str`` is not a string or holds
    fewer than two numbers.
    """
    try:
        feet, inches = re.findall(r'[0-9]+', height_str)[:2]
    except (TypeError, ValueError):
        # TypeError: not a str; ValueError: fewer than two numbers to unpack.
        return pd.NA
    return int(feet) * 12 + int(inches)

def clean_date(date_string: str) -> str:
    """Normalise a display date to ``MM-DD-YYYY``.

    The first three numbers found in ``date_string`` are read as day, month
    and year, in that order; any separator is accepted. Day and month are
    zero-padded to two digits. A year shorter than four digits is prefixed
    with '19' when its value is above 24 and with '20' otherwise.

    Returns '' when ``date_string`` is not a string or holds fewer than three
    numbers.
    """
    try:
        day, month, year = re.findall(r'[0-9]+', date_string)[:3]
    except (TypeError, ValueError):
        # TypeError: not a str; ValueError: fewer than three numbers to unpack.
        return ''
    day = day.zfill(2)
    month = month.zfill(2)
    if len(year) < 4:
        century = '19' if int(year) > 24 else '20'
        year = century + year
    return f'{month}-{day}-{year}'

# Run each display column through its cleaning function.
for column, cleaner in (('weight', extract_number),
                        ('height', get_height_inches),
                        ('dob', clean_date)):
    athletes[column] = athletes[column].apply(cleaner)
athletes.info(verbose=True)

# Replace IDs with names
For Database Normalization. <br>
* venue_id --> venue_name
* team_id --> team_name
* position_id --> position_name

In [12]:
# Games table - venue_id, home_team_id, & away_team_id
games = games.merge(venues[['venue_id','venue_name']],how="left",on = ['venue_id'])
games = games.merge(teams[['team_id','team_name']],how="left", left_on = ['home_team_id'],right_on = ['team_id'])
games = games.rename(columns={'team_name':'home_team_name'})
games = games.drop(['team_id'], axis=1)

# away_team_id
games = games.merge(teams[['team_id','team_name']],how="left", left_on = ['away_team_id'],right_on = ['team_id'])
games = games.rename(columns={'team_name':'away_team_name'})
games = games.drop(['venue_id','away_team_id','home_team_id','team_id'], axis=1)

# teams table - venue_id
teams = teams.merge(venues[['venue_id','venue_name']],how="left",on = ['venue_id'])
teams = teams.drop(['venue_id'], axis=1)

# linescores table - team_id
linescores = linescores.merge(teams[['team_id','team_name']],how="left",on = ['team_id'])
linescores = linescores.drop(['team_id'], axis=1)

# rosters table - team_id & position_id
rosters = rosters.merge(teams[['team_id','team_name']],how="left",on = ['team_id'])
rosters = rosters.merge(positions[['position_id','position_name']],how="left",on = ['position_id'])
rosters = rosters.drop(['team_id','position_id'], axis=1)
rosters = rosters.drop(rosters[(rosters['athlete_id'].isin([4568981,2514468,17372,11717]))].index)
rosters.drop_duplicates(inplace=True)

# Drop ID columns from original tables
teams = teams.drop(['team_id'], axis=1)
venues = venues.drop(['venue_id'], axis=1)
positions = positions.drop(['position_id'], axis=1)

In [13]:
# Athletes that repeat an earlier row's first name, last name and date of birth
# are treated as one person listed under two ids.
# NOTE(review): rows whose dob is blank also match each other on name alone —
# confirm that merging those is intended.
athletesDUPLICATES = athletes[athletes.duplicated(keep='first', subset=['firstName','lastName','dob'])][['firstName','lastName','dob']].copy()
for row in athletesDUPLICATES[athletesDUPLICATES['lastName'] != 'Team'].to_numpy():
    IdS = (athletes.loc[(athletes['firstName']==row[0])& (athletes['lastName']==row[1])& (athletes['dob']==row[2])]['athlete_id']).dropna().unique()
    print(row[0],row[1],row[2])
    if len(IdS) < 2:
        # Nothing to remap without two distinct ids.
        print('--Skipped: fewer than two athlete_id values')
        continue
    print('--Change all athlete_id=',IdS[0],'to',IdS[1])
    # rosters['athlete_id'] was cast to int64, so the ids are converted to int
    # before comparing; an id held as text would never equal an integer and
    # the remap would silently change nothing.
    old_id, new_id = int(IdS[0]), int(IdS[1])
    rosters.loc[(rosters['athlete_id']==old_id),'athlete_id']= new_id

Mike Johnson 04-02-1987
--Change all athlete_id= 13279 to 2219984
Tom Johnson 08-30-1984
--Change all athlete_id= 10001 to 16695
Willie Smith 11-13-1986
--Change all athlete_id= 14447 to 2219931
Branden Albert 11-04-1984
--Change all athlete_id= 11249 to 3938599
Chris Manhertz 04-10-1992
--Change all athlete_id= 2531358 to 4071345
Brandon Scherff 12-26-1991
--Change all athlete_id= 2511708 to 4287933
De'Angelo Henderson 11-24-1992
--Change all athlete_id= 2968226 to 2565755


# Plays

In [16]:
plays = pd.DataFrame()
player_plays = pd.DataFrame(columns = ['game_id','play_id','type',"player_id"])

# Silence the FutureWarning caused by a data type conversion in a plays
# column that is not used.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Download the play-by-play of every game. The per-game frames are collected
# in a list and concatenated once, so the growing table is not re-copied for
# every game.
play_frames = []
for game_id in games['game_id']:
    try:
        response = requests.get(ESPN_PLAY_BY_PLAY.format(game_id,game_id))
        response.raise_for_status()  # an HTTP error body is not play data
        play_data = pd.json_normalize(response.json()['items'])
        play_data['game_id'] = game_id
        play_frames.append(play_data)
    except Exception as err:
        # Best effort: report the failing game and the reason, then continue.
        print(game_id, err)
if play_frames:
    plays = pd.concat(play_frames, ignore_index=True)

plays = plays.rename(columns={'id':'play_id',
                             'type.text':'play_type',
                             'period.number':'quarter',
                             'clock.value':'seconds_remaining',
                              'scoreValue':'score_value',
                              'statYardage':'yards',
                              'start.down':'start_down',
                             'end.down':'end_down'})

# Set the participants aside for the player_plays table.
temp = plays[['game_id','play_id','participants']].copy()

# Keep only the columns of the plays table. The explicit copy makes the
# assignments below operate on an independent frame rather than a column slice.
plays = plays[['play_id',
               'start_down',
               'end_down',
               'quarter',
               'play_type',
               'seconds_remaining',
               'score_value',
               'yards']].copy()

# Downs that are negative or missing become 0 (a missing value fails ">= 0").
for down_column in ('start_down', 'end_down'):
    plays[down_column] = plays[down_column].where(plays[down_column] >= 0, 0)

int_columns = ['start_down', 'end_down', 'quarter',
               'seconds_remaining', 'score_value', 'yards']
plays[int_columns] = plays[int_columns].astype('int64')
plays.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522707 entries, 0 to 522706
Data columns (total 8 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   play_id            522707 non-null  object
 1   start_down         522707 non-null  int64 
 2   end_down           522707 non-null  int64 
 3   quarter            522707 non-null  int64 
 4   play_type          522691 non-null  object
 5   seconds_remaining  522707 non-null  int64 
 6   score_value        522707 non-null  int64 
 7   yards              522707 non-null  int64 
dtypes: int64(6), object(2)
memory usage: 31.9+ MB


In [None]:
# Athlete id embedded in a '$ref' URL: the digits between "athletes/" and "?".
ATHLETE_REF_ID = re.compile(r'(?<=athletes/)[0-9]+(?=\?)')

# Rows are buffered in a plain list and turned into a DataFrame once, because
# growing a DataFrame one row at a time re-copies it on every insert.
player_play_rows = []

def eachrow(play):
    """Buffer one [game_id, play_id, type, player_id] row per participant of ``play``.

    ``play`` is indexed by 'game_id', 'play_id' and 'participants'. Each
    participant supplies 'type' and an athlete '$ref' URL from which the
    player id is extracted. A play whose participants cannot be read is
    skipped; rows buffered before the failure are kept.
    """
    try:
        for player in play['participants']:
            player_url = player['athlete']['$ref']
            player_id = ATHLETE_REF_ID.search(player_url).group(0)
            player_play_rows.append([play['game_id'],
                                     play['play_id'],
                                     player['type'],
                                     player_id])
    except (TypeError, KeyError, AttributeError):
        # TypeError: participants is missing / not iterable;
        # KeyError: an expected key is absent;
        # AttributeError: the URL holds no athlete id (no regex match).
        pass

junk = temp.apply(eachrow, axis=1)

# Append the buffered rows to player_plays in a single step.
new_player_plays = pd.DataFrame(player_play_rows, columns=player_plays.columns)
if player_plays.empty:
    player_plays = new_player_plays
else:
    player_plays = pd.concat([player_plays, new_player_plays], ignore_index=True)

player_plays.info(verbose=True)

# Review

In [None]:
# Column summary of games, then the number of distinct values per column.
games.info(verbose=True)
for name, values in games.items():
    print(name,'--------',pd.unique(values).size)

In [None]:
# Column summary of season_dates, then the number of distinct values per column.
season_dates.info(verbose=True)
for name, values in season_dates.items():
    print(name,'--------',pd.unique(values).size)

In [None]:
# Column summary of linescores, then the number of distinct values per column.
linescores.info(verbose=True)
for name, values in linescores.items():
    print(name,'--------',pd.unique(values).size)

In [None]:
# Column summary of rosters, then the number of distinct values per column.
rosters.info(verbose=True)
for name, values in rosters.items():
    print(name,'--------',pd.unique(values).size)

In [None]:
# Column summary of athletes, then the number of distinct values per column.
athletes.info(verbose=True)
for name, values in athletes.items():
    print(name,'--------',pd.unique(values).size)

In [None]:
# Column summary of venues, then the number of distinct values per column.
venues.info(verbose=True)
for name, values in venues.items():
    print(name,'--------',pd.unique(values).size)

In [None]:
# Column summary of teams, then the number of distinct values per column.
teams.info(verbose=True)
for name, values in teams.items():
    print(name,'--------',pd.unique(values).size)

In [None]:
# Column summary of positions, then the number of distinct values per column.
positions.info(verbose=True)
for name, values in positions.items():
    print(name,'--------',pd.unique(values).size)

In [None]:
# Column summary of plays, then the number of distinct values per column.
plays.info(verbose=True)
for name, values in plays.items():
    print(name,'--------',pd.unique(values).size)

In [None]:
# Column summary of player_plays, then the number of distinct values per column.
player_plays.info(verbose=True)
for name, values in player_plays.items():
    print(name,'--------',pd.unique(values).size)

# Save

In [None]:
# Write every table to its own CSV file in the working directory, without the
# DataFrame index.
exports = (
    (games, 'games.csv'),
    (season_dates, 'season_dates.csv'),
    (linescores, 'linescores.csv'),
    (rosters, 'rosters.csv'),
    (athletes, 'athletes.csv'),
    (venues, 'venues.csv'),
    (teams, 'teams.csv'),
    (positions, 'positions.csv'),
    (plays, 'plays.csv'),
    (player_plays, 'player_plays.csv'),
)
for table, filename in exports:
    table.to_csv(filename, index=False)

In [None]:
: )