# Injury Data Collection - Section 4

This notebook uses our datasets to curate and store the feature set for the injury model.

## Get the Events data and convert to csv format
Set up the imports and display options, then convert the players and event data into CSV format.

In [None]:
from pandera.typing import DataFrame
from socceraction.data.statsbomb.schema import StatsBombPlayerSchema
import socceraction.data.statsbomb.loader as sch
from typing import cast
from socceraction.spadl.schema import SPADLSchema
import socceraction.spadl.statsbomb as sb
import socceraction.spadl.base as base
import os
import warnings
import numpy as np
import pandas as pd
from datetime import datetime
from datetime import timedelta
pd.set_option('display.max_columns', None)
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
warnings.filterwarnings(action="ignore", message="credentials were not supplied. open data access only")
import tqdm
from socceraction.data.statsbomb import StatsBombLoader
import socceraction.spadl as spadl
os.chdir('..')
import util_functions as uf

## Use statsbomb loader to load games and events

In [None]:
# StatsBomb loader configured with the 'local' getter, rooted at the `data` directory
SBL = StatsBombLoader(getter='local', root='data')

In [None]:
# Fetch the competitions table from the loader and show the distinct competition names
competitions = SBL.competitions()
set(competitions['competition_name'])

In [None]:
# Games and teams are read from the pre-exported overview CSVs. The games table
# can alternatively be rebuilt from the loader:
#   games = pd.concat([
#       SBL.games(row.competition_id, row.season_id)
#       for row in competitions.itertuples()
#   ])
games = pd.read_csv('data/overview_data/games_data.csv')
teams = pd.read_csv('data/overview_data/teams.csv')
games['game_date'] = pd.to_datetime(games['game_date'])

# Attach the home team's name: expose `home_team_id` as `team_id` so it can be
# joined against the teams table, then restore the original column name.
games_with_team_names = (
    games
    .rename(columns={'home_team_id': 'team_id'})
    .merge(teams, how='left', on='team_id')
    .rename(columns={'team_id': 'home_team_id', 'team_name': 'home_team_name'})
)
games_with_team_names

In [None]:
# Walk over every game once, collecting team and player frames in lists and
# keeping the converted SPADL actions and the raw events keyed by game id.
# Note that `teams` is rebound here: first to a list, then to the concatenated frame.
games_verbose = tqdm.tqdm(list(games.itertuples()), desc="Loading game data")
teams, players = [], []
actions = {}
evs = {}
for game in games_verbose:
    game_id = game.game_id

    # load data
    teams.append(SBL.teams(game_id))
    players.append(uf.players_func(SBL, game_id))
    events = SBL.events(game_id)

    # convert data (the conversion runs on the events before `event_id` is renamed)
    actions[game_id] = spadl.statsbomb.convert_to_actions(events, game.home_team_id)
    events = events.rename(columns={'event_id': 'original_event_id'})
    evs[game_id] = events

# A team appears once per game it played, so keep a single row per team id
teams = pd.concat(teams)
teams = teams.drop_duplicates(subset="team_id")
players = pd.concat(players)

## Getting injury data
The dates have been changed to be in the right format.
The injury data was collected from Transfermarkt and put into a dataset, which we use to preprocess the data in this section.

In [None]:
# Player injury history, re-indexed from 0
injury_df_formatted = (
    pd.read_csv('data/injury_data/all_player_injuries_updated.csv')
    .reset_index(drop=True)
)

# Injury records from the FFScout file
injury_df_ffscout = pd.read_csv('data/injury_data/ffscout_injuries_updated.csv')

Update this function to use game data from either Transfermarkt or FFScout by passing in the corresponding function and dataframes. Some comments on how to tweak this are in `util_functions`.

In [None]:
# Reload the games table straight from disk. Note that `game_date` is NOT parsed
# to datetimes here, so from this point on it holds plain strings again.
games = pd.read_csv('data/overview_data/games_data.csv')

# Gather the `injured` column of every per-game injury table into one flat list
all_injuries = []
for game_id in games['game_id']:
    game_injury_df = uf.get_injury_df_for_game_FFScout(game_id, games, injury_df_ffscout)
    all_injuries.extend(game_injury_df['injured'].values)
    #game_injury_df.to_csv('game_data/'+str(g)+'/'+str(g)+'_injured_ffscout.csv',index=False)

## Create player DFs with their data

In [None]:
# Overview tables for teams and players; the player table is displayed for inspection
teams_df = pd.read_csv('data/overview_data/teams.csv')
player_df = pd.read_csv('data/overview_data/all_players.csv')
player_df

### Get Player game features

In [None]:
# Physical-load columns copied from each player's fixtures file onto the matching game row.
phys_cols = [
    'days_diff', 'rolling_days_diff', 'rolling_mins_played',
    'rolling_days_diff_exp', 'rolling_mins_played_exp',
    'pred_dist', 'metres_per_min', 'hir_dist', 'sprint_dist',
    'num_hirs', 'num_sprints', 'accels', 'decels', 'LI_accels', 'LI_decels',
    'acute_workload', 'chronic_workload', 'ACWR',
]

# Column layout of the per-player frame before the physical columns are appended.
game_feature_cols = [
    'date', 'player_id', 'player_name', 'team_id', 'is_home', 'result', 'goal_diff',
    'referee', 'mins_played', 'position', 'matches_played',
    'num_tackles', 'num_fouls', 'num_bad_touches', 'num_ball_touches', 'num_dribbles',
    'num_tackleds', 'num_fouleds', 'days_since_last_game',
]

# Gap (in days) recorded for a player's first game, where no earlier game exists.
# The team-feature cell of this notebook uses the same value for a team's first game.
FIRST_GAME_DAYS_GAP = 100


def count_player_events_suffered(events_df, type_name, pid):
    """Count events of `type_name` whose immediately preceding event row belongs to `pid`.

    The row directly before a tackle/foul is treated as the player on the receiving
    end. Relies on `events_df` having the default 0..n-1 integer index. An event in
    the very first row has no predecessor and is skipped (the previous `i-1` lookup
    raised a KeyError for it).
    """
    event_idx = events_df.index[(events_df['type_name'] == type_name).to_numpy()]
    previous_idx = event_idx[event_idx > 0] - 1
    return int((events_df.loc[previous_idx, 'player_id'] == pid).sum())


# Read every lineup file once instead of once per (player, game) pair.
lineup_by_game = {
    game_id: pd.read_csv('data/game_data/'+str(game_id)+'/'+str(game_id)+'_lineup.csv')
    for game_id in games['game_id']
}
# Event files are only needed for games a player appeared in; cache them on first use.
events_by_game = {}

for pid in tqdm.tqdm(player_df['player_id']):
    player_name = player_df[player_df['player_id'] == pid]['player_name'].values[0]
    fixtures_df = pd.read_csv('data/player_data/'+str(pid)+'/'+str(pid)+'_fixtures_with_physical_updated.csv')
    fixtures_df['game_date'] = pd.to_datetime(fixtures_df['game_date'])

    # One record per game in which the player is listed in the lineup.
    records = []
    for game in games.itertuples(index=False):
        game_id = game.game_id
        lineup_df = lineup_by_game[game_id]
        player_lineup = lineup_df[lineup_df['player_id'] == pid]
        if player_lineup.empty:
            continue

        if game_id not in events_by_game:
            events_by_game[game_id] = pd.read_csv('data/game_data/'+str(game_id)+'/'+str(game_id)+'_events.csv')
        events_df = events_by_game[game_id]
        player_events = events_df[events_df['player_id'] == pid]
        type_counts = player_events['type_name'].value_counts()

        team_id = player_lineup['team_id'].values[0]
        is_home = bool(team_id == game.home_team_id)
        # Goal difference and result are expressed from the player's team's point of view.
        goal_diff = game.home_score - game.away_score
        if not is_home:
            goal_diff = -goal_diff
        if goal_diff == 0:
            result = 'D'
        elif goal_diff > 0:
            result = 'W'
        else:
            result = 'L'

        records.append({
            'date': game.game_date,
            'player_id': pid,
            'player_name': player_name,
            'team_id': team_id,
            'is_home': is_home,
            'result': result,
            'goal_diff': goal_diff,
            'referee': game.referee,
            'mins_played': player_lineup['minutes_played'].values[0],
            'position': player_lineup['starting_position_name'].values[0],
            'num_tackles': int(type_counts.get('tackle', 0)),
            'num_fouls': int(type_counts.get('foul', 0)),
            'num_bad_touches': int(type_counts.get('bad_touch', 0)),
            'num_ball_touches': len(player_events),
            'num_dribbles': int(type_counts.get('dribble', 0)),
            'num_tackleds': count_player_events_suffered(events_df, 'tackle', pid),
            'num_fouleds': count_player_events_suffered(events_df, 'foul', pid),
        })

    # `columns=` keeps the layout stable even for a player without any appearance.
    p_df = pd.DataFrame(records, columns=game_feature_cols)
    p_df['date'] = pd.to_datetime(p_df['date'])
    p_df = p_df.sort_values('date').reset_index(drop=True)
    p_df['matches_played'] = p_df.index
    # Calendar days between consecutive appearances. This column used to be
    # filled with the running match counter by mistake.
    p_df['days_since_last_game'] = (
        p_df['date'].dt.normalize().diff().dt.days.fillna(FIRST_GAME_DAYS_GAP)
    )

    # Look up the physical columns by calendar day, using the first fixture of each
    # day. Games without a matching fixture get NaN instead of raising an IndexError.
    fixture_days = fixtures_df['game_date'].dt.date
    first_fixture_of_day = ~fixture_days.duplicated()
    physical_by_day = fixtures_df.loc[first_fixture_of_day, phys_cols]
    physical_by_day.index = fixture_days[first_fixture_of_day].values
    physical = physical_by_day.reindex(list(p_df['date'].dt.date)).reset_index(drop=True)
    p_df = pd.concat([p_df, physical], axis=1)

    print("FINAL DF: ", p_df[['date','days_diff','rolling_days_diff_exp','mins_played','rolling_mins_played_exp','pred_dist','num_ball_touches','num_tackles']].head(10))
    #p_df.to_csv('data/player_data/'+str(pid)+'/'+str(pid)+'_gamefeatures_with_physical_updated.csv', index=False)
    

### Get player injury features

In [None]:
# Load the player injury history used for the injury features below and display it
injury_df = pd.read_csv('data/injury_data/all_player_injuries_updated.csv')
injury_df

In [None]:
games = games.sort_values('game_date').reset_index(drop=True)

# NOTE(review): the loop has always skipped the first 50 players (`[50:]`).
# This looks like a leftover from resuming an interrupted run; set to 0 to process everyone.
INJURY_FEATURES_START = 50

# Look-back windows, in days, behind the `injuries_past_<label>_months` columns.
INJURY_WINDOW_DAYS = [('three', 90), ('six', 180), ('twelve', 360)]

# Column layout of the per-player injury feature frame.
injury_feature_cols = [
    'date', 'player_id', 'player_name', 'team_id',
    'num_injuries', 'total_days_out', 'total_games_missed',
    'days_since_last_injury', 'days_out_last_injury', 'games_missed_last_injury', 'type_last_injury',
    'frequency_most_prominent_injury', 'days_out_most_prominent_injury',
    'games_missed_most_prominent_injury', 'type_most_prominent_injury', 'days_since_most_prominent_injury',
    'days_out_most_serious_injury', 'games_missed_most_serious_injury',
    'type_most_serious_injury', 'days_since_most_serious_injury',
    'injuries_past_three_months', 'injuries_past_six_months', 'injuries_past_twelve_months',
]


def _injury_group_summary(group, date, suffix):
    """Summarise a non-empty group of injuries under the column suffix `suffix`."""
    return {
        'days_out_' + suffix: group['Days'].sum(),
        'games_missed_' + suffix: group['Games missed'].sum(),
        'type_' + suffix: group['Injury'].values[0],
        'days_since_' + suffix: (date - group['until'].max()).days,
    }


def injury_history_features(injuries_before, date):
    """Build the injury-history features for one game.

    Parameters
    ----------
    injuries_before : DataFrame with `until` (datetime), `Days`, `Games missed`
        and `Injury` columns, holding the injuries that ended before `date`.
    date : pd.Timestamp of the game.

    Returns
    -------
    dict keyed by the injury feature column names. Counts and totals default to 0,
    and "type"/"days since" values default to None when there is no prior injury.
    """
    features = {
        'num_injuries': len(injuries_before),
        'total_days_out': 0,
        'total_games_missed': 0,
        'days_since_last_injury': None,
        'days_out_last_injury': 0,
        'games_missed_last_injury': 0,
        'type_last_injury': None,
        'frequency_most_prominent_injury': 0,
        'days_out_most_prominent_injury': 0,
        'games_missed_most_prominent_injury': 0,
        'type_most_prominent_injury': None,
        'days_since_most_prominent_injury': None,
        'days_out_most_serious_injury': 0,
        'games_missed_most_serious_injury': 0,
        'type_most_serious_injury': None,
        'days_since_most_serious_injury': None,
    }
    for label, _ in INJURY_WINDOW_DAYS:
        features['injuries_past_' + label + '_months'] = 0
    if injuries_before.empty:
        return features

    features['total_days_out'] = injuries_before['Days'].sum()
    features['total_games_missed'] = injuries_before['Games missed'].sum()

    # Last injury: the record(s) with the most recent end date.
    last_injury = injuries_before[injuries_before['until'] == injuries_before['until'].max()]
    features.update(_injury_group_summary(last_injury, date, 'last_injury'))

    # Most prominent injury: the most frequent injury type (skipped if no type is recorded).
    injury_counts = injuries_before['Injury'].value_counts()
    if not injury_counts.empty:
        most_prominent = injuries_before[injuries_before['Injury'] == injury_counts.idxmax()]
        features['frequency_most_prominent_injury'] = len(most_prominent)
        features.update(_injury_group_summary(most_prominent, date, 'most_prominent_injury'))

    # Most serious injury: the record(s) with the longest absence in days.
    most_serious = injuries_before[injuries_before['Days'] == injuries_before['Days'].max()]
    features.update(_injury_group_summary(most_serious, date, 'most_serious_injury'))

    for label, window_days in INJURY_WINDOW_DAYS:
        window_start = date - pd.Timedelta(days=window_days)
        features['injuries_past_' + label + '_months'] = int((injuries_before['until'] > window_start).sum())
    return features


# Parse the injury table once, up front. Missing dates become NaT and therefore
# never count as "ended before the game". Missing day / game counts become 0
# instead of the garbage integers a NaN -> int cast produces.
injuries_typed = injury_df.assign(**{
    'from': pd.to_datetime(injury_df['from']),
    'until': pd.to_datetime(injury_df['until']),
    'Days': pd.to_numeric(injury_df['Days']).fillna(0).astype(int),
    'Games missed': pd.to_numeric(injury_df['Games missed']).fillna(0).astype(int),
})

# Read every lineup file once instead of once per (player, game) pair.
# The events files were read here before but never used, so they are no longer loaded.
lineup_by_game = {
    game_id: pd.read_csv('data/game_data/'+str(game_id)+'/'+str(game_id)+'_lineup.csv')
    for game_id in games['game_id']
}

for pid in player_df['player_id'].iloc[INJURY_FEATURES_START:]:
    player_injury_df = injuries_typed[injuries_typed['sb_id'] == pid]
    player_name = player_df[player_df['player_id'] == pid]['player_name'].values[0]

    # One record per game in which the player is listed in the lineup.
    records = []
    for game in games.itertuples(index=False):
        lineup_df = lineup_by_game[game.game_id]
        player_lineup = lineup_df[lineup_df['player_id'] == pid]
        if player_lineup.empty:
            continue
        date = pd.Timestamp(game.game_date)
        injuries_before = player_injury_df[player_injury_df['until'] < date]
        record = {
            'date': date,
            'player_id': pid,
            'player_name': player_name,
            'team_id': player_lineup['team_id'].values[0],
        }
        record.update(injury_history_features(injuries_before, date))
        records.append(record)

    # `columns=` keeps the layout stable even for a player without any appearance.
    p_df = pd.DataFrame(records, columns=injury_feature_cols)
    #p_df.to_csv('data/player_data/'+str(pid)+'/'+str(pid)+'_injuryfeatures.csv', index=False)

In [None]:
games = games.sort_values('game_date').reset_index(drop=True)

# Gap (in days) recorded for a team's first game, where no earlier game exists.
FIRST_GAME_DAYS_GAP = 100

# Column layout of the per-team frame before the schedule columns are appended.
team_feature_cols = [
    'date', 'team_id', 'team_home',
    'num_tackles', 'num_fouls', 'num_bad_touches', 'num_touches', 'num_dribbles',
    'num_times_tackled', 'num_times_fouled', 'result', 'goal_diff',
]


def count_team_events_suffered(events_df, type_name, team_id):
    """Count events of `type_name` whose immediately preceding event row belongs to `team_id`.

    The row directly before a tackle/foul is treated as the side on the receiving
    end. Relies on `events_df` having the default 0..n-1 integer index. An event in
    the very first row has no predecessor and is skipped (the previous `i-1` lookup
    raised a KeyError for it).
    """
    event_idx = events_df.index[(events_df['type_name'] == type_name).to_numpy()]
    previous_idx = event_idx[event_idx > 0] - 1
    return int((events_df.loc[previous_idx, 'team_id'] == team_id).sum())


for team_id in teams_df['team_id']:
    # One record per game the team took part in, home or away.
    records = []
    for game in games.itertuples(index=False):
        is_home = bool(team_id == game.home_team_id)
        if not (is_home or team_id == game.away_team_id):
            continue

        # Event files live under data/game_data like everywhere else in this notebook
        # (the path here used to lack the leading 'data/').
        events_df = pd.read_csv('data/game_data/'+str(game.game_id)+'/'+str(game.game_id)+'_events.csv')
        team_events_df = events_df[events_df['team_id'] == team_id]
        type_counts = team_events_df['type_name'].value_counts()

        # Goal difference and result are expressed from this team's point of view.
        goal_diff = game.home_score - game.away_score
        if not is_home:
            goal_diff = -goal_diff
        if goal_diff == 0:
            result = 'D'
        elif goal_diff > 0:
            result = 'W'
        else:
            result = 'L'

        records.append({
            'date': game.game_date,
            'team_id': team_id,
            'team_home': is_home,
            'num_tackles': int(type_counts.get('tackle', 0)),
            'num_fouls': int(type_counts.get('foul', 0)),
            'num_bad_touches': int(type_counts.get('bad_touch', 0)),
            'num_touches': len(team_events_df),
            'num_dribbles': int(type_counts.get('dribble', 0)),
            'num_times_tackled': count_team_events_suffered(events_df, 'tackle', team_id),
            'num_times_fouled': count_team_events_suffered(events_df, 'foul', team_id),
            'result': result,
            'goal_diff': goal_diff,
        })

    # `columns=` keeps the layout stable even for a team without any game.
    team_df = pd.DataFrame(records, columns=team_feature_cols)
    # Parse once so the schedule features work whether `game_date` holds strings or datetimes.
    team_df['date'] = pd.to_datetime(team_df['date'])
    team_df = team_df.sort_values('date').reset_index(drop=True)
    team_df['matches_played'] = team_df.index

    # Calendar days since the previous game; the first game gets the sentinel gap.
    match_days = team_df['date'].dt.normalize()
    team_df['days_since_last_game'] = match_days.diff().dt.days.fillna(FIRST_GAME_DAYS_GAP)
    # Games played in the 30 days up to (but excluding) the current kick-off.
    team_df['games_in_last_month'] = [
        int(((team_df['date'] >= match_day - pd.Timedelta(days=30)) & (team_df['date'] < kickoff)).sum())
        for match_day, kickoff in zip(match_days, team_df['date'])
    ]
    #team_df.to_csv('data/team_data/'+str(team_id)+'_features.csv', index=False)

## Weather Data

In [None]:
# Club-name spellings found in the weather file, mapped to the names used in the games table
weather_team_aliases = {
    'Wolves': 'Wolverhampton Wanderers',
    'Newcastle Utd': 'Newcastle United',
    'Tottenham': 'Tottenham Hotspur',
    'Huddersfield': 'Huddersfield Town',
    'West Ham': 'West Ham United',
    'Bournemouth': 'AFC Bournemouth',
    'Manchester Utd': 'Manchester United',
    'Brighton': 'Brighton & Hove Albion',
    'West Brom': 'West Bromwich Albion',
}

# Load the gzipped weather table and align its column names with the games table
weather_df = (
    pd.read_csv('data/weather_data/epl_weather.csv.gz', compression='gzip')
    .rename(columns={'kickoff': 'game_date', 'team_a': 'home_team_name'})
)
# Kick-off is shifted forward by one hour
# NOTE(review): presumably a timezone alignment with the games table - confirm
weather_df['game_date'] = pd.to_datetime(weather_df['game_date']) + pd.DateOffset(hours=1)
weather_df['day'] = weather_df['game_date'].dt.date
weather_df['home_team_name'] = weather_df['home_team_name'].replace(weather_team_aliases)

# Join on home team and calendar day; both frames carry `game_date`, so the
# merged frame ends up with `game_date_x` (games) and `game_date_y` (weather)
games_with_team_names['day'] = games_with_team_names['game_date'].dt.date
merged_weather_df = games_with_team_names.merge(weather_df, how='left', on=['home_team_name', 'day'])

In [None]:
# Columns kept for the export: game identifiers and scores followed by venue and weather readings
weather_export_cols = [
    'game_id', 'season_id', 'game_date_x', 'home_team_id', 'away_team_id',
    'home_score', 'away_score', 'venue', 'attendance',
    'latitude', 'longitude', 'elevation',
    'temp', 'snow', 'windspeedMiles', 'winddirDegree', 'weatherCode', 'precipMM',
    'humidity', 'visibility', 'pressure', 'cloudcover',
    'HeatIndexC', 'WindChillC', 'WindGustMiles', 'FeelsLikeC',
]
# `game_date_x` is the games-table date produced by the merge; give it back its plain name
merged_weather_df_csv = merged_weather_df.loc[:, weather_export_cols].rename(columns={'game_date_x': 'game_date'})

## Team/Opponent Injury data

In [None]:
# Team id -> team name lookup
teams_dict = {
    46: 'Wolverhampton Wanderers',
    58: 'Cardiff City',
    37: 'Newcastle United',
    25: 'Southampton',
    23: 'Watford',
    31: 'Crystal Palace',
    33: 'Chelsea',
    36: 'Manchester City',
    28: 'AFC Bournemouth',
    55: 'Fulham',
    35: 'Brighton & Hove Albion',
    38: 'Tottenham Hotspur',
    22: 'Leicester City',
    32: 'Huddersfield Town',
    40: 'West Ham United',
    29: 'Everton',
    1: 'Arsenal',
    34: 'Burnley',
    24: 'Liverpool',
    39: 'Manchester United',
    26: 'Swansea City',
    27: 'West Bromwich Albion',
    30: 'Stoke City',
}

# Names of the home teams, in the order their ids first appear in `games`
names = [teams_dict[home_team_id] for home_team_id in games['home_team_id'].unique()]

In [None]:
# For every home team: average number of injuries per game on its own side
# and on the opposing side, over all games the team took part in.
team_injuries_per_game = []
team_injuries_against_per_game = []
for team in games['home_team_id'].unique():
    plays_at_home = games['home_team_id'] == team
    plays_away = games['away_team_id'] == team
    games_participated = games[plays_at_home | plays_away]
    n_games = len(games_participated)

    n_injuries = 0
    n_injuries_against = 0
    for game_id in games_participated['game_id']:
        game_dir = 'data/game_data/' + str(game_id)
        injured_df = pd.read_csv(game_dir + "/" + str(game_id) + "_injured_ffscout.csv")
        own_side = injured_df['team_id'] == team
        n_injuries += injured_df.loc[own_side, 'injured'].sum()
        n_injuries_against += injured_df.loc[~own_side, 'injured'].sum()

    team_injuries_per_game.append(n_injuries / n_games)
    team_injuries_against_per_game.append(n_injuries_against / n_games)

In [None]:
# Assemble the per-team injury rates and show them ordered by injuries on the opposing side
team_injuries_df = pd.DataFrame({
    'team_id': games['home_team_id'].unique(),
    'team_name': names,
    'num_injuries_pg': team_injuries_per_game,
    'num_injuries_against_pg': team_injuries_against_per_game,
})
team_injuries_df.sort_values('num_injuries_against_pg')