# Steelers EDA

Since it may be a better idea to analyze each team individually, we will start with the Steelers.

In [None]:
# numerical libraries
import pandas as pd
import numpy as np
from datetime import datetime

# graph libraries
import matplotlib.pyplot as plt

# Display settings: never truncate the column list when a DataFrame is rendered.
pd.options.display.max_columns = None

# Location of the games dataset, relative to this notebook.
file_path = '../datasets/games.csv'

In [None]:
# Load the raw games table.
games_df_raw = pd.read_csv(file_path)

# Keep every game in which Pittsburgh played, either at home or away.
focus_team = 'PIT'
involves_focus_team = (
    (games_df_raw['home_team'] == focus_team)
    | (games_df_raw['away_team'] == focus_team)
)
steelers_df_raw = games_df_raw[involves_focus_team].copy()

# Work only with past seasons: everything before 2025, and nothing from 1999
# or earlier (the original note says season 1999 has no gametime).
in_season_range = (steelers_df_raw['season'] > 1999) & (steelers_df_raw['season'] < 2025)
steelers_df = steelers_df_raw[in_season_range].copy()

# Parse the game date so it can be sorted and used as a plot axis.
steelers_df['gameday'] = pd.to_datetime(steelers_df['gameday'])

# Put games in chronological order and renumber rows 0..n-1. The trailing
# "last 10 games" features computed later depend on the row order, so the
# order is made explicit here rather than inherited from the CSV.
steelers_df = steelers_df.sort_values('gameday', kind='stable').reset_index(drop=True)

# True where Pittsburgh is the home team; every derived column below is a
# home/away selection driven by this single mask.
is_home = steelers_df['home_team'] == focus_team

# Focus team and whether it played at home or away.
steelers_df['focus_team'] = focus_team
steelers_df['focus_team_status'] = np.where(is_home, 'home', 'away')

# Opponent in each game.
steelers_df['versus_team'] = np.where(is_home, steelers_df['away_team'], steelers_df['home_team'])

# Scores from the focus team's point of view.
steelers_df['focus_team_score'] = np.where(is_home, steelers_df['home_score'], steelers_df['away_score'])
steelers_df['versus_team_score'] = np.where(is_home, steelers_df['away_score'], steelers_df['home_score'])

# winner is 1 only when the focus team scored strictly more points; ties and
# rows with a missing score compare as False and are therefore coded 0.
steelers_df['winner'] = np.where(
    steelers_df['focus_team_score'] > steelers_df['versus_team_score'], 1, 0
)

# Head coaches from the focus team's point of view.
steelers_df['focus_team_coach'] = np.where(is_home, steelers_df['home_coach'], steelers_df['away_coach'])
steelers_df['versus_team_coach'] = np.where(is_home, steelers_df['away_coach'], steelers_df['home_coach'])

# Keep only the columns used in the rest of the analysis.
selected_columns = [
    'game_id', 'focus_team', 'focus_team_status', 'versus_team',
    'focus_team_score', 'versus_team_score', 'winner', 'focus_team_coach',
    'versus_team_coach', 'season', 'game_type', 'gameday',
    'gametime', 'stadium',
]
steelers_df = steelers_df[selected_columns]


In [None]:
# Points scored by the Steelers and by their opponent in every game of the
# three most recent seasons, one panel per season.
seasons = [2024, 2023, 2022]
fig, axes = plt.subplots(len(seasons), 1, figsize=(12, 10))

for ax, season in zip(axes, seasons):
    season_df = steelers_df[steelers_df['season'] == season]
    ax.plot(season_df['gameday'], season_df['focus_team_score'], marker='o', label='Steelers points')
    ax.plot(season_df['gameday'], season_df['versus_team_score'], marker='o', label='Opponent points')
    ax.set_title(f'{season} season')
    # Label the axes so each panel can be read on its own.
    ax.set_xlabel('Game date')
    ax.set_ylabel('Points')
    ax.legend()
    ax.grid(True)

# Keep titles and axis labels of stacked panels from overlapping.
fig.tight_layout()
plt.show()


Here I create two columns summarizing the team's previous 10 games: the number of wins and the total points scored.

In [None]:
# Trailing features: for every game, the number of wins and the total points
# scored by the focus team over the 10 games immediately before it (the
# current game itself is excluded).
last_games_window = 10

trailing_features = {
    'focus_team_ltg_wins': 'winner',
    'focus_team_ltg_score': 'focus_team_score',
}

for feature_col, source_col in trailing_features.items():
    # rolling(...).sum() ends its window on the current row; shift(1) moves the
    # result down one row so each game only sees the games played before it.
    # The first 10 rows have no complete history and stay NaN. The columns are
    # always created, even when the frame has 10 rows or fewer.
    steelers_df[feature_col] = (
        steelers_df[source_col]
        .rolling(last_games_window)
        .sum()
        .shift(1)
    )

In [None]:
# One panel per season (three most recent): wins accumulated over the
# previous 10 games, plotted against the game date.
seasons = [2024, 2023, 2022]
fig, axes = plt.subplots(3, 1, figsize=(12, 10))

for ax, season in zip(axes, seasons):
    season_mask = steelers_df['season'] == season
    season_df = steelers_df[season_mask]
    ax.plot(season_df['gameday'], season_df['focus_team_ltg_wins'], marker='o', label='Steelers')
    ax.set_title(f'last 10 games wins in {season} season')
    ax.legend()
    ax.grid(True)

plt.show()

In [None]:
# One panel per season (three most recent): points scored over the previous
# 10 games, plotted against the game date.
seasons = [2024, 2023, 2022]
fig, axes = plt.subplots(len(seasons), 1, figsize=(12, 10))

for ax, season in zip(axes, seasons):
    season_df = steelers_df[steelers_df['season'] == season]
    ax.plot(season_df['gameday'], season_df['focus_team_ltg_score'], marker='o', label='Steelers')
    # The series plotted is the trailing points total, so the title says
    # "points" (it previously said "wins", copied from the wins chart).
    ax.set_title(f'last 10 games points in {season} season')
    ax.set_xlabel('Game date')
    ax.set_ylabel('Points scored in previous 10 games')
    ax.legend()
    ax.grid(True)

# Keep titles and axis labels of stacked panels from overlapping.
fig.tight_layout()
plt.show()