<a href="https://colab.research.google.com/github/CarlosSotero/NFL_Stats_Players/blob/main/NFL_Moderna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Bibliotecas necessárias**

In [16]:
import kagglehub
import os
import pandas as pd
!pip install nfl_data_py
import nfl_data_py as nfl
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns



## **Download dos datasets**

In [17]:
# Fetch the latest version of the Kaggle dataset through kagglehub and keep
# the value it returns in `path` (reported below as the dataset files path).
path = kagglehub.dataset_download(
    "philiphyde1/nfl-stats-1999-2022",
)

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'nfl-stats-1999-2022' dataset.
Path to dataset files: /kaggle/input/nfl-stats-1999-2022


In [18]:
# Show the files found in the directory returned by kagglehub.
arquivos_baixados = os.listdir(path)
print(arquivos_baixados)


['yearly_player_stats_defense.csv', 'yearly_player_stats_offense.csv', 'yearly_team_stats_offense.csv', 'weekly_player_stats_offense.csv', 'yearly_team_stats_defense.csv', 'weekly_player_stats_defense.csv', 'weekly_team_stats_defense.csv', 'weekly_team_stats_offense.csv']


In [37]:
# Columns kept from the yearly offensive player table (order is preserved in
# the reduced frame).
offense_cols = [
    'player_id', 'player_name', 'position', 'team', 'season',
    'age', 'games_played_season',
    'height', 'weight', 'college',
    'draft_year', 'draft_round', 'draft_pick', 'years_exp',
    'pass_attempts', 'complete_pass', 'passing_yards', 'pass_touchdown',
    'interception', 'passer_rating', 'comp_pct', 'ypa',
    'rush_attempts', 'rushing_yards', 'rush_touchdown',
    'fumble', 'fumble_lost', 'ypc',
    'targets', 'receptions', 'receiving_yards', 'receiving_touchdown',
    'yards_after_catch', 'yptarget', 'ypr',
    'total_yards', 'total_tds',
    'fantasy_points_ppr', 'fantasy_points_standard',
    'season_ypg', 'season_yptouch', 'season_average_ppr_ppg',
    'offense_snaps', 'offense_pct',
]

# Columns kept from the yearly defensive player table (order is preserved in
# the reduced frame).
defense_cols = [
    'player_id', 'player_name', 'position', 'team', 'season',
    'age', 'games_played_season',
    'height', 'weight', 'college',
    'draft_year', 'draft_round', 'draft_pick', 'seasons_played',
    'solo_tackle', 'assist_tackle', 'tackle_with_assist',
    'sack', 'qb_hit',
    'interception', 'fumble_forced', 'safety', 'def_touchdown',
    'defense_snaps', 'defense_pct',
    'fantasy_points_ppr', 'fantasy_points_standard',
]

# Load the yearly per-player CSV files (offense and defense) from the
# downloaded dataset directory.
offense_path = f"{path}/yearly_player_stats_offense.csv"
defense_path = f"{path}/yearly_player_stats_defense.csv"
df_offense, df_defense = (
    pd.read_csv(csv_file) for csv_file in (offense_path, defense_path)
)

# Offense frame restricted to the selected columns.
df_offense_reduzido = df_offense.loc[:, offense_cols]
print("Shape do DF Ofensivo reduzido:", df_offense_reduzido.shape)

# Defense frame restricted to the selected columns.
df_defense_reduzido = df_defense.loc[:, defense_cols]
print("Shape do DF Defensivo reduzido:", df_defense_reduzido.shape)


Shape do DF Ofensivo reduzido: (7133, 44)
Shape do DF Defensivo reduzido: (16148, 27)


## **Limpeza dos Dados**

In [38]:
# Print the column / dtype / non-null summary of both reduced frames.
# DataFrame.info() writes its report directly to stdout and returns None, so it
# is called on its own: wrapping it in print() would add a stray "None" line
# after each report.
print("🟦 OFENSIVO -----------------------")
df_offense_reduzido.info()
print("\n")
print("🟥 DEFENSIVO ----------------------")
df_defense_reduzido.info()

🟦 OFENSIVO -----------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7133 entries, 0 to 7132
Data columns (total 44 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   player_id                7133 non-null   object 
 1   player_name              7133 non-null   object 
 2   position                 7133 non-null   object 
 3   team                     7133 non-null   object 
 4   season                   7133 non-null   int64  
 5   age                      7133 non-null   float64
 6   games_played_season      7133 non-null   int64  
 7   height                   7133 non-null   float64
 8   weight                   7133 non-null   float64
 9   college                  7133 non-null   object 
 10  draft_year               7133 non-null   float64
 11  draft_round              6043 non-null   float64
 12  draft_pick               6043 non-null   float64
 13  years_exp                7115 non-null   fl

### **Ataque**

In [77]:
# Work on a copy so the reduced offense frame is left untouched.
df_ataque_limpo = df_offense_reduzido.copy()

# Fill missing values column by column: numeric columns get 0, every other
# column gets the 'Desconhecido' placeholder.
# The filled Series is assigned back to the frame instead of calling
# `df[col].fillna(..., inplace=True)`: that form mutates an intermediate
# selection, which pandas flags as chained assignment and which leaves the
# frame unchanged when Copy-on-Write is active.
# A numeric check is used (rather than `dtype == 'object'`) so text columns
# stored with a dedicated string dtype are still filled with the placeholder.
for col in df_ataque_limpo.columns:
    if pd.api.types.is_numeric_dtype(df_ataque_limpo[col]):
        fill_value = 0
    else:
        fill_value = 'Desconhecido'
    df_ataque_limpo[col] = df_ataque_limpo[col].fillna(fill_value)

# In 'college', a literal 0 / '0' is treated as "unknown" as well.
if 'college' in df_ataque_limpo.columns:
    df_ataque_limpo['college'] = df_ataque_limpo['college'].replace(['0', 0], 'Desconhecido')

# Cast the float columns below to int. errors='ignore' keeps this best-effort:
# a column that cannot be converted is left as it is instead of raising.
for col in ['draft_year', 'draft_round', 'draft_pick', 'years_exp', 'age']:
    if col in df_ataque_limpo.columns:
        df_ataque_limpo[col] = df_ataque_limpo[col].astype(int, errors='ignore')

print("Ataque limpo:", df_ataque_limpo.shape)
print("Nulos restantes:", df_ataque_limpo.isnull().sum().sum())

Ataque limpo: (7133, 44)
Nulos restantes: 0


### **Defesa**

In [81]:
# Work on a copy so the reduced defense frame is left untouched.
df_defesa_limpo = df_defense_reduzido.copy()

# Drop rows that have no player name or no position.
df_defesa_limpo = df_defesa_limpo.dropna(subset=['player_name', 'position'])

# Fill missing values column by column: numeric columns get 0, every other
# column gets the 'Desconhecido' placeholder.
# The filled Series is assigned back to the frame instead of calling
# `df[col].fillna(..., inplace=True)`: that form mutates an intermediate
# selection, which pandas flags as chained assignment and which leaves the
# frame unchanged when Copy-on-Write is active.
# A numeric check is used (rather than `dtype == 'object'`) so text columns
# stored with a dedicated string dtype are still filled with the placeholder.
for col in df_defesa_limpo.columns:
    if pd.api.types.is_numeric_dtype(df_defesa_limpo[col]):
        fill_value = 0
    else:
        fill_value = 'Desconhecido'
    df_defesa_limpo[col] = df_defesa_limpo[col].fillna(fill_value)

# In 'college', a literal 0 / '0' is treated as "unknown" as well.
if 'college' in df_defesa_limpo.columns:
    df_defesa_limpo['college'] = df_defesa_limpo['college'].replace(['0', 0], 'Desconhecido')

# Cast the float columns below to int. errors='ignore' keeps this best-effort:
# a column that cannot be converted is left as it is instead of raising.
for col in ['draft_year', 'draft_round', 'draft_pick', 'age']:
    if col in df_defesa_limpo.columns:
        df_defesa_limpo[col] = df_defesa_limpo[col].astype(int, errors='ignore')

print("Defensivo limpo:", df_defesa_limpo.shape)
print("Nulos restantes:", df_defesa_limpo.isnull().sum().sum())

Defensivo limpo: (12332, 27)
Nulos restantes: 0


In [84]:
print('Amostra de 10 jogadores aleatórios do ataque:')
# Random sample of 10 rows from the cleaned offense frame; the fixed
# random_state makes the displayed rows identical on every re-run.
df_ataque_limpo.sample(10, random_state=42)

Amostra de 10 jogadores aleatórios do ataque:


Unnamed: 0,player_id,player_name,position,team,season,age,games_played_season,height,weight,college,...,ypr,total_yards,total_tds,fantasy_points_ppr,fantasy_points_standard,season_ypg,season_yptouch,season_average_ppr_ppg,offense_snaps,offense_pct
4840,00-0032160,Tyrell Williams,WR,DET,2021,29,1,76.0,205.0,Western Oregon,...,7.0,14.0,0.0,3.4,1.4,14.0,7.0,3.4,39.0,0.419355
344,00-0028149,Bruce Miller,FB,SF,2012,25,1,74.0,248.0,Central Florida,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.512821
3940,00-0034972,Alexander Mattison,RB,MIN,2019,21,13,71.0,220.0,Boise State,...,8.2,544.0,1.0,67.4,57.4,3.22,4.95,5.18,197.0,0.230409
3591,00-0031544,Amari Cooper,WR,DAL,2019,25,16,73.0,210.0,Alabama,...,15.05,1195.0,16.0,242.5,163.5,4.67,14.94,15.16,845.0,0.757848
1732,00-0032098,Daniel Brown,TE,BAL,2015,23,2,77.0,247.0,James Madison,...,9.4,47.0,0.0,7.7,2.7,11.75,9.4,3.85,55.0,0.433071
6306,00-0036510,Kenny Yeboah,TE,NYJ,2023,24,2,76.0,250.0,Mississippi,...,14.0,28.0,0.0,4.8,2.8,7.0,14.0,2.4,48.0,0.296296
5835,00-0036936,Rondale Moore,WR,ARI,2022,22,8,67.0,181.0,Purdue,...,10.1,409.0,2.0,87.9,46.9,6.39,8.69,10.99,459.0,0.822581
5586,00-0034577,Kyle Allen,QB,HOU,2022,26,2,75.0,210.0,Houston,...,0.0,429.0,2.0,18.94,18.94,107.25,5.36,9.47,126.0,1.0
3284,00-0033891,Zay Jones,WR,BUF,2018,23,16,74.0,200.0,East Carolina,...,11.64,652.0,15.0,159.2,103.2,2.55,11.26,9.95,941.0,0.888574
3484,00-0028114,Dwayne Harris,WR,LV,2019,31,1,70.0,215.0,East Carolina,...,7.0,7.0,0.0,1.7,0.7,7.0,7.0,1.7,2.0,0.04


In [83]:
print('Amostra de 10 jogadores aleatórios da defesa:')
# Random sample of 10 rows from the cleaned defense frame; the fixed
# random_state makes the displayed rows identical on every re-run.
df_defesa_limpo.sample(10, random_state=42)

Amostra de 10 jogadores aleatórios da defesa:


Unnamed: 0,player_id,player_name,position,team,season,age,games_played_season,height,weight,college,...,sack,qb_hit,interception,fumble_forced,safety,def_touchdown,defense_snaps,defense_pct,fantasy_points_ppr,fantasy_points_standard
5070,00-0027662,Carlos Dunlap,DE,CIN,2016,27,16.0,78.0,285.0,Florida,...,31.0,47.0,3.0,9,0.0,0.0,840.0,0.772769,0,0
11397,00-0032388,Deion Jones,LB,ATL,2021,27,16.0,73.0,222.0,LSU,...,8.0,15.0,0.0,2,0.0,0.0,1071.0,0.976299,0,0
5838,00-0032107,Angelo Blackson,DT,TEN,2016,24,8.0,76.0,318.0,Auburn,...,0.0,2.0,0.0,0,0.0,0.0,173.0,0.319778,0,0
5091,00-0027723,Al Woods,DT,TEN,2016,29,8.0,76.0,309.0,LSU,...,0.0,0.0,0.0,0,0.0,0.0,175.0,0.321691,0,0
10785,00-0035306,Darryl Johnson,DE,BUF,2020,23,5.0,78.0,253.0,North Carolina A&T,...,5.0,6.0,0.0,1,0.0,0.0,88.0,0.252874,0,0
15252,00-0034854,KhaDarel Hodge,WR,ATL,2024,29,1.0,74.0,210.0,Prairie View A&M,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0
2699,00-0026340,Joe Mays,ILB,KC,2014,0,5.0,0.0,0.0,Desconhecido,...,0.0,0.0,0.0,0,0.0,0.0,116.0,0.330484,0,0
12375,00-0036987,Brandon Stephens,CB,BAL,2021,24,17.0,73.0,215.0,SMU,...,0.0,5.0,0.0,0,0.0,0.0,744.0,0.683196,0,0
10199,00-0032165,Marcus Peters,CB,BAL,2020,27,16.0,72.0,197.0,Washington,...,3.0,9.0,10.0,12,0.0,1.0,1019.0,0.968631,0,0
12139,00-0036292,Jaylon Johnson,CB,CHI,2021,22,15.0,72.0,195.0,Utah,...,0.0,3.0,2.0,2,0.0,0.0,933.0,0.99467,0,0


## **Análise dos Ataques**

In [36]:
# Preview the first five rows of the reduced offense frame.
df_offense_reduzido.head(n=5)

Unnamed: 0,player_id,player_name,position,team,season,age,games_played_season,height,weight,college,...,ypr,total_yards,total_tds,fantasy_points_ppr,fantasy_points_standard,season_ypg,season_yptouch,season_average_ppr_ppg,offense_snaps,offense_pct
0,00-0000865,Charlie Batch,QB,PIT,2012,37.0,2,74.0,216.0,Eastern Michigan,...,,475.0,1.0,17.0,17.0,118.75,7.2,8.5,129.0,1.0
1,00-0004541,Donald Driver,WR,GB,2012,37.0,9,72.0,194.0,Alcorn State,...,9.62,77.0,4.0,27.7,19.7,0.95,9.62,3.08,121.0,0.190852
2,00-0006101,Tony Gonzalez,TE,ATL,2012,36.0,2,77.0,247.0,California,...,9.21,129.0,4.0,36.9,22.9,32.25,9.21,18.45,122.0,0.945736
3,00-0006101,Tony Gonzalez,TE,ATL,2012,36.0,16,77.0,247.0,California,...,10.0,930.0,16.0,228.0,135.0,3.63,10.0,14.25,966.0,0.911321
4,00-0007091,Matt Hasselbeck,QB,TEN,2012,36.0,8,76.0,235.0,Boston College,...,,1405.0,7.0,87.48,87.48,21.95,6.14,10.94,395.0,0.77451
