# NCAA to NFL Draft Predictions – Exploratory Data Analysis (EDA)

This notebook explores NCAA player stats, cleans the data, and visualizes key trends related to NFL Draft outcomes.

## 1. Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# visualization style
sns.set_theme(style="whitegrid")


## 2. Load Data

In [3]:
passing = pd.read_csv('../data/raw/CFB_Passing_2021.csv')
receiving = pd.read_csv('../data/raw/CFB_Receiving_2021.csv')
rushing = pd.read_csv('../data/raw/CFB_Rushing_2021.csv')
draft = pd.read_csv('../data/raw/NFL_Draft_2023.csv')

## 3. Inspect Data
- Look at shape, data types, and missing values.
- Summarize numeric stats.

In [4]:
passing.info()
receiving.info()
rushing.info()
draft.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 532 entries, 0 to 531
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Rk                 532 non-null    int64  
 1   Player             532 non-null    object 
 2   Team               532 non-null    object 
 3   Conf               532 non-null    object 
 4   G                  532 non-null    int64  
 5   Cmp                532 non-null    int64  
 6   Att                532 non-null    int64  
 7   Cmp%               532 non-null    float64
 8   Yds                532 non-null    int64  
 9   TD                 532 non-null    int64  
 10  TD%                532 non-null    float64
 11  Int                532 non-null    int64  
 12  Int%               532 non-null    float64
 13  Y/A                532 non-null    float64
 14  AY/A               532 non-null    float64
 15  Y/C                465 non-null    float64
 16  Y/G                532 non

## 4a. Cleaning Draft Data
- Handle missing values
- Rename columns for consistency
- Drop duplicates

In [5]:
# Clean draft data: 
# drop irrelevant columns and rename duplicates for clarity

# Make a copy to avoid overwriting the original
draft_clean = draft.copy()

# Drop empty or irrelevant columns
draft_clean = draft_clean.drop(columns=[
    'To','AP1', 'PB', 'St', 'wAV', 'DrAV', 'Unnamed: 28', '-9999'
    ], errors='ignore')

# Rename confusing duplicate columns
draft_clean = draft_clean.rename(columns={
    'Cmp': 'Pass_Cmp',
    'Att': 'Pass_Att',
    'Yds': 'Pass_Yds',
    'TD': 'Pass_TD',
    'Int': 'Pass_Int',
    'Att.1': 'Rush_Att',
    'Yds.1': 'Rush_Yds',
    'TD.1': 'Rush_TD',
    'Rec': 'Rec_Rec',
    'Yds.2': 'Rec_Yds',
    'TD.2': 'Rec_TD',
    'Int.1': 'Def_Int',
})

# Check result
draft_clean.head()

Unnamed: 0,Rnd,Pick,Tm,Player,Pos,Age,G,Pass_Cmp,Pass_Att,Pass_Yds,...,Rush_Att,Rush_Yds,Rush_TD,Rec_Rec,Rec_Yds,Rec_TD,Solo,Def_Int,Sk,College/Univ
0,1,1,CAR,Bryce Young,QB,22.0,34.0,636.0,1055.0,6033.0,...,92.0,555.0,7.0,0.0,0.0,0.0,,,,Alabama
1,1,2,HOU,C.J. Stroud,QB,21.0,36.0,734.0,1148.0,8667.0,...,108.0,492.0,3.0,1.0,0.0,0.0,,,,Ohio St.
2,1,3,HOU,Will Anderson,LB,22.0,33.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,66.0,,21.0,Alabama
3,1,4,IND,Anthony Richardson,QB,21.0,16.0,176.0,348.0,2391.0,...,115.0,634.0,10.0,1.0,-1.0,0.0,,,,Florida
4,1,5,SEA,Devon Witherspoon,DB,22.0,33.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,134.0,1.0,4.5,Illinois


## 4b. Cleaning Passing Data

In [6]:
# Clean passing data: 
# drop irrelevant columns and rename duplicates for clarity

# Make a copy to avoid overwriting the original
passing_clean = passing.copy()

# Drop empty or irrelevant columns
passing_clean = passing_clean.drop(columns=[
 'Rk','Cmp%','TD%','Int%', 'Y/A','AY/A','Y/C','Y/G','Awards','Player-additional'
    ], errors='ignore')

# Rename confusing duplicate columns
passing_clean = passing_clean.rename(columns={
    # 'Cmp': 'Pass_Cmp',
    # 'Att': 'Pass_Att',
    # 'Yds': 'Pass_Yds',
    
})

# Check result
passing_clean.head()

Unnamed: 0,Player,Team,Conf,G,Cmp,Att,Yds,TD,Int,Rate
0,Bailey Zappe*,Western Kentucky,CUSA,14,475,687,5967,62,11,168.7
1,Bryce Young*,Alabama,SEC,15,366,547,4872,47,7,167.5
2,Will Rogers*,Mississippi State,SEC,13,505,683,4739,36,9,147.0
3,Brennan Armstrong,Virginia,ACC,11,326,500,4449,31,10,156.4
4,C.J. Stroud*,Ohio State,Big Ten,12,317,441,4435,44,6,186.6


## 4c. Cleaning Receiving Data

In [None]:
# Clean receiving data: 
# drop irrelevant columns and rename duplicates for clarity

# Make a copy to avoid overwriting the original
receiving_clean = receiving.copy()

# Drop empty or irrelevant columns
receiving_clean = receiving_clean.drop(columns=[

    ], errors='ignore')

# Rename confusing duplicate columns
receiving_clean = receiving_clean.rename(columns={
    # 'Cmp': 'Pass_Cmp',
    # 'Att': 'Pass_Att',
    # 'Yds': 'Pass_Yds',
    
})

# Check result
receiving_clean.head()

Unnamed: 0,Rk,Player,Team,Conf,G,Rec,Yds,Y/R,TD,Y/G,...,Yds.1,Y/A,TD.1,Y/G.1,Plays,Yds.2,Avg,TD.2,Awards,-9999
0,1,Jerreth Sterns*,Western Kentucky,CUSA,14,150,1902,12.7,17,135.9,...,17,3.4,0,1.2,155,1919,12.4,17,,jerreth-sterns-1
1,2,Deven Thompkins*,Utah State,MWC,14,102,1704,16.7,10,121.7,...,27,5.4,0,1.9,107,1731,16.2,10,,deven-thompkins-1
2,3,Jaxon Smith-Njigba*,Ohio State,Big Ten,13,95,1606,16.9,9,123.5,...,0,,0,0.0,95,1606,16.9,9,,jaxon-smith-njigba-1
3,4,Jordan Addison*,Pitt,ACC,14,100,1593,15.9,17,113.8,...,56,8.0,1,4.0,107,1649,15.4,18,AA,jordan-addison-1
4,5,Jameson Williams*,Alabama,SEC,15,79,1572,19.9,15,104.8,...,23,7.7,0,1.5,82,1595,19.5,15,,jameson-williams-1


## 4d. Cleaning Rushing Data

In [16]:
# Clean rushing data: 
# drop irrelevant columns and rename duplicates for clarity

# Make a copy to avoid overwriting the original
rushing_clean = rushing.copy()

# Drop empty or irrelevant columns
rushing_clean = rushing_clean.drop(columns=[

    ], errors='ignore')

# Rename confusing duplicate columns
rushing_clean = rushing_clean.rename(columns={
    # 'Cmp': 'Pass_Cmp',
    # 'Att': 'Pass_Att',
    # 'Yds': 'Pass_Yds',
    
})

# Check result
rushing_clean.head()

Unnamed: 0,Rk,Player,Team,Conf,G,Att,Yds,Y/A,TD,Y/G,...,Yds.1,Y/R,TD.1,Y/G.1,Plays,Yds.2,Avg,TD.2,Awards,-9999
0,1,Lew Nichols*,Central Michigan,MAC,13,341,1848,5.4,16,142.2,...,338,8.5,2,26.0,381,2186,5.7,18,,lew-nichols-1
1,2,Kenneth Walker III*,Michigan State,Big Ten,12,263,1636,6.2,18,136.3,...,89,6.8,1,7.4,276,1725,6.3,19,H-6AACamp,kenneth-walker-iii-1
2,3,Abram Smith*,Baylor,Big 12,14,257,1621,6.3,12,115.8,...,75,5.8,0,5.4,270,1696,6.3,12,,abram-smith-1
3,4,Tyler Allgeier*,BYU,Ind,13,276,1606,5.8,23,123.5,...,199,7.1,0,15.3,304,1805,5.9,23,,tyler-allgeier-1
4,5,Tyler Badie*,Missouri,SEC,12,268,1604,6.0,14,133.7,...,330,6.1,4,27.5,322,1934,6.0,18,,tyler-badie-1


## 5. Next Steps
- Clean passing, rushing, and receiving datasets
- EDA on college data
- Begin exploratory visualizations (e.g., pick vs. yards)
