In [152]:
import pandas as pd
import numpy as np

In [153]:
# Read the two raw CSV inputs: the MVP table and the standings table.
mvps = pd.read_csv(filepath_or_buffer='../data/raw/1981_2024_mvps.csv')
standings = pd.read_csv(filepath_or_buffer='../data/raw/1981_2024_standings.csv')
# Preview the first five standings rows.
print(standings.head(n=5))

   Year                Team     W     L   W/L%    GB   PS/G   PA/G   SRS
0  1981   Atlantic Division   NaN   NaN    NaN   NaN    NaN    NaN   NaN
1  1981      Boston Celtics  62.0  20.0  0.756     —  109.9  104.0  6.05
2  1981  Philadelphia 76ers  62.0  20.0  0.756     —  111.7  103.8  7.76
3  1981     New York Knicks  50.0  32.0  0.610  12.0  107.9  106.3  2.00
4  1981  Washington Bullets  39.0  43.0  0.476  23.0  105.6  105.6  0.42


The standings table contains division header rows (e.g. "Atlantic Division") that carry no statistics, so we remove them.

In [154]:
# Flag the division header rows (e.g. "Atlantic Division"); they are not teams.
# regex=False treats 'Division' as a literal substring, and na=True also flags
# rows whose Team is missing, so those rows are dropped explicitly rather than
# as a side effect of comparing NaN with False.
is_division_row = standings['Team'].str.contains('Division', regex=False, na=True)
standings = standings[~is_division_row]
print(standings.head(5))

   Year                Team     W     L   W/L%    GB   PS/G   PA/G   SRS
1  1981      Boston Celtics  62.0  20.0  0.756     —  109.9  104.0  6.05
2  1981  Philadelphia 76ers  62.0  20.0  0.756     —  111.7  103.8  7.76
3  1981     New York Knicks  50.0  32.0  0.610  12.0  107.9  106.3  2.00
4  1981  Washington Bullets  39.0  43.0  0.476  23.0  105.6  105.6  0.42
5  1981     New Jersey Nets  24.0  58.0  0.293  38.0  106.9  113.0 -5.15


We only need the win percentage (`W/L%`), and we convert each team name to its abbreviation so that the standings can be matched against the `Tm` column of the MVP data.

In [155]:
# Discard every standings statistic other than the win percentage;
# Year, Team and W/L% are the columns left afterwards.
standings = standings.drop(
    labels=["W", "L", 'GB', 'PS/G', 'PA/G', 'SRS'],
    axis="columns",
)
print(standings.head(n=5))

   Year                Team   W/L%
1  1981      Boston Celtics  0.756
2  1981  Philadelphia 76ers  0.756
3  1981     New York Knicks  0.610
4  1981  Washington Bullets  0.476
5  1981     New Jersey Nets  0.293


In [156]:
# Full team name -> abbreviation, matching the codes used in the `Tm` column
# of the MVP data. Historical franchise names are listed after the current ones.
team_map = {
    'Atlanta Hawks': 'ATL',
    'Boston Celtics': 'BOS',
    'Brooklyn Nets': 'BRK',
    'Charlotte Hornets': 'CHO',
    'Chicago Bulls': 'CHI',
    'Cleveland Cavaliers': 'CLE',
    'Dallas Mavericks': 'DAL',
    'Denver Nuggets': 'DEN',
    'Detroit Pistons': 'DET',
    'Golden State Warriors': 'GSW',
    'Houston Rockets': 'HOU',
    'Indiana Pacers': 'IND',
    'Los Angeles Clippers': 'LAC',
    'Los Angeles Lakers': 'LAL',
    'Memphis Grizzlies': 'MEM',
    'Miami Heat': 'MIA',
    'Milwaukee Bucks': 'MIL',
    'Minnesota Timberwolves': 'MIN',
    'New Orleans Pelicans': 'NOP',
    'New York Knicks': 'NYK',
    'Oklahoma City Thunder': 'OKC',
    'Orlando Magic': 'ORL',
    'Philadelphia 76ers': 'PHI',
    'Phoenix Suns': 'PHO',
    'Portland Trail Blazers': 'POR',
    'Sacramento Kings': 'SAC',
    'San Antonio Spurs': 'SAS',
    'Toronto Raptors': 'TOR',
    'Utah Jazz': 'UTA',
    'Washington Wizards': 'WAS',
    'Washington Bullets': 'WSB',
    "New Jersey Nets": "NJN",
    "Kansas City Kings": "KCK",
    "San Diego Clippers": "SDC",
    "Seattle SuperSonics": "SEA",
    "Vancouver Grizzlies": "VAN",
    "New Orleans Hornets": "NOH",
    "Charlotte Bobcats": "CHA",
    "New Orleans/Oklahoma City Hornets": "NOK",
}

team_names = standings['Team']
team_abbr = team_names.map(team_map)

# The name 'Charlotte Hornets' covers two different abbreviations: the
# original franchise (seasons up to 2002) is 'CHH', while 'CHO' only applies
# from 2015 onwards. Without this override, MVP nominees from the original
# Hornets would never match a standings row and would get a NaN W/L%.
is_original_hornets = (team_names == 'Charlotte Hornets') & (standings['Year'] <= 2002)
team_abbr = team_abbr.mask(is_original_hornets, 'CHH')

# Surface names missing from team_map instead of silently turning them into NaN.
unmapped_names = team_names[team_abbr.isna()].dropna().unique()
if len(unmapped_names) > 0:
    print(f"Unmapped team names: {sorted(unmapped_names)}")

standings['Team'] = team_abbr
print(standings.head(5))

   Year Team   W/L%
1  1981  BOS  0.756
2  1981  PHI  0.756
3  1981  NYK  0.610
4  1981  WSB  0.476
5  1981  NJN  0.293


For each player in the MVP data, we add their team's W/L% for the corresponding season and remove all unimportant or redundant columns.

In [157]:
# Remove the MVP-table columns that are not used further on.
mvps = mvps.drop(
    labels=["Age", "First", "Pts Won", "Pts Max", "WS"],
    axis="columns",
)
print(mvps.head(n=5))

   Year               Player   Tm  Share   G    MP   PTS   TRB  AST  STL  BLK  \
0  1981        Julius Erving  PHI  0.658  82  35.0  24.6   8.0  4.4  2.1  1.8   
1  1981           Larry Bird  BOS  0.613  82  39.5  21.2  10.9  5.5  2.0  0.8   
2  1981  Kareem Abdul-Jabbar  LAL  0.414  80  37.2  26.2  10.3  3.4  0.7  2.9   
3  1981         Moses Malone  HOU  0.261  80  40.6  27.8  14.8  1.8  1.0  1.9   
4  1981        George Gervin  SAS  0.120  82  33.7  27.1   5.1  3.2  1.1  0.7   

     FG%    3P%    FT%  WS/48  
0  0.521  0.222  0.787  0.231  
1  0.478  0.270  0.863  0.160  
2  0.574  0.000  0.766  0.230  
3  0.522  0.333  0.757  0.202  
4  0.492  0.257  0.826  0.182  


In [158]:
# Attach the team win percentage for the matching (Year, Tm) pair to each
# MVP row. The standings key is renamed to 'Tm' up front so the join adds only
# the W/L% column. It is a left join, so every MVP row is kept; rows with no
# matching standings entry get NaN in W/L%.
mvps = mvps.merge(
    standings[['Year', 'Team', 'W/L%']].rename(columns={'Team': 'Tm'}),
    on=['Year', 'Tm'],
    how='left',
)
print(mvps.head(n=5))
# Save the enriched table without the index column.
mvps.to_csv("../data/processed/mvps.csv", index=False)

   Year               Player   Tm  Share   G    MP   PTS   TRB  AST  STL  BLK  \
0  1981        Julius Erving  PHI  0.658  82  35.0  24.6   8.0  4.4  2.1  1.8   
1  1981           Larry Bird  BOS  0.613  82  39.5  21.2  10.9  5.5  2.0  0.8   
2  1981  Kareem Abdul-Jabbar  LAL  0.414  80  37.2  26.2  10.3  3.4  0.7  2.9   
3  1981         Moses Malone  HOU  0.261  80  40.6  27.8  14.8  1.8  1.0  1.9   
4  1981        George Gervin  SAS  0.120  82  33.7  27.1   5.1  3.2  1.1  0.7   

     FG%    3P%    FT%  WS/48   W/L%  
0  0.521  0.222  0.787  0.231  0.756  
1  0.478  0.270  0.863  0.160  0.756  
2  0.574  0.000  0.766  0.230  0.659  
3  0.522  0.333  0.757  0.202  0.488  
4  0.492  0.257  0.826  0.182  0.634  
