In [1]:
# Dependencies and Setup
import pandas as pd

In [2]:
#Creating function based on Clean_2 files
def clean_nba(file_import, game_cutoff = 43):
    print(f'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')    
    print(f'Importing {file_import}...')
    print(f'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    
    #Open CSV
    db = pd.read_csv(f'../ResourcesFull/{file_import}.csv', encoding = "ISO-8859-1")
    
    #Extract Player ID
    db[["Player","Player_ID"]] = db['Player'].str.split("\\", 1, expand=True)
    
    #Cutoff Low games and traded players' second team
    db = db[db["G"] > game_cutoff]
    
    #Remove TOT team entries (for Total)
    db_filtered = db[db.Tm != "TOT"]
    print(f'There are {db_filtered["Player_ID"].duplicated().sum()} duplicated players to check')
    
    #player_names checks (now less vital with ids)
    player_names = db_filtered["Player"]
    character_issues = player_names[player_names.str.contains("[^\w'\-\.\* ]", regex=True)]
    print(f'These names may need manual correcting:\n{character_issues}')
          
    #Allstar Name corrections
    corrected_names = []
    for player in player_names:
        if player.find("*") != (-1):
            player = player.replace("*","")
        elif player == "Ömer A??k":
            player = player.replace("A??k","Asik")
        corrected_names.append(player)
    db_filtered["Player"] = corrected_names
    
    #checking any missing point data
    print(f'The only missing data should be 3P shots:\n{db_filtered.isna().sum()}')
    
    #fill NA 3-pointers with zeroes (all cases have been from 0 3P shots attempted)
    db_filtered["3P%"] = db_filtered["3P%"].fillna(0)
          
    #moving year to front
    move_col = db_filtered.pop('YR')
    db_filtered.insert(1, 'YR', move_col)
          
    #moving id to front
    move_col = db_filtered.pop('Player_ID')
    db_filtered.insert(1, 'Player_ID', move_col)
    
    #creating cleaned csv
    db_filtered.to_csv(f'../Resources_Cleaned/{file_import}_clean.csv', encoding = "ISO-8859-1")
    character_issues.to_csv(f'../Resources_Cleaned/{file_import}_brokennames.csv', encoding = "ISO-8859-1")
    print(f'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print(f'Cleaning Complete')
    print(f'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')

In [3]:
for year in range(2008, 2022):
    file_import = f'stats{year}'
    clean_nba(file_import)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Importing stats2008...
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are 0 duplicated players to check
These names may need manual correcting:
53         Andris Biedri?
160           Goran Dragi?
277            Marko Jari?
307           Nenad Krsti?
358          Darko Mili?i?
386       Rasho Nesterovi?
414         Sasha Pavlovi?
432    Vladimir Radmanovi?
505        Peja Stojakovi?
529          Hedo Türko?lu
532              Roko Uki?
537          Sasha Vuja?i?
Name: Player, dtype: object
The only missing data should be 3P shots:
Rk            0
Player        0
Pos           0
Age           0
Tm            0
G             0
GS            0
MP            0
FG            0
FGA           0
FG%           0
3P            0
3PA           0
3P%          24
2P            0
2PA           0
2P%           0
eFG%          0
FT            0
FTA           0
FT%           0
ORB           0
DRB           0
TRB           0
AST          

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



The only missing data should be 3P shots:
Rk            0
Player        0
Pos           0
Age           0
Tm            0
G             0
GS            0
MP            0
FG            0
FGA           0
FG%           0
3P            0
3PA           0
3P%          18
2P            0
2PA           0
2P%           0
eFG%          0
FT            0
FTA           0
FT%           1
ORB           0
DRB           0
TRB           0
AST           0
STL           0
BLK           0
TOV           0
PF            0
PTS           0
YR            0
GMSCR         0
Player_ID     0
dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Cleaning Complete
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Importing stats2016...
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are 0 duplicated players to check
These names may need manual correcting:
49          D?vis Bert?ns
56       Bojan Bogdanovi?
136          Goran Dragi?
261  