In [5]:
import pandas as pd

df = pd.read_csv('/Users/ahmed2/Downloads/acc_players-2324F.csv')

print(df.head())

  Unnamed: 0     Unnamed: 1 Unnamed: 2 Unnamed: 3      Unnamed: 4 Totals  \
0         Rk         Player      Class        Pos          School      G   
1          1   Amaree Abram         SO          G    Georgia Tech     10   
2          2   Sola Adebisi         FR          F   Florida State      7   
3          3  Prince Aligbe         SO          F  Boston College     35   
4          4     Abe Atiyeh         SR          G  Boston College      4   

  Unnamed: 6 Unnamed: 7 Unnamed: 8 Unnamed: 9  ... Unnamed: 11 Unnamed: 12  \
0         MP        TRB        AST        STL  ...         TOV          PF   
1        108         17         11          1  ...           9          10   
2          9          1          1          0  ...           1           1   
3        651        119         21         15  ...          30          51   
4          6          0          0          0  ...           1           0   

  Unnamed: 13 Shooting Unnamed: 15 Unnamed: 16 Unnamed: 17 Advanced  \
0  

In [39]:
# convert the columns to be numeric
df[['PTS', 'MP', 'TRB']] = df[['PTS', 'MP', 'TRB']].apply(pd.to_numeric, errors='coerce')

# calculate the total poins scored by all the players combined 
total_points = df['PTS'].sum()

# find the player who has the most minutes played 
max_minutes_player = df.loc[df['MP'].idxmax()]

# top 5 plaers in terms of total rebounds 
top_rebounders = df.nlargest(5, 'TRB')

# display results 
print(f"Total points scored by all players: {total_points}")

print("\nPlayer with the most minutes played:")
print(f"Name: {max_minutes_player['Player']}")
print(f"Team: {max_minutes_player['School']}")
print(f"Minutes Played: {max_minutes_player['MP']}")

print("\nTop 5 players in terms of total rebounds:")
print(top_rebounders[['Player', 'School', 'TRB']])

Total points scored by all players: 38411

Player with the most minutes played:
Name: Casey Morsell
Team: NC State
Minutes Played: 1333

Top 5 players in terms of total rebounds:
              Player          School  TRB
5      Armando Bacot  North Carolina  380
178  Ian Schieffelin         Clemson  340
90   Harrison Ingram  North Carolina  327
44    Mohamed Diarra        NC State  311
149    Norchad Omier      Miami (FL)  309


In [41]:
# convert the columns to be numeric
df[['MP', 'AST', 'BLK']] = df[['MP', 'AST', 'BLK']].apply(pd.to_numeric, errors='coerce')

# new DF that contains only the players who played more than 500 minutes 
df_filtered = df[df['MP'] > 500].copy()

# player with the highest total assists 
top_assist_player = df_filtered.loc[df_filtered['AST'].idxmax()]

# top 3 assist leaders in the league 
top_3_assists = df.nlargest(3, 'AST')

# top 3 shot blockers 
top_3_blockers = df.nlargest(3, 'BLK')

# display results 
print(f"Number of players who played more than 500 minutes: {len(df_filtered)}")

print("\nPlayer with the highest total assists (among those who played >500 minutes):")
print(f"Name: {top_assist_player['Player']}")
print(f"Team: {top_assist_player['School']}")
print(f"Total Assists: {top_assist_player['AST']}")

print("\nTop 3 Assist Leaders in the League:")
print(top_3_assists[['Player', 'School', 'AST']])

print("\nTop 3 Shot Blockers in the League:")
print(top_3_blockers[['Player', 'School', 'BLK']])

Number of players who played more than 500 minutes: 98

Player with the highest total assists (among those who played >500 minutes):
Name: Reece Beekman
Team: Virginia
Total Assists: 212

Top 3 Assist Leaders in the League:
             Player          School  AST
7     Reece Beekman        Virginia  212
215  Jaeden Zackery  Boston College  152
21    Elliot Cadeau  North Carolina  150

Top 3 Shot Blockers in the League:
            Player          School  BLK
51       Ryan Dunn        Virginia   77
157   Quinten Post  Boston College   61
5    Armando Bacot  North Carolina   56


In [43]:
# convert the columns to be numeric
df[['PTS', 'AST']] = df[['PTS', 'AST']].apply(pd.to_numeric, errors='coerce')

# group players by the school and then calculate the total points and assists of each
school_points = df.groupby('School')['PTS'].sum().sort_values(ascending=False)
school_assists = df.groupby('School')['AST'].sum().sort_values(ascending=False)

# display results 
print("Total points scored by each school:")
for school, points in school_points.items():
    print(f"{school}: {points:.0f}")

print("\nTotal assists for each team:")
for school, assists in school_assists.items():
    print(f"{school}: {assists:.0f}")

print("\nTop 3 schools by total points scored:")
for school in school_points.head(3).index:
    print(f"{school}: {school_points[school]:.0f} points, {school_assists[school]:.0f} assists")

Total points scored by each school:
NC State: 3101
North Carolina: 3032
Duke: 2830
Clemson: 2785
Wake Forest: 2733
Boston College: 2667
Virginia Tech: 2547
Florida State: 2526
Pittsburgh: 2495
Syracuse: 2442
Miami (FL): 2424
Louisville: 2304
Georgia Tech: 2272
Virginia: 2140
Notre Dame: 2113

Total assists for each team:
Duke: 551
NC State: 536
North Carolina: 536
Clemson: 533
Virginia Tech: 514
Boston College: 509
Virginia: 509
Miami (FL): 454
Pittsburgh: 452
Syracuse: 442
Wake Forest: 429
Georgia Tech: 425
Florida State: 406
Louisville: 356
Notre Dame: 335

Top 3 schools by total points scored:
NC State: 3101 points, 536 assists
North Carolina: 3032 points, 536 assists
Duke: 2830 points, 551 assists


In [57]:
# Reflection:
# Working with CSV files and Pandas DataFrames on this ACC basketball statistics assignment taught me valuable data manipulation abilities. I learnt how to read sports performance data files, convert statistical columns to appropriate numerical types, and do basic analysis such as grouping players by school and sorting by performance measures like points and assists. These approaches are critical for sports data analysis and will be valuable in future projects across other industries. For example, I could use these skills to analyze financial data in CSV format for a commercial project, evaluate environmental sensor readings for a climate study, or review patient records for a healthcare research venture.
# The most challenging part was getting started, especially converting columns to numeric types. I encountered errors initially, which highlighted the importance of proper data type handling. Overcoming this involved carefully examining the data and understanding pandas functions like pd.to_numeric(). This experience show me how crucial data cleaning is in the analysis process.
# The skills gained from analyzing basketball statistics can be applied to other datasets. Similar techniques could be used in business to analyze sales data or in healthcare to study patient outcomes. The assignment has prepared me for future data challenges by emphasizing both technical skills and problem-solving abilities. It's given me a solid foundation for handling real-world data, which is often messy and unpredictable.