In [203]:
import pandas as pd
import csv
from collections import defaultdict
import networkx as nx

In [204]:
# Load the NBA seasons dataset from a CSV file in the working directory.
# NOTE(review): the null-count summary later in this notebook lists an 'Unnamed: 0'
# column, which suggests the CSV carries a saved index — consider index_col=0; confirm.
nba = pd.read_csv("all_seasons.csv")

In [205]:
# Prepare the two integer year columns that the rookie filter compares.
# The leading four characters of 'season' are kept and cast to int.
nba['season_year'] = nba['season'].str.slice(stop=4).astype(int)

# Rows marked 'Undrafted' get a draft year of 0 so the whole column can be cast to int.
nba['draft_year'] = nba['draft_year'].replace(to_replace='Undrafted', value=0)
# NOTE(review): test.csv looks like a leftover debugging dump — confirm nothing reads it
# before removing this write.
nba.to_csv("test.csv")
nba['draft_year'] = nba['draft_year'].astype(int)

In [206]:
# Keep only the rows where the season's starting year equals the player's draft year.
rookies = nba.loc[nba['draft_year'].eq(nba['season_year'])]
rookies

Unnamed: 0.1,Unnamed: 0,player_name,team_abbreviation,age,player_height,player_weight,college,country,draft_year,draft_round,...,reb,ast,net_rating,oreb_pct,dreb_pct,usg_pct,ts_pct,ast_pct,season,season_year
0,0,Randy Livingston,HOU,22.0,193.04,94.800728,Louisiana State,USA,1996,2,...,1.5,2.4,0.3,0.042,0.071,0.169,0.487,0.248,1996-97,1996
38,38,Erick Dampier,IND,21.0,210.82,120.201880,Mississippi State,USA,1996,1,...,4.1,0.6,-2.0,0.107,0.216,0.218,0.451,0.074,1996-97,1996
50,50,Jerome Williams,DET,24.0,205.74,93.439952,Georgetown,USA,1996,1,...,1.5,0.2,3.0,0.144,0.182,0.181,0.419,0.071,1996-97,1996
67,67,John Wallace,NYK,23.0,205.74,102.058200,Syracuse,USA,1996,1,...,2.3,0.5,2.7,0.080,0.148,0.204,0.571,0.081,1996-97,1996
72,72,Jermaine O'Neal,POR,18.0,210.82,102.511792,,USA,1996,1,...,2.8,0.2,1.3,0.099,0.198,0.199,0.494,0.030,1996-97,1996
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12793,12793,Jake LaRavia,MEM,21.0,200.66,106.594120,Wake Forest,USA,2022,1,...,1.8,0.6,-7.6,0.044,0.100,0.117,0.531,0.068,2022-23,2022
12814,12814,Josh Minott,MIN,20.0,203.20,92.986360,Memphis,USA,2022,2,...,1.7,0.3,10.4,0.054,0.194,0.176,0.582,0.065,2022-23,2022
12819,12819,Johnny Davis,WAS,21.0,193.04,88.450440,Wisconsin,USA,2022,1,...,2.3,1.0,-6.2,0.020,0.125,0.192,0.446,0.100,2022-23,2022
12824,12824,Jaylin Williams,OKC,21.0,205.74,108.862080,Arkansas,USA,2022,2,...,4.9,1.6,-4.6,0.045,0.230,0.133,0.559,0.109,2022-23,2022


In [207]:
# Discard every row that has a missing value in any column, then show the
# per-column null counts as a check that none remain.
rookies = rookies.dropna(axis=0, how='any')
rookies.isnull().sum()

Unnamed: 0           0
player_name          0
team_abbreviation    0
age                  0
player_height        0
player_weight        0
college              0
country              0
draft_year           0
draft_round          0
draft_number         0
gp                   0
pts                  0
reb                  0
ast                  0
net_rating           0
oreb_pct             0
dreb_pct             0
usg_pct              0
ts_pct               0
ast_pct              0
season               0
season_year          0
dtype: int64

In [208]:
def create_network_with_players_and_points(data):
    """Group players by college into a college -> team adjacency list.

    Despite the function name, no points are stored: each entry carries the
    player's draft year.

    Args:
        data: DataFrame with 'college', 'team_abbreviation', 'player_name'
            and 'draft_year' columns.

    Returns:
        defaultdict(list) mapping each college to a list of
        (team_abbreviation, player_name, draft_year) tuples, in row order.

    Raises:
        KeyError: if a required column is missing from a non-empty frame.
    """
    adjacency_list = defaultdict(list)

    # Nothing to group; also keeps a column-less empty frame from raising KeyError.
    if len(data) == 0:
        return adjacency_list

    # Walk the four needed columns in lockstep instead of using iterrows(), which
    # builds a Series for every row and may upcast values to a common dtype.
    columns = ('college', 'team_abbreviation', 'player_name', 'draft_year')
    for college, team, player_name, draft_year in zip(*(data[name] for name in columns)):
        adjacency_list[college].append((team, player_name, draft_year))

    return adjacency_list

In [209]:
# Build the college -> [(team, player_name, draft_year), ...] adjacency list from the rookie rows.
adj_list = create_network_with_players_and_points(rookies)

In [210]:
# Persist the adjacency list, one row per college: the college name followed by the
# string form of its list of (team, player_name, draft_year) tuples.
# newline='' lets the csv writer control line endings (without it, blank rows appear
# on Windows); the explicit encoding keeps non-ASCII names platform-independent.
with open('adj_list.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    for key, value in adj_list.items():
        writer.writerow([key, value])

In [211]:
def most_connected_node(adjacency_list):
    """Find the node (college or team) that takes part in the most connections.

    Every (team, player, year) entry counts as one connection for the college
    it is listed under and one for the team it names. Colleges and teams share
    a single namespace of node labels.

    Args:
        adjacency_list: mapping of college -> iterable of
            (team, player, year) tuples.

    Returns:
        (node, count) for the node with the highest count; ties go to the
        node that was counted first. Returns (None, 0) when there are no
        connections at all.
    """
    connection_counts = defaultdict(int)

    for college, connections in adjacency_list.items():
        for team, _player, _year in connections:
            connection_counts[college] += 1  # one more player sent by this college
            connection_counts[team] += 1     # one more player received by this team

    # max() raises ValueError on an empty mapping, so handle "no edges" explicitly.
    if not connection_counts:
        return None, 0

    most_connected = max(connection_counts, key=connection_counts.get)
    return most_connected, connection_counts[most_connected]

In [212]:
# Find the college or team with the highest connection count in adj_list and report it.
most_connected, connections = most_connected_node(adj_list)
print(f"The most connected node is '{most_connected}' with {connections} connections.")


The most connected node is 'Kentucky' with 54 connections.


In [213]:
# # Create a directed graph
# G = nx.DiGraph()

# # Add edges to the graph
# for college, player_info in adj_list.items():
#     for team, player, draft_year in player_info:
#         G.add_edge(college, team, player=player, draft_year=draft_year)

# # Calculate out-degree for colleges (how many teams they supplied players to)
# college_out_degrees = {node: G.out_degree(node) for node in adj_list.keys()}

# # Calculate in-degree for NBA teams (how many colleges supplied their players)
# team_in_degrees = {node: G.in_degree(node) for node in G.nodes() if node not in adj_list.keys()}

# print("College Out-Degrees:")
# for college, degree in college_out_degrees.items():
#     print(f"{college}: {degree}")

In [214]:
# print("\nNBA Team In-Degrees:")
# for team, degree in team_in_degrees.items():
#     print(f"{team}: {degree}")