In [148]:
import pandas as pd
import csv
from collections import defaultdict
import networkx as nx

In [149]:
# Load the player-season table from all_seasons.csv (path is relative to the working directory).
nba = pd.read_csv("all_seasons.csv")

In [150]:
# Prepare integer year columns so rookie seasons can be identified later.
# 'season' values look like "1996-97"; the leading four characters are the starting year.
nba['season_year'] = nba['season'].str.slice(0, 4).astype(int)

# Players who were never drafted carry the label 'Undrafted'; map it to 0 so the
# whole column can be cast to an integer dtype.
nba['draft_year'] = nba['draft_year'].replace(to_replace='Undrafted', value=0)
# NOTE(review): this snapshot is written before the integer cast and looks like a
# leftover debugging dump -- confirm whether test.csv is still needed.
nba.to_csv("test.csv")
nba['draft_year'] = nba['draft_year'].astype(int)

In [151]:
# Keep only the rows whose season starting year equals the player's draft year.
rookies = nba.loc[nba['draft_year'].eq(nba['season_year'])]
rookies

Unnamed: 0.1,Unnamed: 0,player_name,team_abbreviation,age,player_height,player_weight,college,country,draft_year,draft_round,...,reb,ast,net_rating,oreb_pct,dreb_pct,usg_pct,ts_pct,ast_pct,season,season_year
0,0,Randy Livingston,HOU,22.0,193.04,94.800728,Louisiana State,USA,1996,2,...,1.5,2.4,0.3,0.042,0.071,0.169,0.487,0.248,1996-97,1996
38,38,Erick Dampier,IND,21.0,210.82,120.201880,Mississippi State,USA,1996,1,...,4.1,0.6,-2.0,0.107,0.216,0.218,0.451,0.074,1996-97,1996
50,50,Jerome Williams,DET,24.0,205.74,93.439952,Georgetown,USA,1996,1,...,1.5,0.2,3.0,0.144,0.182,0.181,0.419,0.071,1996-97,1996
67,67,John Wallace,NYK,23.0,205.74,102.058200,Syracuse,USA,1996,1,...,2.3,0.5,2.7,0.080,0.148,0.204,0.571,0.081,1996-97,1996
72,72,Jermaine O'Neal,POR,18.0,210.82,102.511792,,USA,1996,1,...,2.8,0.2,1.3,0.099,0.198,0.199,0.494,0.030,1996-97,1996
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12793,12793,Jake LaRavia,MEM,21.0,200.66,106.594120,Wake Forest,USA,2022,1,...,1.8,0.6,-7.6,0.044,0.100,0.117,0.531,0.068,2022-23,2022
12814,12814,Josh Minott,MIN,20.0,203.20,92.986360,Memphis,USA,2022,2,...,1.7,0.3,10.4,0.054,0.194,0.176,0.582,0.065,2022-23,2022
12819,12819,Johnny Davis,WAS,21.0,193.04,88.450440,Wisconsin,USA,2022,1,...,2.3,1.0,-6.2,0.020,0.125,0.192,0.446,0.100,2022-23,2022
12824,12824,Jaylin Williams,OKC,21.0,205.74,108.862080,Arkansas,USA,2022,2,...,4.9,1.6,-4.6,0.045,0.230,0.133,0.559,0.109,2022-23,2022


In [152]:
def create_network_with_players_and_points(data, missing_college=float('nan')):
    """Group player rows by college into an adjacency list.

    Despite the function's name, the third element of each stored tuple is the
    player's draft year, not a points value.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'college', 'team_abbreviation',
        'player_name' and 'draft_year'.
    missing_college : hashable, optional
        Key under which rows with a missing college (NaN / None) are grouped.
        The default is a single NaN object created once at definition time, so
        all such rows share one key that prints as ``nan``. Pass a label such
        as ``'No College'`` to get a key that can be looked up by value.

    Returns
    -------
    collections.defaultdict
        Maps each college to a list of ``(team, player_name, draft_year)``
        tuples, in the row order of ``data``.

    Raises
    ------
    KeyError
        If any of the required columns is absent from ``data``.
    """
    adjacency_list = defaultdict(list)

    # Walk the four needed columns in lockstep instead of materialising a
    # Series per row with iterrows(), which is much slower on large frames.
    columns = (
        data['college'],
        data['team_abbreviation'],
        data['player_name'],
        data['draft_year'],
    )
    for college, team, player_name, draft_year in zip(*columns):
        # NaN != NaN, so distinct NaN objects would otherwise become distinct
        # dictionary keys; funnel every missing value to one explicit key.
        if pd.isna(college):
            college = missing_college

        adjacency_list[college].append((team, player_name, draft_year))

    return adjacency_list

In [153]:
# Group the rookie rows by college: {college: [(team, player_name, draft_year), ...]}.
adj_list2 = create_network_with_players_and_points(rookies)

In [154]:
# Display the full college -> [(team, player_name, draft_year), ...] mapping.
adj_list2

defaultdict(list,
            {'Louisiana State': [('HOU', 'Randy Livingston', 1996),
              ('SAC', 'Jabari Smith', 2000),
              ('VAN', 'Stromile Swift', 2000),
              ('NOK', 'Brandon Bass', 2005),
              ('CHI', 'Tyrus Thomas', 2006),
              ('BOS', 'Glen Davis', 2007),
              ('GSW', 'Anthony Randolph', 2008),
              ('NOH', 'Marcus Thornton', 2009),
              ('MIL', "Johnny O'Bryant III", 2014),
              ('MEM', 'Jarell Martin', 2015),
              ('BOS', 'Jordan Mickey', 2015),
              ('BOS', 'Tremont Waters', 2019),
              ('ATL', 'Skylar Mays', 2020),
              ('BKN', 'Cam Thomas', 2021),
              ('HOU', 'Tari Eason', 2022)],
             'Mississippi State': [('IND', 'Erick Dampier', 1996),
              ('MEM', 'Lawrence Roberts', 2005),
              ('PHI', 'Arnett Moultrie', 2012),
              ('SAS', 'Quinndary Weatherspoon', 2019),
              ('BKN', 'Reggie Perry', 2020),
      

In [155]:
# Dump the adjacency list to dict.csv, one row per college: the college key followed by
# the string form of its [(team, player_name, draft_year), ...] list.
# newline='' hands line-ending control to the csv module (prevents blank rows between
# records on Windows); the explicit encoding keeps the file identical across platforms.
with open('dict.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    for key, value in adj_list2.items():
        writer.writerow([key, value])

In [156]:
# Build a directed college -> team graph from the adjacency list.
G = nx.DiGraph()

# A DiGraph holds at most one edge per (college, team) pair, and add_edge on an existing
# edge overwrites its attributes. So that rookies are not silently dropped when a college
# sent several players to the same team, every rookie is accumulated in the edge's
# 'players' list. 'player' and 'pts' are still set to the most recently added rookie,
# as before; note that 'pts' holds the draft year, not a points value.
for college, player_info in adj_list2.items():
    for team, player, draft_year in player_info:
        edge_data = G.get_edge_data(college, team, default={})
        players = edge_data.get('players', []) + [(player, draft_year)]
        G.add_edge(college, team, player=player, pts=draft_year, players=players)

# Out-degree of a college = number of distinct teams it supplied rookies to
college_out_degrees = {node: G.out_degree(node) for node in adj_list2}

# In-degree of a team = number of distinct colleges that supplied its rookies;
# team nodes are the graph nodes that are not college keys of the adjacency list
team_in_degrees = {node: G.in_degree(node) for node in G.nodes() if node not in adj_list2}

print("College Out-Degrees:")
for college, degree in college_out_degrees.items():
    print(f"{college}: {degree}")

College Out-Degrees:
Louisiana State: 12
Mississippi State: 6
Georgetown: 8
Syracuse: 14
nan: 34
North Carolina: 19
Michigan State: 15
Texas Tech: 7
Western Kentucky: 4
Indiana: 11
Arkansas-Little Rock: 1
Georgia: 7
California: 7
Louisville: 13
Alabama: 9
Georgia Tech: 12
Tennessee: 9
Santa Clara: 2
Kentucky: 24
Wright State: 1
Connecticut: 14
North Carolina State: 7
Memphis: 14
Drexel: 1
Murray State: 4
Massachusetts: 3
Villanova: 15
Washington State: 4
Central State (OH): 1
Arizona: 22
West Florida: 1
Marquette: 9
Michigan: 14
Iowa State: 10
Utah: 7
Wake Forest: 7
Maryland: 11
Kansas: 20
Southern California: 10
San Jose State: 1
Colgate: 1
College of Charleston: 4
Bradley: 2
Bowling Green: 2
Cincinnati: 8
Providence: 6
Colorado: 8
New Mexico: 5
UCLA: 22
Stanford: 11
Cal State-Bakersfield: 1
Austin Peay: 2
Minnesota: 6
Long Beach State: 1
Florida State: 12
Tennessee-Chattanooga: 1
Tulane: 2
Rhode Island: 3
St. John's (NY): 5
Oregon State: 2
Ball State: 1
Baylor: 11
Valparaiso: 2
Toled

In [157]:
# Print every entry of team_in_degrees as "<team>: <in-degree>" under a header line.
print("\nNBA Team In-Degrees:")
for team in team_in_degrees:
    print(f"{team}: {team_in_degrees[team]}")


NBA Team In-Degrees:
HOU: 28
SAC: 32
VAN: 10
NOK: 5
CHI: 32
BOS: 35
GSW: 31
NOH: 10
MIL: 27
MEM: 41
ATL: 33
BKN: 15
IND: 28
PHI: 38
SAS: 24
DET: 36
WAS: 30
POR: 33
NYK: 24
SEA: 8
DEN: 30
MIN: 25
CLE: 26
UTA: 30
LAL: 28
DAL: 23
PHX: 25
TOR: 32
LAC: 33
NJN: 19
ORL: 33
MIA: 19
CHA: 26
OKC: 23
NOP: 7
CHH: 5
