In [1]:
# Load packages
import pandas as pd
import numpy as np
import plotly
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.colors

In [3]:
# Read the draft-pick summary CSV into a DataFrame.
# The four Sankey stage columns are cast to strings so they can serve as node
# labels, and the count column is renamed to the shorter "Players".
df = (
    pd.read_csv("C:\\Users\\nickp\\OneDrive\\Desktop\\Home Data Projects\\Viz\\NFL\\Bills Draft\\billsDraftsSimple.csv")
    .astype(
        {
            "Group": str,
            "Draft Round": str,
            "Starter 2+": str,
            "PB Check": str,
        }
    )
    .rename(columns={"Count of Player": "Players"})
)
print(df.dtypes)  # quick check that the casts and the rename took effect
df

Group          object
Draft Round    object
Starter 2+     object
PB Check       object
Players         int64
dtype: object


Unnamed: 0,Group,Draft Round,Starter 2+,PB Check,Players
0,Special Teams,Round: 6,Starter for 2+ Yrs: No,Pro Bowls Made: None,2
1,Offense,Round: 1,Starter for 2+ Yrs: Yes,Pro Bowls Made: None,1
2,Offense,Round: 1,Starter for 2+ Yrs: Yes,Pro Bowls Made: Multiple,1
3,Offense,Round: 1,Starter for 2+ Yrs: No,Pro Bowls Made: None,1
4,Offense,Round: 2,Starter for 2+ Yrs: Yes,Pro Bowls Made: None,2
5,Offense,Round: 2,Starter for 2+ Yrs: Yes,Pro Bowls Made: Multiple,1
6,Offense,Round: 2,Starter for 2+ Yrs: No,Pro Bowls Made: None,2
7,Offense,Round: 3,Starter for 2+ Yrs: Yes,Pro Bowls Made: None,3
8,Offense,Round: 3,Starter for 2+ Yrs: Yes,Pro Bowls Made: One,1
9,Offense,Round: 3,Starter for 2+ Yrs: No,Pro Bowls Made: None,1


In [9]:
# Sankey configuration: everything a reader might want to tune lives here.

# --- Data mapping ---
# Stage columns, left to right; each adjacent pair becomes one layer of links.
cols = ["Group", "Draft Round", "Starter 2+", "PB Check"]
# Numeric column that determines how thick each link is drawn.
value = "Players"

# --- Figure text and size ---
title = "<b>Bills Draft Picks: 2013 - 2020</b>"
width = 1100
height = 600
fontsize = 12
fontfamily = "Arial"

# --- Colours ---
bgcolor = "White"  # plot background (colour name or hex code)
link_opacity = 0.3  # alpha applied to link colours
node_colors = px.colors.qualitative.G10  # palette the node colours are drawn from

In [10]:
# Build the Sankey link table and prepare the colour palette.

# Node labels: every distinct value found in the stage columns
# (np.unique returns them sorted).
labels = np.unique(df[cols].values)

# Every adjacent pair of stage columns contributes one source -> target link
# per data row, weighted by that row's `value` column.
s, t, v = [], [], []
for src_col, dst_col in zip(cols, cols[1:]):
    s.extend(df[src_col].tolist())
    t.extend(df[dst_col].tolist())
    v.extend(df[value].tolist())

# Collapse duplicate source/target pairs so each link appears once with its
# summed weight.
links = (
    pd.DataFrame({"source": s, "target": t, "value": v})
    .groupby(["source", "target"], as_index=False)
    .agg({"value": "sum"})
)

# Palette as (r, g, b) integer tuples in the 0-255 range.
# matplotlib's to_rgb returns floats in 0-1, but these tuples are later pasted
# into CSS-style "rgb(...)" / "rgba(...)" strings, whose channels are 0-255;
# unscaled fractions would render as near-black.
colors = [
    tuple(round(255 * channel) for channel in matplotlib.colors.to_rgb(palette_entry))
    for palette_entry in node_colors
]

# Holders filled in by the colouring step.
# `link_c` starts as an empty *string* column: it will hold "rgba(...)" text,
# and writing strings into an integer column is deprecated in recent pandas.
label_colors = []
links["link_c"] = ""

In [11]:
# Give every node a colour and convert link endpoints from label text to node
# indices (positions in `labels`), which is how the Sankey trace below is fed.
#
# Explicit lookups are used instead of calling DataFrame.replace once per
# label: replace scans every column (so it could rewrite `value` or `link_c`
# by accident) and leaves `source`/`target` half-converted mid-loop.
#
# NOTE: run this once per freshly built `links`; after it runs, `source` and
# `target` hold indices rather than labels.
palette_size = len(colors)
node_rgb = {
    label: colors[position % palette_size]  # wrap around when nodes outnumber colours
    for position, label in enumerate(labels)
}
node_index = {label: position for position, label in enumerate(labels)}

# A link takes the colour of its source node, with the opacity appended as alpha.
# Assigning the whole column at once avoids writing strings into whatever dtype
# the placeholder column was created with.
links["link_c"] = links["source"].map(
    lambda label: "rgba" + str(node_rgb[label] + (link_opacity,))
)

# Swap label text for node indices on the two endpoint columns only.
links["source"] = links["source"].map(node_index)
links["target"] = links["target"].map(node_index)

# Node colours as "rgb(...)" strings, in the same order as `labels`.
label_colors = ["rgb" + str(node_rgb[label]) for label in labels]

In [13]:
# Build the Sankey figure from the prepared node labels/colours and link table.
fig = go.Figure(
    data=[
        go.Sankey(
            node=dict(label=labels, color=label_colors),
            link=dict(
                # source/target are node indices (positions in `labels`)
                source=links["source"],
                target=links["target"],
                value=links["value"],
                color=links["link_c"],
            ),
        )
    ]
)

# Apply the layout settings chosen in the configuration cell.
fig.update_layout(
    font_size=fontsize,
    font_family=fontfamily,
    width=width,
    height=height,
    paper_bgcolor=bgcolor,
    # x=0.5 with xanchor="center" puts the middle of the title at the middle of
    # the figure; xanchor="left" would start the title at the midpoint instead.
    title={
        "text": title,
        "y": 0.9,
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
    },
)

fig.show()