In [1]:
import plotly.graph_objects as go
import pandas as pd

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/titanic.csv')

In [3]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [26]:
df1 = df.groupby(['Pclass', 'Sex'])['Name'].count().reset_index()
df1.columns = ['source', 'target', 'value']
df1['source'] = df1.source.map({1:'PClass1', 2: 'PClass2', 3: 'PClass3'})

df2 = df.groupby(['Sex', 'Survived'])['Name'].count().reset_index()
df2.columns = ['source', 'target', 'value']
df2['target'] = df2.target.map({0: 'Died', 1: 'Survived'})

In [27]:
new_df = pd.concat([df1, df2])
new_df

Unnamed: 0,source,target,value
0,PClass1,female,9
1,PClass1,male,21
2,PClass2,female,12
3,PClass2,male,18
4,PClass3,female,35
5,PClass3,male,61
0,female,Died,16
1,female,Survived,40
2,male,Died,86
3,male,Survived,14


In [36]:
unique_source_target = list(pd.unique(new_df[['source', 'target']].values.ravel('K')))

In [37]:
unique_source_target

['PClass1', 'PClass2', 'PClass3', 'female', 'male', 'Died', 'Survived']

In [38]:
# the value of list with it specified index
mapping_dict = {cols: idx for idx, cols in enumerate(unique_source_target)}

In [39]:
mapping_dict

{'PClass1': 0,
 'PClass2': 1,
 'PClass3': 2,
 'female': 3,
 'male': 4,
 'Died': 5,
 'Survived': 6}

In [40]:
new_df['source'] = new_df['source'].map(mapping_dict)
new_df['target'] = new_df['target'].map(mapping_dict)
new_df

Unnamed: 0,source,target,value
0,0,3,9
1,0,4,21
2,1,3,12
3,1,4,18
4,2,3,35
5,2,4,61
0,3,5,16
1,3,6,40
2,4,5,86
3,4,6,14


In [47]:
df_dict = new_df.to_dict(orient='list')

In [48]:
df_dict

{'source': [0, 0, 1, 1, 2, 2, 3, 3, 4, 4],
 'target': [3, 4, 3, 4, 3, 4, 5, 6, 5, 6],
 'value': [9, 21, 12, 18, 35, 61, 16, 40, 86, 14]}

In [50]:
fig = go.Figure(data= [go.Sankey(
    node=dict(
        pad=15,
        thickness=24,
        line = dict(color='orange', width=0.6),
        label=unique_source_target,
        color='blue'
    ),
    link=dict(
        source = df_dict['source'],
        target = df_dict['target'],
        value = df_dict['value']
    )
)])

In [51]:
fig.update_layout(title_text = 'Titanic Survival Sankey Diagram', font_size=10)
fig.show()