In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [2]:
# Community table: semicolon-separated CSV.
df = pd.read_csv('all_communities_infomap.csv', sep=";")

# Gender table: keep only the first row for each 'Name'.
gender = pd.read_csv('gendered_titles_final.csv').drop_duplicates(subset='Name')

In [None]:
# Communities with 50 or fewer members are pooled into a single 'Other' bucket.
# Cast to object first so that numeric IDs (if the CSV column was parsed as
# numbers) and the string label can share the column without relying on
# implicit upcasting, which recent pandas releases deprecate.
df['Community ID'] = df['Community ID'].astype(object)
df.loc[df['Size'] <= 50, 'Community ID'] = 'Other'

# One row per (community, member): split the ', '-separated member list,
# explode it, and call the resulting column 'Name'.
df = (
    df.assign(Members=df['Members'].str.split(', '))
      .explode('Members')
      .rename(columns={'Members': 'Name'})
)

# Take an explicit copy of the 2024 rows: they are modified on the next line,
# and writing into a filtered slice of `df` raises SettingWithCopyWarning.
df24 = df[df['Year'] == 2024].copy()

# In 2024 only communities 1, 2, 3, 4 and 8 keep their own label.
df24.loc[~df24['Community ID'].isin([1, 2, 3, 4, 8]), 'Community ID'] = 'Other'

# Restrict every year to the names that occur in the 2024 rows.
names24 = set(df24['Name'].unique())
df = df[df['Name'].isin(names24)]

In [4]:
#filter all dataframes to only include the names that are in the English 2024 data
def prep_dataframe(df,year,endnames):
    """Return one year's community label for every name in ``endnames``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with at least the columns 'Year', 'Name' and
        'Community ID'.
    year : int
        Value of the 'Year' column to select.
    endnames : set (or any iterable) of names
        Names that must appear in the result. Names with no row in ``year``
        are added with the community label "Not in data".

    Returns
    -------
    pandas.DataFrame
        Columns 'CommunityID' (always strings) and 'Name', with a fresh
        0..n-1 index. Rows are de-duplicated on (Name, Community ID).
        Names present in ``year`` but absent from ``endnames`` are kept as-is.
    """
    yeardf = (
        df.loc[df['Year'] == year, ['Community ID', 'Name']]
          .drop_duplicates(subset=['Name', 'Community ID'])
    )

    # Names we are expected to report on but that have no row in this year.
    missingnames = set(endnames).difference(yeardf['Name'].unique())

    if missingnames:
        missing_df = pd.DataFrame({
            'Community ID': ["Not in data"] * len(missingnames),
            'Name': list(missingnames),
        })
        yeardf = pd.concat([yeardf, missing_df], ignore_index=True)
    else:
        # Nothing to pad: skip concatenating an empty frame but still hand
        # back the same fresh index the concat branch produces.
        yeardf = yeardf.reset_index(drop=True)

    yeardf = yeardf.rename(columns={'Community ID': 'CommunityID'})
    # The original called .astype(str) without assigning the result, so the
    # cast was silently discarded; assign it so the labels really are strings.
    yeardf['CommunityID'] = yeardf['CommunityID'].astype(str)
    return yeardf

# 2024 is the reference year: it only needs its community column renamed.
df_2024 = df24.rename(columns={'Community ID': 'CommunityID'})

# Every earlier year is reduced to (CommunityID, Name) and padded with
# "Not in data" rows for the 2024 names that are absent from that year.
df_2008, df_2012, df_2016, df_2020 = (
    prep_dataframe(df, earlier_year, names24)
    for earlier_year in (2008, 2012, 2016, 2020)
)

In [5]:
# Stack the per-year frames into one long table, stamping each with its year.
frames_by_year = {
    2008: df_2008,
    2012: df_2012,
    2016: df_2016,
    2020: df_2020,
    2024: df_2024,
}
df = pd.concat([frame.assign(Year=yr) for yr, frame in frames_by_year.items()])

In [6]:
# Wide table: one row per Name, one column per year, holding the first
# community label recorded for that name in that year.
pivot_years = [2008, 2012, 2016, 2020, 2024]
df_pivot = df.pivot_table(index="Name", columns="Year", values="CommunityID", aggfunc='first')
df_pivot = df_pivot[pivot_years]

# Year columns become 'C<year>'.
# NOTE(review): after the pivot, 'Year' looks like the name of the column axis
# rather than a column label, so the 'Year' -> 'Index' entry presumably matches
# nothing; it is kept unchanged here. Confirm and drop if so.
column_labels = {yr: f'C{yr}' for yr in pivot_years}
column_labels['Year'] = 'Index'
df_combined = df_pivot.reset_index().rename(columns=column_labels)

In [7]:
# Community palette as rgb() strings (hex equivalents noted alongside).
colorlist = [
    'rgb(3,29,68)',     # #031D44
    'rgb(132,5,251)',   # #8405FB
    'rgb(175,94,192)',  # #AF5EC0
    'rgb(109,190,152)', # #6DBE98
    'rgb(0,196,170)',   # #00C4AA
    'rgb(67,100,54)',   # #436436
]

# Fallback colour for anything without a dedicated community colour.
neutralcolor = 'rgb(218,183,133)'

In [8]:
# The plotting cells compare community labels against string literals such as
# '3' or 'Other', so every year column is converted to strings here.
for year_column in ('C2008', 'C2012', 'C2016', 'C2020', 'C2024'):
    df_combined[year_column] = df_combined[year_column].astype(str)

In [9]:
# Parallel-categories diagram of community membership across the five years,
# using every row of df_combined.
df_filtered = df_combined

# Catch-all labels are always placed after the real communities on each axis.
trailing_labels = ['Other', 'Not in data']

# One axis per year: real communities in order of first appearance, then the
# catch-all labels.
dim = []
for year in (2008, 2012, 2016, 2020, 2024):
    year_values = df_filtered[f'C{year}']
    ordered_labels = [x for x in year_values.unique() if x not in trailing_labels] + trailing_labels
    dim.append(go.parcats.Dimension(
        values=year_values,
        label=str(year),
        categoryorder='array',
        categoryarray=ordered_labels,
    ))
# Keep the per-year names available for any code that refers to them.
dim_2008, dim_2012, dim_2016, dim_2020, dim_2024 = dim

# Ribbon colour is chosen from the 2024 community of each row; labels without
# an entry here (e.g. 'Other') fall back to neutralcolor.
community_colors = {
    '3': '#031D44',
    '1': '#8405FB',
    '6': '#9A32DE',
    '4': '#AF5EC0',
    '10': '#C58BA3',
    '7': '#B5A399',
    '13': '#A4BB8F',
    '2': '#6DBE98',
    '5': '#00C4AA',
    '8': '#436436',
    '11': '#007A6A',
    '9': '#0853C4',
}
color = df_filtered.C2024.apply(lambda x: community_colors.get(x, neutralcolor))

# NOTE(review): the Plotly reference says line.colorscale only applies when
# line.color is numeric; it is kept so the figure spec stays unchanged.
colorscale = [[0, neutralcolor], [0.5, neutralcolor], [1, neutralcolor]]

fig = go.Figure(data=[go.Parcats(
    dimensions=dim,
    line={'color': color, 'colorscale': colorscale, 'shape': 'hspline'},
    hoverinfo='count+probability',
    labelfont={'size': 18, 'family': 'Times'},
    tickfont={'size': 16, 'family': 'Times'},
    arrangement='freeform',
)])
fig.update_layout(
    margin=dict(l=60, r=60, t=60, b=30),
    height=1000,
    width=1000,
    title_x=0.5
)
fig.show()

#save as html
# The original path contained a stray '(' ("Sankey/(Time_all.html"); the name
# now follows the same pattern as the companion file "Sankey/Time_focus.html".
fig.write_html("Sankey/Time_all.html")


In [10]:
# Parallel-categories diagram restricted to rows whose 2024 community is not
# the 'Other' bucket.
df_filtered = df_combined[df_combined['C2024'] != 'Other']

# Catch-all labels go last on every axis.
tail_categories = ['Other', 'Not in data']

# One axis per year: real communities in order of first appearance, then the
# catch-all labels.
dim = []
for yr in (2008, 2012, 2016, 2020, 2024):
    column = df_filtered[f'C{yr}']
    axis_order = [x for x in column.unique() if x not in tail_categories] + tail_categories
    dim.append(go.parcats.Dimension(
        values=column,
        label=str(yr),
        categoryorder='array',
        categoryarray=axis_order,
    ))
# Keep the per-year names bound, as the original cell did.
dim_2008, dim_2012, dim_2016, dim_2020, dim_2024 = dim

# Ribbon colour comes from each row's 2024 community; labels without an entry
# fall back to neutralcolor.
hex_by_community = {
    '3': '#031D44',
    '1': '#8405FB',
    '6': '#9A32DE',
    '4': '#AF5EC0',
    '10': '#C58BA3',
    '7': '#B5A399',
    '13': '#A4BB8F',
    '2': '#6DBE98',
    '5': '#00C4AA',
    '8': '#436436',
    '11': '#007A6A',
    '9': '#0853C4',
}
color = df_filtered.C2024.apply(lambda x: hex_by_community.get(x, neutralcolor))
colorscale = [[0, neutralcolor], [0.5, neutralcolor], [1, neutralcolor]]

parcats_trace = go.Parcats(
    dimensions=dim,
    line={'color': color, 'colorscale': colorscale, 'shape': 'hspline'},
    hoverinfo='count+probability',
    labelfont={'size': 18, 'family': 'Times'},
    tickfont={'size': 16, 'family': 'Times'},
    arrangement='freeform',
)
fig = go.Figure(data=[parcats_trace])
fig.update_layout(
    margin=dict(l=60, r=60, t=60, b=30),
    height=1000,
    width=1000,
    title_x=0.5
)
fig.show()

# Save the interactive figure as a standalone HTML file.
fig.write_html("Sankey/Time_focus.html")
