Inspired by:

https://www.behance.net/gallery/100683383/Travelling-to-Outer-Space  
https://mir-s3-cdn-cf.behance.net/project_modules/2800_opt_1/a223b4100683383.6096c912857c0.png  
      


In [1]:
import pandas as pd
import numpy as np

# Show every column and every row when a DataFrame is displayed (no truncation).
# NOTE(review): with max_rows=None, displaying a large frame prints it in full.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


In [2]:

# Columns whose numbers are stored as text with a decimal comma.
decimal_comma_columns = ['Hours/mission', 'Total hrs (sum)', 'EVA hrs/mission', 'Total EVA hrs']

# The file is semicolon-separated; missing values (NaN) are swapped for None.
raw_table = pd.read_csv("astronauts.csv", sep=";")
df = raw_table.replace({np.nan: None})

# Turn the decimal commas into dots, then cast the text to real floats.
for column in decimal_comma_columns:
    dotted = df[column].str.replace(",", ".")
    df[column] = dotted.astype('float64')

df.head(5)

Unnamed: 0,ID,Number,Nationwide number,Name,Original name,Sex,Year of birth,Nationality,Military/Civilian,Selection,Year of Selection,mission number,total number of missions,Occupation,Year of mission,Mission title,Ascend shuttle,In orbit,Descend shuttle,Hours/mission,Total hrs (sum),Field21,EVA hrs/mission,Total EVA hrs
0,1,1,1,"Gagarin, Yuri",ГАГАРИН Юрий Алексеевич,M,1934,U.S.S.R/Russia,Mil,TsPK-1,1960,1,1,pilot,1961,Vostok 1,Vostok 1,Vostok 2,Vostok 3,1.77,1.77,0,0.0,0.0
1,2,2,2,"Titov, Gherman",ТИТОВ Герман Степанович,M,1935,U.S.S.R/Russia,Mil,TsPK-1,1960,1,1,pilot,1961,Vostok 2,Vostok 2,Vostok 2,Vostok 2,25.0,25.3,0,0.0,0.0
2,3,3,1,"Glenn, John H., Jr.","Glenn, John H., Jr.",M,1921,U.S.,Mil,NASA Astronaut Group 1,1959,1,2,pilot,1962,MA-6,MA-6,MA-6,MA-6,5.0,218.0,0,0.0,0.0
3,4,3,1,"Glenn, John H., Jr.","Glenn, John H., Jr.",M,1921,U.S.,Mil,NASA Astronaut Group 2,1959,2,2,PSP,1998,STS-95,STS-95,STS-95,STS-95,213.0,218.0,0,0.0,0.0
4,5,4,2,"Carpenter, M. Scott","Carpenter, M. Scott",M,1925,U.S.,Mil,NASA- 1,1959,1,1,Pilot,1962,Mercury-Atlas 7,Mercury-Atlas 7,Mercury-Atlas 7,Mercury-Atlas 7,5.0,5.0,0,0.0,0.0


In [3]:
# Build one summary row per astronaut.
#
# The rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append() copied the whole frame on every call (quadratic), was
# deprecated in pandas 1.4 and no longer exists in pandas 2.0.
# Side effect of building the frame in one go: "id" keeps its integer type
# instead of being upcast to float.
summary_columns = ['Year of mission', 'Total hrs (sum)', 'Total EVA hrs']
missions_by_year = df.sort_values('Year of mission', ascending=True)

astronaut_rows = []
for (astro_nbr, astro_name, astro_nat, astro_sex), group in missions_by_year.groupby(["Number", 'Name', 'Nationality', 'Sex']):

    base = {"id": astro_nbr,
            "name": astro_name,
            "nationality": astro_nat,
            "sex": astro_sex,
            }

    # Column-wise minimum over the astronaut's missions, so 'Year of mission'
    # becomes the year of the first mission.
    # NOTE(review): presumably the two "Total ..." columns are constant per
    # astronaut, which would make min() a no-op for them - confirm.
    first_mission = group[summary_columns].min().to_dict()

    # the pipe is the dict merging operator from python 3.9
    astronaut_rows.append(base | first_mission)

final_df = pd.DataFrame(astronaut_rows)

# Rank of each astronaut among those sharing the same first-mission year (0-based).
final_df['idx_in_year'] = final_df.groupby('Year of mission').cumcount()

final_df.head(10)
    

Unnamed: 0,id,name,nationality,sex,Year of mission,Total hrs (sum),Total EVA hrs,idx_in_year
0,1.0,"Gagarin, Yuri",U.S.S.R/Russia,M,1961.0,1.77,0.0,0
1,2.0,"Titov, Gherman",U.S.S.R/Russia,M,1961.0,25.3,0.0,1
2,3.0,"Glenn, John H., Jr.",U.S.,M,1962.0,218.0,0.0,0
3,4.0,"Carpenter, M. Scott",U.S.,M,1962.0,5.0,0.0,1
4,5.0,"Nikolayev, Andriyan",U.S.S.R/Russia,M,1962.0,519.33,0.0,2
5,6.0,"Popovich, Pavel",U.S.S.R/Russia,M,1962.0,448.45,0.0,3
6,7.0,"Schirra, Walter M., Jr.",U.S.,M,1962.0,295.2,0.0,4
7,8.0,"Cooper, L. Gordon, Jr.",U.S.,M,1963.0,225.0,0.0,0
8,9.0,"Bykovsky, Valery",U.S.S.R/Russia,M,1963.0,497.8,0.0,1
9,10.0,"Tereshkova, Valentina",U.S.S.R/Russia,F,1963.0,70.83,0.0,2


In [4]:
# Number of rows of final_df (one per astronaut) for each 'Year of mission' value.
nbr_astro_per_year = {
    mission_year: len(astronauts)
    for mission_year, astronauts in final_df.groupby("Year of mission")
}

In [5]:
# Summary statistics of the numeric columns, as a quick sanity check of the per-astronaut table.
final_df.describe()

Unnamed: 0,id,Year of mission,Total hrs (sum),Total EVA hrs,idx_in_year
count,565.0,565.0,565.0,565.0,565.0
mean,282.99823,1992.092035,2380.905363,8.153204,7.044248
std,163.244612,13.698839,3635.235492,13.793668,6.773684
min,1.0,1961.0,0.61,0.0,0.0
25%,142.0,1984.0,295.0,0.0,2.0
50%,283.0,1992.0,668.0,0.0,5.0
75%,424.0,2002.0,3587.93,12.32,10.0
max,565.0,2019.0,21083.52,78.8,37.0


### Plotting a POC


First, switch the plots to a dark theme.

In [7]:
import holoviews as hv
# Select Bokeh as the HoloViews plotting backend.
hv.extension('bokeh')

# NOTE(review): export_svgs is not referenced in the visible cells - confirm whether it is still needed.
from bokeh.io import export_svgs

In [7]:
from bokeh.themes.theme import Theme

# Dark theme derived from Bokeh's "dark_minimal" theme:
# https://github.com/bokeh/bokeh/blob/branch-2.4/bokeh/themes/_dark_minimal.py
# Here the figure background, border and outline are pure black.
_black = "#000000"
_light_grey = "#E0E0E0"
_font = "Helvetica"

_figure_attrs = {
    "background_fill_color": _black,
    "border_fill_color": _black,
    "outline_line_color": _black,
    "outline_line_alpha": 0.25,
}

_grid_attrs = {
    "grid_line_color": _light_grey,
    "grid_line_alpha": 0.25,
}

_axis_attrs = {
    # ticks and axis line
    "major_tick_line_alpha": 1,
    "major_tick_line_color": _light_grey,
    "minor_tick_line_alpha": 1,
    "minor_tick_line_color": _light_grey,
    "axis_line_alpha": 1,
    "axis_line_color": _light_grey,
    # tick labels
    "major_label_text_color": _light_grey,
    "major_label_text_font": _font,
    "major_label_text_font_size": "0.8em",
    # axis title
    "axis_label_standoff": 10,
    "axis_label_text_color": _light_grey,
    "axis_label_text_font": _font,
    "axis_label_text_font_size": "0.8em",
    "axis_label_text_font_style": "normal",
}

_legend_attrs = {
    "spacing": 8,
    "glyph_width": 15,
    "label_standoff": 8,
    "label_text_color": _light_grey,
    "label_text_font": _font,
    "label_text_font_size": "1.025em",
    "border_line_alpha": 0,
    "background_fill_alpha": 0.25,
    "background_fill_color": "#20262B",
}

_colorbar_attrs = {
    "title_text_color": _light_grey,
    "title_text_font": _font,
    "title_text_font_size": "1.025em",
    "title_text_font_style": "normal",
    "major_label_text_color": _light_grey,
    "major_label_text_font": _font,
    "major_label_text_font_size": "1.025em",
    "background_fill_color": "#15191C",
    "major_tick_line_alpha": 0,
    "bar_line_alpha": 0,
}

_title_attrs = {
    "text_color": _light_grey,
    "text_font": _font,
    "text_font_size": "1.15em",
}

theme = Theme(
    json={
        "attrs": {
            "Figure": _figure_attrs,
            "Grid": _grid_attrs,
            "Axis": _axis_attrs,
            "Legend": _legend_attrs,
            "ColorBar": _colorbar_attrs,
            "Title": _title_attrs,
        }
    }
)

# Apply the theme to the HoloViews Bokeh renderer.
hv.renderer('bokeh').theme = theme

In [8]:
# Proof of concept: four arcs, each starting at angle -pi/2 and ending at a
# hand-picked angle, with a white dot at the end of each arc.
#
# Fraction of a full turn covered, depending on the end angle:
#   0       -> 25%
#   pi/2    -> 50%
#   pi      -> 75%
#   3*pi/2  -> 100%

max_year = final_df['Year of mission'].max()
base_radius = 20


def _poc_arc(year_start, end_angle, n_points=100):
    """Return the coordinates of one arc and of its end point.

    The arc radius is ``max_year - year_start + base_radius`` (both read from
    the notebook globals), so later years sit closer to the centre.

    Parameters
    ----------
    year_start : number
        Year that determines the radius of the arc.
    end_angle : float
        Angle (radians) at which the arc stops; it always starts at -pi/2.
    n_points : int, default 100
        Number of samples along the arc.

    Returns
    -------
    (list of (x, y) tuples, [x, y])
        The sampled arc, and the point at ``end_angle``.
    """
    radius = max_year - year_start + base_radius
    angles = np.linspace(-np.pi/2, end_angle, n_points)
    arc = list(zip(radius*np.sin(angles), radius*np.cos(angles)))
    tip = [radius*np.sin(end_angle), radius*np.cos(end_angle)]
    return arc, tip


circle_1, point_1 = _poc_arc(1961, np.pi)
circle_2, point_2 = _poc_arc(1990, np.pi/2)
circle_3, point_3 = _poc_arc(2000, np.pi/3)
circle_4, point_4 = _poc_arc(2018, 3*np.pi/2)

points_data = [point_1, point_2, point_3, point_4]
print(points_data)
points = hv.Points(points_data).opts(color='white', size=2)

# The first three arcs share one hv.Path; the full circle gets its own.
circles = hv.Path([circle_1, circle_2, circle_3]) * hv.Path(circle_4)

(circles * points).opts(width=600, height=600)

[[9.552245033349356e-15, -78.0], [49.0, 3.0003846579110155e-15], [33.77499074759311, 19.500000000000004], [-21.0, -3.857637417314163e-15]]


### Actual plot



In [9]:
# Candidate palettes, keyed by name. Each palette maps a nationality to a hex
# colour; 'others' is the fallback for every nationality not listed explicitly.
# Previously the palette was switched by commenting blocks in and out, which
# left a dead first assignment of `colors` that was overwritten right away.
palettes = {
    # Brand colors
    'brand': {
        'U.S.S.R/Russia': '#452392',
        'U.S.': '#99C941',
        'others': '#A43A8F',
    },
    # shade of purple
    'purple': {
        'U.S.S.R/Russia': '#452392',
        'U.S.': '#663BC9',
        'others': '#A183E6',
    },
    # shade of green
    'green': {
        'U.S.S.R/Russia': '#597621',
        'U.S.': '#99C941',
        'others': '#C5E092',
    },
}

# Palette actually used by the plot (same values as before: the purple shades).
colors = palettes['purple']



f = 3     # f like "factor" in "zoom factor"

# Constants derived from the data.
# The longest cumulated duration, padded by 5%, is treated as 100%, i.e. a full circle.
longest_duration = final_df['Total hrs (sum)'].max()
max_duration = longest_duration *1.05

min_year = final_df['Year of mission'].min()

# Latest year together with the highest idx reached within that year:
# sort by (year, idx), keep the last row, and unpack its two values,
# which come out as something like [2019.0, 3].
last_astronaut = (
    final_df[['Year of mission', 'idx_in_year']]
    .sort_values(['Year of mission', 'idx_in_year'], ascending=True)
    .tail(1)
)
max_year, max_idx_in_year = last_astronaut.to_dict('split')['data'][0]


def astronaut_glyphs_data(year_start, duration, max_duration, max_year, *,
                          radius_offset=None, zoom=None, n_points=100):
    """Compute the arc and the end-point glyph for one astronaut.

    The arc starts at angle -pi/2 and spans ``duration / max_duration`` of a
    full turn. Its radius is ``(max_year - year_start + radius_offset) * 10``,
    so later start years are drawn closer to the centre.

    Parameters
    ----------
    year_start : number
        Start year that determines the radius (may be fractional).
    duration : number
        Duration represented by the arc.
    max_duration : number
        Duration that corresponds to a full circle.
    max_year : number
        Year drawn at the innermost radius.
    radius_offset : number, optional
        Radius (before the x10 scaling) of the ``max_year`` arc. Defaults to
        the notebook-level ``base_radius``; pass it explicitly to avoid
        depending on that global.
    zoom : number, optional
        Multiplier applied to the point size. Defaults to the notebook-level
        zoom factor ``f``.
    n_points : int, default 100
        Number of samples along the arc.

    Returns
    -------
    circle_data : list of (x, y) tuples
        Sampled coordinates of the arc.
    point_data : dict
        ``{"x", "y", "size"}`` for the glyph at the end of the arc; the size
        grows from ``2 * zoom`` to ``8 * zoom`` as the duration approaches
        ``max_duration``.
    """
    # Fall back to the notebook globals only when the caller gave no override,
    # which keeps existing four-argument calls working unchanged.
    if radius_offset is None:
        radius_offset = base_radius
    if zoom is None:
        zoom = f

    end_point = 2*np.pi * duration / max_duration - np.pi/2
    angle = np.linspace(-np.pi/2, end_point, n_points)
    radius = (max_year - year_start + radius_offset)*10
    circle_data = list(zip(radius*np.sin(angle), radius*np.cos(angle)))

    # The glyph sits on the last sample of the arc.
    tip_x, tip_y = circle_data[-1]
    point_data = {"x": tip_x,
                  "y": tip_y,
                  "size": (2 + 6 * duration / max_duration) * zoom
                 }
    return circle_data, point_data
    

    
# Every arc of the figure is a list of coordinates handed to hv.Path().
# One hv.Path per astronaut renders too slowly, so all the arcs of a given
# nationality go into a single hv.Path (a list of coordinate lists).
final_plot = None

list_nationalities = list(final_df['nationality'].value_counts().keys())
for nat in list_nationalities:
    astronauts = final_df[final_df['nationality'] == nat]

    arc_coords = []    # one list of coordinates per astronaut
    tip_records = []   # one {'x', 'y', 'size'} dict per astronaut

    for row in astronauts.to_dict('records'):
        # The number of astronauts per year is not constant. Each astronaut's
        # start year is shifted by idx/n, n being the number of astronauts of
        # that year, so the arcs of a single year are spread over that year.
        # Todo : rework this, make it more simple
        first_year = row['Year of mission']
        shifted_year = first_year + row['idx_in_year'] / nbr_astro_per_year[first_year]

        curve_data, point_data = astronaut_glyphs_data(
            shifted_year,
            row['Total hrs (sum)'],
            max_duration,
            max_year,
        )
        arc_coords.append(curve_data)
        tip_records.append(point_data)

    tips = pd.DataFrame(tip_records, columns=['x', 'y', 'size'])

    # Nationalities without a dedicated colour share the 'others' one.
    color = colors.get(nat, colors['others'])

    arcs = hv.Path(arc_coords).opts(color=color, line_width=1 * f)

    # Each end point is drawn twice: a translucent halo, then the point itself.
    halos = hv.Points(tips).opts(color=color, size=hv.dim('size')*2, alpha=0.5,
                                 line_alpha=0.25, line_width=2, line_color=color)
    dots = hv.Points(tips).opts(color=color, size=hv.dim('size'), line_alpha=0.75)

    # Stack the three layers on top of whatever has been drawn so far.
    for layer in (arcs, halos, dots):
        final_plot = layer if final_plot is None else final_plot * layer

        

# adding the reference axis with the years
#
# The axis lies along the ray where every arc starts (angle -pi/2) and uses the
# same radius formula as the arcs: (max_year - year + base_radius) * 10.
# The unused min_radius / max_radius variables were dropped, and the raw label
# data has its own name instead of sharing `labels` with the hv.Labels element.

# Let's add ticks and labels, one per decade + the last year
ticks = []
label_data = {'x': [], 'y': [], 't': []}
for year in list(range(int(min_year), int(max_year), 10)) + [max_year]:

    radius = (max_year - year + base_radius)*10
    x = radius * np.sin(-np.pi/2)
    y = radius * np.cos(-np.pi/2)

    # Short tick going 10 units down from the axis.
    ticks.append({"x0": x, "y0": y, "x1": x, "y1": y - 10})

    # Label anchored slightly left of and below the tick.
    label_data['x'].append(x - 7)
    label_data['y'].append(y - 30)
    label_data['t'].append(str(int(year)))

# Main axis line: from the radius of the first year down to the radius of the
# last astronaut of the last year.
outer_radius = (max_year - min_year + base_radius)*10
inner_radius = (-max_idx_in_year / nbr_astro_per_year[max_year] + base_radius)*10

main_axis = {"x0": outer_radius * np.sin(-np.pi/2),
             "y0": outer_radius * np.cos(-np.pi/2),
             "x1": inner_radius * np.sin(-np.pi/2),
             "y1": inner_radius * np.cos(-np.pi/2),
            }

reference_axis = hv.Segments(ticks + [main_axis]).opts(color='#444444')
labels = hv.Labels(label_data, ['x', 'y'], 't').opts(text_color='#444444', text_font_size=f'{7*f}pt', angle=45)

# Overlay order: reference axis first, then the arcs and points, then the year labels.
# NOTE(review): presumably later layers are drawn on top of earlier ones - confirm.
final_plot = reference_axis * final_plot * labels 
    
# Figure size scales with the zoom factor f; axes and toolbar are hidden.
final_plot = final_plot.opts(width=900*f , height=800*f, xaxis=None, yaxis=None, toolbar=None)

final_plot

In [10]:
# Save the final figure as a PNG file.
# you may need to run the following : 
# pip install selenium
# conda install -c conda-forge firefox geckodriver
hv.save(final_plot, 'astronauts_purple_thick_1_f3.png', fmt='png')