---
# Figures notebook

Supplementary material for the paper ***The Visual Story of Data Storage: From Storage Properties to User Interfaces***, *Aleksandar Anžel, Dominik Heider, and Georges Hattab*

Please cite the paper when using this notebook or data.

---

In [1]:
import pandas as pd
import numpy as np
import altair as alt
import os
from datetime import timedelta
#from altair_saver import save

In [2]:
__author__ = 'Aleksandar Anžel'
__copyright__ = ''
__credits__ = ['Aleksandar Anžel', 'Georges Hattab']
__license__ = 'GNU General Public License v3.0'
__version__ = '1.0'
__maintainer__ = 'Aleksandar Anžel'
__email__ = 'aleksandar.anzel@uni-marburg.de'
__status__ = 'Dev'

In [3]:
# Theme function that changes the font family globally
def cm_mono():
    """Build a theme dictionary that uses the "CM Mono" font everywhere.

    Returns:
        dict: A mapping with a single "config" key. Its "title" entry sets
        ``font``; its "axis", "header" and "legend" entries each set both
        ``labelFont`` and ``titleFont`` to the same font family.
    """
    font_family = "CM Mono"

    config = {"title": {"font": font_family}}
    # Axis, header and legend all take the same pair of font settings;
    # each section gets its own dict so they can be edited independently.
    for section in ("axis", "header", "legend"):
        config[section] = {
            "labelFont": font_family,
            "titleFont": font_family,
        }

    return {"config": config}

# Register the theme function under the name 'cm_mono' and enable it.
# NOTE(review): recent Altair releases may expose this registry under a
# different name (alt.theme) — confirm against the pinned Altair version.
alt.themes.register('cm_mono', cm_mono)
alt.themes.enable('cm_mono')


ThemeRegistry.enable('cm_mono')

In [4]:
data_path = os.path.join('..', 'Data')

---
## Data import

In [5]:
# Read the semicolon-separated table; the first column becomes the index and
# blanks following each delimiter are skipped.
table_name = 'ori.dat'
table_path = os.path.join(data_path, table_name)
table = pd.read_csv(
    table_path,
    sep=';',
    index_col=0,
    skipinitialspace=True,
)
table.head()

Unnamed: 0_level_0,year,name,type,usage,capacity,lifespan,addressability,mutability,accessability,Unnamed: 10
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,1932,Drum memory,magnetic,1960s,62500,,0.0,2,0,
2,1946,Williams-Kilburn Tube,cathode ray tube,1955,1000,,0.0,2,1,
3,1949,Magnetic-core memory,magnetic,1970s,100000,,1.0,2,1,
4,1952,Magnetic Band (Tape),magnetic,today,330000000000000,,0.0,2,0,
5,1956,Hard Disk Drive (HDD),magnetic,today,2000000000000,,1.0,2,1,


---
## Data cleaning

In [6]:
# Used for correcting year representation
def fix_year(year_string):
    """Return the text of a year value that precedes its first '.'.

    The value is converted with ``str`` first, so both ``1932`` and
    ``'1932.0'`` yield ``'1932'``. Text without a '.' is returned unchanged.

    Args:
        year_string: Any value whose string form starts with the year.

    Returns:
        str: The part of ``str(year_string)`` before the first '.'.
    """
    whole_part, _, _ = str(year_string).partition('.')
    return whole_part

In [7]:
# Normalise every year to plain text with fix_year, then parse the result
# into timestamps in a single assignment.
table['year'] = pd.to_datetime(table['year'].map(fix_year))
table.head()

Unnamed: 0_level_0,year,name,type,usage,capacity,lifespan,addressability,mutability,accessability,Unnamed: 10
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,1932-01-01,Drum memory,magnetic,1960s,62500,,0.0,2,0,
2,1946-01-01,Williams-Kilburn Tube,cathode ray tube,1955,1000,,0.0,2,1,
3,1949-01-01,Magnetic-core memory,magnetic,1970s,100000,,1.0,2,1,
4,1952-01-01,Magnetic Band (Tape),magnetic,today,330000000000000,,0.0,2,0,
5,1956-01-01,Hard Disk Drive (HDD),magnetic,today,2000000000000,,1.0,2,1,


In [8]:
# Preparation for the jitter step: count how many rows share each year so
# that only years occurring more than once are shifted afterwards.
value_count = table['year'].value_counts()
# Make sure the index holds timestamps, so it can be looked up with the
# timestamp values stored in table['year'].
value_count.index = pd.to_datetime(value_count.index)
value_count.head()

1994-01-01    2
2010-01-01    2
1978-01-01    2
1995-01-01    2
1932-01-01    1
Name: year, dtype: int64

In [9]:
# Spread rows that share a year: within a run of consecutive rows with the
# same year, the k-th row (k = 0, 1, ...) is moved forward by k years.
# Rows whose year occurs only once in value_count are left untouched.
#
# NOTE(review): counter is reset whenever the year differs from the previous
# row, so equal years are presumably expected to be adjacent in the table —
# confirm the input is sorted by year.
# NOTE(review): a shifted year can coincide with another row's real year
# (value_count lists both 1994 and 1995 as duplicated) — confirm this is
# acceptable for the figures.
# NOTE(review): value_count is computed before this loop; running the loop a
# second time looks up already-shifted years in it — verify before re-running.
counter = 0            # offset in years for the next row of the current run
timestamp_temp = None  # year of the previously visited row

for i, row in table.iterrows():
    
    # A new year starts a new run, so the offset starts again at zero
    if (timestamp_temp != row['year']):
        timestamp_temp = row['year']
        counter = 0
        
    # Only years that occur more than once are shifted
    if (value_count[row['year']] != 1):
        table.at[i, 'year'] = row['year'] + pd.DateOffset(years=counter)
        counter+=1


In [10]:
table.head()

Unnamed: 0_level_0,year,name,type,usage,capacity,lifespan,addressability,mutability,accessability,Unnamed: 10
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,1932-01-01,Drum memory,magnetic,1960s,62500,,0.0,2,0,
2,1946-01-01,Williams-Kilburn Tube,cathode ray tube,1955,1000,,0.0,2,1,
3,1949-01-01,Magnetic-core memory,magnetic,1970s,100000,,1.0,2,1,
4,1952-01-01,Magnetic Band (Tape),magnetic,today,330000000000000,,0.0,2,0,
5,1956-01-01,Hard Disk Drive (HDD),magnetic,today,2000000000000,,1.0,2,1,


Collecting the unique values of column *type* as strings

In [11]:
# Distinct storage types, in order of first appearance, as a string array
storage_types = table['type'].unique().astype(str)
storage_types

array(['magnetic', 'cathode ray tube', 'optical', 'electronic',
       'electro-mechanical', 'magneto-optical', 'molecular', 'atomic'],
      dtype='<U18')

---
## Creating figures
### Defining global variables

In [12]:
# Defining colors
# Hex codes of the custom palette; the trailing comment on each line gives
# the equivalent rgb value.

pink_custom = '#CD7DA9' # rgb(205, 125, 169)
blue_custom = '#2476B6' # rgb(36, 118, 182)
orange_custom = '#E6A02E' # rgb(230, 160, 46)
green_custom = '#479F77' # rgb(71, 159, 119)
red_custom = '#D73F47' # rgb(215, 63, 71)
gray_custom = '#BCBCBC' # rgb(188, 188, 188)

# Defining global variables
# strokeWidth_var: outline width passed to the outlined bar marks
# chartWidth_var: width applied to the final layered charts
strokeWidth_var = 3
chartWidth_var = 600


### 1. Creating chart: x = year, y = access

In [13]:
# Base layer: one filled circle per row, year on x and accessability on y.
# Only the five storage types listed in the colour domain get a palette
# colour here; the remaining types are drawn by the overlay charts.
first_chart_access = (
    alt.Chart(table)
    .mark_circle(size=100, opacity=1)
    .encode(
        x=alt.X('year:T'),
        y=alt.Y('accessability:N'),
        color=alt.Color(
            'type:N',
            scale=alt.Scale(
                domain=['magnetic', 'cathode ray tube', 'optical', 'electronic', 'electro-mechanical'],
                range=[blue_custom, pink_custom, gray_custom, orange_custom, red_custom],
            ),
            legend=None,
        ),
    )
)

In [14]:
first_chart_access

In [15]:
# Overlay for the "magneto-optical" rows only: gray fill with a blue outline.
# The width comes from the shared chartWidth_var constant instead of a
# repeated literal, so all access layers stay in sync with the final chart.
second_chart_access = alt.Chart(data = table).mark_circle(size=60, opacity = 1, stroke = blue_custom, fill = gray_custom).encode(
    alt.X('year:T'),
    alt.Y('accessability:N'),
    alt.Opacity('type:N', legend = None),
).properties(
    width=chartWidth_var
).transform_filter ('datum.type == "magneto-optical"')

In [16]:
second_chart_access

In [17]:
# Overlay for the "atomic" rows only: white fill with a gray outline.
# The width comes from the shared chartWidth_var constant instead of a
# repeated literal, so all access layers stay in sync with the final chart.
# NOTE(review): the outline uses the named colour 'gray', while the "atomic"
# overlays of the capacity and usage figures use gray_custom — confirm
# whether the difference is intended.
third_chart_access = alt.Chart(data = table).mark_circle(size=60, opacity = 1, stroke = 'gray', fill = 'white').encode(
    alt.X('year:T'),
    alt.Y('accessability:N'),
    alt.Opacity('type:N', legend = None),
).properties(
    width=chartWidth_var
).transform_filter ('datum.type == "atomic"')

In [18]:
third_chart_access

In [19]:
# Overlay for the "molecular" rows only: white fill with a black outline.
# This layer also removes both axis titles and requests a "nice" x scale.
# The width comes from the shared chartWidth_var constant instead of a
# repeated literal, so all access layers stay in sync with the final chart.
fourth_chart_access = alt.Chart(data = table).mark_circle(size=60, opacity = 1, stroke = 'black', fill = 'white').encode(
    alt.X('year:T', scale = alt.Scale (nice = True), axis = alt.Axis(title = None)),
    alt.Y('accessability:N', axis = alt.Axis(title = None)),
    alt.Opacity('type:N', legend = None)
).properties(
    width=chartWidth_var
).transform_filter ('datum.type == "molecular"')

In [20]:
fourth_chart_access

In [21]:
final_chart_access = first_chart_access + second_chart_access + third_chart_access + fourth_chart_access

In [22]:
final_chart_access = final_chart_access.properties(width = chartWidth_var)
final_chart_access

### 2. Creating chart: x = name, y = capacity

In [23]:
table['capacity_float'] = table['capacity'].astype(float)
table.head()

Unnamed: 0_level_0,year,name,type,usage,capacity,lifespan,addressability,mutability,accessability,Unnamed: 10,capacity_float
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,1932-01-01,Drum memory,magnetic,1960s,62500,,0.0,2,0,,62500.0
2,1946-01-01,Williams-Kilburn Tube,cathode ray tube,1955,1000,,0.0,2,1,,1000.0
3,1949-01-01,Magnetic-core memory,magnetic,1970s,100000,,1.0,2,1,,100000.0
4,1952-01-01,Magnetic Band (Tape),magnetic,today,330000000000000,,0.0,2,0,,330000000000000.0
5,1956-01-01,Hard Disk Drive (HDD),magnetic,today,2000000000000,,1.0,2,1,,2000000000000.0


In [24]:
# Base layer: one filled circle per storage medium. Names keep the order of
# the table (sort=None); capacity is drawn on a log scale with labels in
# scientific notation.
first_chart_capacity = (
    alt.Chart(table)
    .mark_circle(size=100, opacity=1)
    .encode(
        x=alt.X(
            'name:N',
            sort=None,
            axis=alt.Axis(title=None, grid=True, labelAngle=-45),
        ),
        y=alt.Y(
            'capacity_float:Q',
            scale=alt.Scale(type='log', nice=True),
            axis=alt.Axis(title=None, format=".1e"),
        ),
        color=alt.Color(
            'type:N',
            scale=alt.Scale(
                domain=['magnetic', 'cathode ray tube', 'optical', 'electronic', 'electro-mechanical'],
                range=[blue_custom, pink_custom, gray_custom, orange_custom, red_custom],
            ),
            legend=None,
        ),
    )
)


In [25]:
first_chart_capacity

In [26]:
# Overlay for the "magneto-optical" rows only: gray fill with a blue outline.
# NOTE(review): unlike the other capacity layers, this y scale does not set
# nice=True — confirm whether that is intended.
second_chart_capacity = (
    alt.Chart(table)
    .mark_circle(size=60, opacity=1, stroke=blue_custom, fill=gray_custom)
    .encode(
        x=alt.X('name:N', sort=None, axis=alt.Axis(grid=True, labelAngle=-45)),
        y=alt.Y('capacity_float:Q', scale=alt.Scale(type='log')),
    )
    .transform_filter('datum.type == "magneto-optical"')
)

In [27]:
second_chart_capacity

In [28]:
# Overlay for the "atomic" rows only: white fill with a gray outline
third_chart_capacity = (
    alt.Chart(table)
    .mark_circle(size=60, opacity=1, stroke=gray_custom, fill='white')
    .encode(
        x=alt.X('name:N', sort=None, axis=alt.Axis(grid=True, labelAngle=-45)),
        y=alt.Y('capacity_float:Q', scale=alt.Scale(type='log', nice=True)),
    )
    .transform_filter('datum.type == "atomic"')
)

In [29]:
third_chart_capacity

In [30]:
# Overlay for the "molecular" rows only: white fill with a black outline
fourth_chart_capacity = (
    alt.Chart(table)
    .mark_circle(size=60, opacity=1, stroke='black', fill='white')
    .encode(
        x=alt.X('name:N', sort=None, axis=alt.Axis(grid=True, labelAngle=-45)),
        y=alt.Y('capacity_float:Q', scale=alt.Scale(type='log', nice=True)),
    )
    .transform_filter('datum.type == "molecular"')
)

In [31]:
fourth_chart_capacity

In [32]:
final_chart_capacity = first_chart_capacity + second_chart_capacity + third_chart_capacity + fourth_chart_capacity

In [33]:
final_chart_capacity = final_chart_capacity.properties(width = chartWidth_var)
final_chart_capacity

### 3. Creating chart: x = year/usage, y = name

In [34]:
table.head()

Unnamed: 0_level_0,year,name,type,usage,capacity,lifespan,addressability,mutability,accessability,Unnamed: 10,capacity_float
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,1932-01-01,Drum memory,magnetic,1960s,62500,,0.0,2,0,,62500.0
2,1946-01-01,Williams-Kilburn Tube,cathode ray tube,1955,1000,,0.0,2,1,,1000.0
3,1949-01-01,Magnetic-core memory,magnetic,1970s,100000,,1.0,2,1,,100000.0
4,1952-01-01,Magnetic Band (Tape),magnetic,today,330000000000000,,0.0,2,0,,330000000000000.0
5,1956-01-01,Hard Disk Drive (HDD),magnetic,today,2000000000000,,1.0,2,1,,2000000000000.0


In [35]:
# Modifying table column "usage"
def fix_usage(year_string, today_year='2021'):
    """Turn a free-text "usage" entry into a plain year string.

    Rules, applied in this order:
      * ``'today'`` becomes ``today_year``;
      * text ending in ``'s'`` (a decade such as ``'1960s'``) has its last
        two characters replaced by ``'5'``, giving the middle of the decade
        (``'1965'``);
      * anything else is returned unchanged.

    Empty strings and non-string values (e.g. ``None`` or NaN) are passed
    through untouched instead of raising.

    Args:
        year_string: The raw cell value.
        today_year: Year text substituted for ``'today'``. Defaults to
            ``'2021'``, the value that was previously hard-coded.

    Returns:
        The normalised year string, or the input itself when it is empty
        or not a string.
    """
    # Guard first: indexing/suffix checks are only meaningful on non-empty text
    if not isinstance(year_string, str) or not year_string:
        return year_string

    if year_string == 'today':
        return today_year

    if year_string.endswith('s'):
        # '1960s' -> '196' + '5'
        return year_string[:-2] + '5'

    return year_string

# Cast "usage" to text, normalise each entry with fix_usage and parse the
# result into timestamps in a single assignment.
table['usage'] = pd.to_datetime(table['usage'].astype(str).apply(fix_usage))
table.head()


Unnamed: 0_level_0,year,name,type,usage,capacity,lifespan,addressability,mutability,accessability,Unnamed: 10,capacity_float
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,1932-01-01,Drum memory,magnetic,1965-01-01,62500,,0.0,2,0,,62500.0
2,1946-01-01,Williams-Kilburn Tube,cathode ray tube,1955-01-01,1000,,0.0,2,1,,1000.0
3,1949-01-01,Magnetic-core memory,magnetic,1975-01-01,100000,,1.0,2,1,,100000.0
4,1952-01-01,Magnetic Band (Tape),magnetic,2021-01-01,330000000000000,,0.0,2,0,,330000000000000.0
5,1956-01-01,Hard Disk Drive (HDD),magnetic,2021-01-01,2000000000000,,1.0,2,1,,2000000000000.0


In [36]:
# Bounds of the shared time axis of the usage charts: 365 days before the
# earliest "year" up to the latest "usage" date.
# Series.min()/Series.max() are used instead of the built-in min()/max():
# the pandas reductions skip missing timestamps (NaT), whereas the built-ins
# compare element by element and give an order-dependent result when NaT is
# present.
min_year_value = table['year'].min() - timedelta(days=365)
max_year_value = table['usage'].max()

In [37]:
# Base layer: one bar per storage medium, spanning from "year" to "usage"
# on a time axis fixed to [min_year_value, max_year_value]; rows are ordered
# by their x value.
first_chart_usage = (
    alt.Chart(table)
    .mark_bar()
    .encode(
        x=alt.X(
            'year:T',
            axis=alt.Axis(title=None, tickCount=50),
            scale=alt.Scale(domain=[min_year_value, max_year_value]),
        ),
        x2=alt.X2('usage:T'),
        y=alt.Y('name:N', sort='x', axis=alt.Axis(title=None)),
        color=alt.Color(
            'type:N',
            scale=alt.Scale(
                domain=['magnetic', 'cathode ray tube', 'optical', 'electronic', 'electro-mechanical'],
                range=[blue_custom, pink_custom, gray_custom, orange_custom, red_custom],
            ),
            legend=None,
        ),
    )
)


In [38]:
first_chart_usage

In [39]:
# Overlay for the "magneto-optical" rows only: gray bar with a blue outline
second_chart_usage = (
    alt.Chart(table)
    .mark_bar(stroke=blue_custom, fill=gray_custom, strokeWidth=strokeWidth_var)
    .encode(
        x=alt.X('year:T', scale=alt.Scale(domain=[min_year_value, max_year_value])),
        x2=alt.X2('usage:T'),
        y=alt.Y('name:N', sort='x'),
    )
    .transform_filter('datum.type == "magneto-optical"')
)

In [40]:
second_chart_usage

In [41]:
# Overlay for the "atomic" rows only: white bar with a gray outline
third_chart_usage = (
    alt.Chart(table)
    .mark_bar(stroke=gray_custom, fill='white', strokeWidth=strokeWidth_var)
    .encode(
        x=alt.X('year:T', scale=alt.Scale(domain=[min_year_value, max_year_value])),
        x2=alt.X2('usage:T'),
        y=alt.Y('name:N', sort='x'),
    )
    .transform_filter('datum.type == "atomic"')
)

In [42]:
third_chart_usage

In [43]:
# Overlay for the "molecular" rows only: white bar with a black outline
fourth_chart_usage = (
    alt.Chart(table)
    .mark_bar(stroke='black', fill='white', strokeWidth=strokeWidth_var)
    .encode(
        x=alt.X('year:T', scale=alt.Scale(domain=[min_year_value, max_year_value])),
        x2=alt.X2('usage:T'),
        y=alt.Y('name:N', sort='x'),
    )
    .transform_filter('datum.type == "molecular"')
)

In [44]:
fourth_chart_usage

In [45]:
final_chart_usage = first_chart_usage + second_chart_usage + third_chart_usage + fourth_chart_usage

In [46]:
final_chart_usage = final_chart_usage.properties(width = chartWidth_var)
final_chart_usage

---
## Survey part
### Uploading data

In [47]:
survey_mosla_data_path = os.path.join(data_path, 'Survey_data', 'Data_storage_survey_researchers.csv')
survey_mosla_df = pd.read_csv(survey_mosla_data_path)
survey_mosla_df

Unnamed: 0,Timestamp,Please rank the attributes by the order of importance that makes most sense to you as a researcher [1st choice],Please rank the attributes by the order of importance that makes most sense to you as a researcher [2nd choice],Please rank the attributes by the order of importance that makes most sense to you as a researcher [3rd choice],Please rank the attributes by the order of importance that makes most sense to you as a member of the general public [1st choice],Please rank the attributes by the order of importance that makes most sense to you as a member of the general public [2nd choice],Please rank the attributes by the order of importance that makes most sense to you as a member of the general public [3rd choice],Gender,Your field of research,Years of experience
0,2021/04/08 6:09:26 PM GMT+2,Accessibility,Capacity,Lifespan,Capacity,Capacity,Lifespan,Male,Computer Science,0-3
1,2021/04/08 6:54:02 PM GMT+2,Capacity,Lifespan,Accessibility,Usage,Capacity,Lifespan,Male,Biology;Computer Science,10+
2,2021/04/08 7:04:30 PM GMT+2,Accessibility,Usage,Mutability,Accessibility,Lifespan,Mutability,Male,Biology,10+
3,2021/04/08 8:07:36 PM GMT+2,Lifespan,Mutability,Capacity,Mutability,Capacity,Lifespan,Female,Chemistry,10+
4,2021/04/08 8:45:34 PM GMT+2,Capacity,Lifespan,Capacity,Lifespan,Capacity,Lifespan,Male,Computer Science,10+
5,2021/04/09 10:09:31 AM GMT+2,Lifespan,Mutability,Usage,Accessibility,Mutability,Lifespan,Female,Computer Science,0-3
6,2021/04/11 1:23:15 PM GMT+2,Accessibility,Capacity,Usage,Accessibility,Usage,Capacity,Male,Physics,10+
7,2021/04/13 4:56:27 PM GMT+2,Capacity,Accessibility,Lifespan,Accessibility,Mutability,Capacity,Male,Computer Science,3-5
8,2021/04/14 4:46:25 PM GMT+2,Accessibility,Mutability,Usage,Accessibility,Mutability,Usage,Male,Computer Science,10+
9,2021/04/15 12:00:00 PM GMT+2,Capacity,Lifespan,Accessibility,Accessibility,Capacity,Usage,Male,Computer Science,10+


In [48]:
survey_general_data_path = os.path.join(data_path, 'Survey_data', 'Data_storage_survey_students.csv')
survey_general_df = pd.read_csv(survey_general_data_path)
survey_general_df

Unnamed: 0,Timestamp,Please rank the attributes by the order of importance that makes most sense to you as a member of the general public [1st choice],Please rank the attributes by the order of importance that makes most sense to you as a member of the general public [2nd choice],Please rank the attributes by the order of importance that makes most sense to you as a member of the general public [3rd choice],Gender,Your field of study
0,2021/04/19 12:17:19 PM GMT+2,Lifespan,Capacity,Mutability,Male,Computer Science
1,2021/04/19 12:19:31 PM GMT+2,Accessibility,Usage,Mutability,Female,Computer Science
2,2021/04/19 12:46:17 PM GMT+2,Mutability,Capacity,Accessibility,Male,Computer Science;economics
3,2021/04/19 12:46:59 PM GMT+2,Capacity,Usage,Accessibility,Female,Computer Science
4,2021/04/19 2:52:42 PM GMT+2,Accessibility,Mutability,Lifespan,Male,Wirtschaftsinformatik
5,2021/04/19 5:06:09 PM GMT+2,Lifespan,Capacity,Accessibility,Male,Computer Science
6,2021/04/20 10:23:14 AM GMT+2,Capacity,Usage,Lifespan,Male,Computer Science
7,2021/04/20 1:54:21 PM GMT+2,Accessibility,Usage,Mutability,Female,Business informatics
8,2021/04/20 5:33:38 PM GMT+2,Mutability,Lifespan,Capacity,Male,Computer Science
9,2021/04/20 8:25:49 PM GMT+2,Accessibility,Usage,Mutability,Female,Computer Science


In [49]:
# This part is for 'as a researcher' question
# Tally how often each attribute was picked as 1st, 2nd and 3rd choice.
# The three answer columns differ only in the rank inside the brackets, so
# their names are built from one shared question text.
researcher_question = 'Please rank the attributes by the order of importance that makes most sense to you as a researcher'
new_survey_mosla_research_df = (
    pd.concat(
        [
            survey_mosla_df[researcher_question + ' [' + rank + ' choice]'].value_counts().rename(rank)
            for rank in ('1st', '2nd', '3rd')
        ],
        axis=1,
    )
    .fillna(0)          # attributes never chosen at a given rank count as 0
    .astype(np.int64)
    .T                  # rows: ranks, columns: attributes
)
new_survey_mosla_research_df


Unnamed: 0,Lifespan,Capacity,Accessibility,Mutability,Usage
1st,8,6,5,0,0
2nd,5,7,1,4,2
3rd,2,4,7,3,3


In [50]:
# This part is for 'as a member of general public' question
# Tally how often each attribute was picked as 1st, 2nd and 3rd choice.
# The three answer columns differ only in the rank inside the brackets, so
# their names are built from one shared question text.
general_public_question = 'Please rank the attributes by the order of importance that makes most sense to you as a member of the general public'
new_survey_mosla_general_df = (
    pd.concat(
        [
            survey_mosla_df[general_public_question + ' [' + rank + ' choice]'].value_counts().rename(rank)
            for rank in ('1st', '2nd', '3rd')
        ],
        axis=1,
    )
    .fillna(0)          # attributes never chosen at a given rank count as 0
    .astype(np.int64)
    .T                  # rows: ranks, columns: attributes
)
new_survey_mosla_general_df

Unnamed: 0,Accessibility,Capacity,Usage,Lifespan,Mutability
1st,8,5,3,2,1
2nd,3,8,3,1,4
3rd,2,2,2,9,4


In [51]:
# This part is for 'general public' question
# Same tally as for the researchers' survey, but read from survey_general_df.
# The three answer columns differ only in the rank inside the brackets, so
# their names are built from one shared question text.
general_public_question = 'Please rank the attributes by the order of importance that makes most sense to you as a member of the general public'
new_survey_general_df = (
    pd.concat(
        [
            survey_general_df[general_public_question + ' [' + rank + ' choice]'].value_counts().rename(rank)
            for rank in ('1st', '2nd', '3rd')
        ],
        axis=1,
    )
    .fillna(0)          # attributes never chosen at a given rank count as 0
    .astype(np.int64)
    .T                  # rows: ranks, columns: attributes
)
new_survey_general_df

Unnamed: 0,Accessibility,Lifespan,Capacity,Mutability,Usage
1st,11,4,4,2,0
2nd,0,3,7,4,7
3rd,6,3,6,4,2


In [52]:
# Exporting dataframes to latex format
# Each tally is transposed back (attributes as rows) before printing.
for survey_counts in (
    new_survey_mosla_research_df,
    new_survey_mosla_general_df,
    new_survey_general_df,
):
    print(survey_counts.T.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  1st &  2nd &  3rd \\
\midrule
Lifespan      &    8 &    5 &    2 \\
Capacity      &    6 &    7 &    4 \\
Accessibility &    5 &    1 &    7 \\
Mutability    &    0 &    4 &    3 \\
Usage         &    0 &    2 &    3 \\
\bottomrule
\end{tabular}

\begin{tabular}{lrrr}
\toprule
{} &  1st &  2nd &  3rd \\
\midrule
Accessibility &    8 &    3 &    2 \\
Capacity      &    5 &    8 &    2 \\
Usage         &    3 &    3 &    2 \\
Lifespan      &    2 &    1 &    9 \\
Mutability    &    1 &    4 &    4 \\
\bottomrule
\end{tabular}

\begin{tabular}{lrrr}
\toprule
{} &  1st &  2nd &  3rd \\
\midrule
Accessibility &   11 &    0 &    6 \\
Lifespan      &    4 &    3 &    3 \\
Capacity      &    4 &    7 &    6 \\
Mutability    &    2 &    4 &    4 \\
Usage         &    0 &    7 &    2 \\
\bottomrule
\end{tabular}



In [53]:
# Concatenating in order to create one big table
final_df = pd.concat([new_survey_mosla_research_df.T, new_survey_mosla_general_df.T, new_survey_general_df.T], axis=1)
final_df

Unnamed: 0,1st,2nd,3rd,1st.1,2nd.1,3rd.1,1st.2,2nd.2,3rd.2
Lifespan,8,5,2,2,1,9,4,3,3
Capacity,6,7,4,5,8,2,4,7,6
Accessibility,5,1,7,8,3,2,11,0,6
Mutability,0,4,3,1,4,4,2,4,4
Usage,0,2,3,3,3,2,0,7,2


In [54]:
print(final_df.to_latex())

\begin{tabular}{lrrrrrrrrr}
\toprule
{} &  1st &  2nd &  3rd &  1st &  2nd &  3rd &  1st &  2nd &  3rd \\
\midrule
Lifespan      &    8 &    5 &    2 &    2 &    1 &    9 &    4 &    3 &    3 \\
Capacity      &    6 &    7 &    4 &    5 &    8 &    2 &    4 &    7 &    6 \\
Accessibility &    5 &    1 &    7 &    8 &    3 &    2 &   11 &    0 &    6 \\
Mutability    &    0 &    4 &    3 &    1 &    4 &    4 &    2 &    4 &    4 \\
Usage         &    0 &    2 &    3 &    3 &    3 &    2 &    0 &    7 &    2 \\
\bottomrule
\end{tabular}



In [55]:
# Reshape both tallies to long format: one row per (rank, attribute) pair.
# After reset_index() the rank labels live in the 'index' column. The names
# of the melted columns are passed explicitly because melt() otherwise takes
# the variable name from the frame's columns name when one is set, which
# would break the 'variable:N' / 'value:Q' encodings below.
tmp_mosla_research_df = new_survey_mosla_research_df.reset_index().melt(
    id_vars=['index'], var_name='variable', value_name='value'
)
tmp_mosla_general_df = new_survey_mosla_general_df.reset_index().melt(
    id_vars=['index'], var_name='variable', value_name='value'
)

# Grouped bars: one column facet per rank, one bar per attribute
chart_survey_1 = alt.Chart(tmp_mosla_research_df).mark_bar().encode(
    alt.X('variable:N', axis=alt.Axis(title='', labels=False)),
    alt.Y('value:Q', axis=alt.Axis(title='')),
    alt.Color('variable:N'),
    alt.Column('index:N', title='MOSLA experts'),
)

# Same layout for the 'general public' answers; this chart sets the legend title
chart_survey_2 = alt.Chart(tmp_mosla_general_df).mark_bar().encode(
    alt.X('variable:N', axis=alt.Axis(title='', labels=False)),
    alt.Y('value:Q', axis=alt.Axis(title='')),
    alt.Color('variable:N', legend=alt.Legend(title='Property')),
    alt.Column('index:N', title='MOSLA general'),
)

# Place both charts side by side with a shared y scale
final_chart_survey = alt.hconcat(chart_survey_1, chart_survey_2).resolve_scale(y='shared')
final_chart_survey

---
## Saving figures

Important: Due to a bug documented at https://github.com/altair-viz/altair/issues/1954, font changes do not persist when a chart is saved as PDF. The resulting warnings can be ignored.

Saving steps: click on three dots -> save as SVG -> open in browser -> print as PDF -> crop PDF


In [56]:
root_save_path = os.path.join('..', 'Output')

#save(final_chart_access, os.path.join(root_save_path, 'Year_access.pdf'))
#save(final_chart_capacity, os.path.join(root_save_path, 'Capacity_name.pdf'))
#save(final_chart_usage, os.path.join(root_save_path, 'Usage_name.pdf'))
#save(final_chart_survey, os.path.join(root_save_path, 'Survey.pdf'))

#save(final_chart_access, os.path.join(root_save_path, 'Year_access.svg'))
#save(final_chart_capacity, os.path.join(root_save_path, 'Capacity_name.svg'))
#save(final_chart_usage, os.path.join(root_save_path, 'Usage_name.svg'))
#save(final_chart_survey, os.path.join(root_save_path, 'Survey.svg'))