In [1]:
import pandas as pd
import numpy as np
import altair as alt
import os
from datetime import timedelta

In [2]:
# Read the semicolon-separated source file; the first column becomes the index
# and blanks that follow a separator are dropped.
table_name = 'Untitledt.csv'
table = pd.read_csv(
    table_name,
    sep=';',
    index_col=0,
    skipinitialspace=True,
)
table

Unnamed: 0_level_0,year,name,type,usage,capacity,lifespan,access
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1932.0,Drum memory,magnetic,1960s,62500,,0.0
2,1946.0,Williams-Kilburn Tube,cathode ray tube,1955,1000,,1.0
3,1949.0,Magnetic-core memory,magnetic,1970s,100000,,1.0
4,1952.0,Magnetic Band (Tape),magnetic,today,1000000,,0.0
5,1956.0,Hard Disk Drive,magnetic,today,2000000000000,,1.0
6,1960.0,Magnetic stripe card,magnetic,,200,3,0.0
7,1969.0,Floppy Disk,magnetic,1990s,1440000,5--30,1.0
8,1970.0,Bubble Memory,magnetic,1980s,125000,,0.0
9,1978.0,LaserDisc (LD)/Discovision (DV),optical,2001,324000000,,1.0
10,1978.0,Solid-State-Drive (SSD),electronic,today,5000000000000,~10,1.0


In [3]:
def fix_year(year_string):
    """Return the text of ``year_string`` up to (not including) the first '.'.

    The argument is converted with ``str()`` first, so a float such as
    ``1932.0`` yields ``'1932'``. A value without a '.' is returned as its
    full string form.
    """
    whole_part, _, _ = str(year_string).partition('.')
    return whole_part

In [4]:
# Strip the decimal part from every year, then parse the result as a timestamp.
year_as_text = table['year'].map(fix_year)
table['year'] = pd.to_datetime(year_as_text)
table

Unnamed: 0_level_0,year,name,type,usage,capacity,lifespan,access
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1932-01-01,Drum memory,magnetic,1960s,62500,,0.0
2,1946-01-01,Williams-Kilburn Tube,cathode ray tube,1955,1000,,1.0
3,1949-01-01,Magnetic-core memory,magnetic,1970s,100000,,1.0
4,1952-01-01,Magnetic Band (Tape),magnetic,today,1000000,,0.0
5,1956-01-01,Hard Disk Drive,magnetic,today,2000000000000,,1.0
6,1960-01-01,Magnetic stripe card,magnetic,,200,3,0.0
7,1969-01-01,Floppy Disk,magnetic,1990s,1440000,5--30,1.0
8,1970-01-01,Bubble Memory,magnetic,1980s,125000,,0.0
9,1978-01-01,LaserDisc (LD)/Discovision (DV),optical,2001,324000000,,1.0
10,1978-01-01,Solid-State-Drive (SSD),electronic,today,5000000000000,~10,1.0


In [5]:
# Count how many rows share each year; the result is used to find
# years that occur more than once before jitter is applied.
year_occurrences = table['year'].value_counts()
year_occurrences.index = pd.to_datetime(year_occurrences.index)
value_count = year_occurrences
value_count

1978-01-01    2
2019-01-01    2
1994-01-01    2
1970-01-01    1
1956-01-01    1
1946-01-01    1
2001-01-01    1
2009-01-01    1
2004-01-01    1
1969-01-01    1
1995-01-01    1
2000-01-01    1
1960-01-01    1
1932-01-01    1
1987-01-01    1
2016-01-01    1
1981-01-01    1
1991-01-01    1
1992-01-01    1
2002-01-01    1
1949-01-01    1
1952-01-01    1
Name: year, dtype: int64

In [6]:
# Spread rows that share a year so their marks do not sit on top of each other:
# the n-th row seen for a given year (n = 0, 1, 2, ...) is moved n years forward.
#
# Occurrences are tracked per year in a dict, so duplicates are spread even when
# the rows are not adjacent, missing years (NaT) are passed through unchanged,
# and the cell does not rely on counts computed in another cell.
# NOTE(review): a shifted year can still coincide with another row's original
# year (e.g. two rows for year Y next to a row for Y+1); that is not resolved here.
occurrences_seen = {}
jittered_years = []

for year in table['year']:
    if pd.isna(year):
        jittered_years.append(year)
        continue
    shift = occurrences_seen.get(year, 0)
    occurrences_seen[year] = shift + 1
    # The first occurrence keeps its year; later ones move forward by `shift` years.
    jittered_years.append(year + pd.DateOffset(years=shift) if shift else year)

# Positional assignment: one value per row, in row order.
table['year'] = jittered_years
    


In [7]:
# Display the table to inspect the 'year' column after the jitter step.
table

Unnamed: 0_level_0,year,name,type,usage,capacity,lifespan,access
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1932-01-01,Drum memory,magnetic,1960s,62500,,0.0
2,1946-01-01,Williams-Kilburn Tube,cathode ray tube,1955,1000,,1.0
3,1949-01-01,Magnetic-core memory,magnetic,1970s,100000,,1.0
4,1952-01-01,Magnetic Band (Tape),magnetic,today,1000000,,0.0
5,1956-01-01,Hard Disk Drive,magnetic,today,2000000000000,,1.0
6,1960-01-01,Magnetic stripe card,magnetic,,200,3,0.0
7,1969-01-01,Floppy Disk,magnetic,1990s,1440000,5--30,1.0
8,1970-01-01,Bubble Memory,magnetic,1980s,125000,,0.0
9,1978-01-01,LaserDisc (LD)/Discovision (DV),optical,2001,324000000,,1.0
10,1979-01-01,Solid-State-Drive (SSD),electronic,today,5000000000000,~10,1.0


In [8]:
# Distinct values of the 'type' column, as a NumPy array of strings.
storage_types = table['type'].unique().astype(str)
storage_types

array(['magnetic', 'cathode ray tube', 'optical', 'electronic',
       'electro-mechanical', 'magneto-optical', 'atomic', 'molecular'],
      dtype='<U18')

In [9]:
# Custom colour palette as hex codes.
# Each trailing note gives the same colour as an Altair rgb() value.

pink_custom = "#CD7DA9"    # alt.value('rgb(205, 125, 169)')
blue_custom = "#2476B6"    # alt.value('rgb(36, 118, 182)')
orange_custom = "#E6A02E"  # alt.value('rgb(230, 160, 46)')
green_custom = "#479F77"   # alt.value('rgb(71, 159, 119)')
red_custom = "#D73F47"     # alt.value('rgb(215, 63, 71)')
gray_custom = "#BCBCBC"    # alt.value('rgb(188, 188, 188)')


# 1. Creating chart: x = year, y = access

In [10]:
# Scatter of access (nominal) over year, coloured by storage type.
# Only the five types listed in the domain get an explicit colour.
access_palette = alt.Scale(
    domain=['magnetic', 'cathode ray tube', 'optical', 'electronic', 'electro-mechanical'],
    range=[blue_custom, pink_custom, gray_custom, orange_custom, red_custom],
)

first_chart = (
    alt.Chart(table)
    .mark_circle(size=100)
    .encode(
        x=alt.X('year:T'),
        y=alt.Y('access:N'),
        color=alt.Color('type:N', scale=access_palette),
    )
    .properties(width=900)
)


In [11]:
# Render the year/access scatter chart.
first_chart

In [12]:


# https://github.com/altair-viz/altair/issues/984



In [13]:
# Layer for the "magneto-optical" rows only: gray fill with a blue outline.
# Opacity is bound to the type so the layer gets its own (untitled) legend entry.
magneto_optical_marks = alt.Chart(table).mark_circle(
    size=60,
    stroke=blue_custom,
    fill='gray',
)
second_chart = (
    magneto_optical_marks
    .encode(
        x=alt.X('year:T'),
        y=alt.Y('access:N'),
        opacity=alt.Opacity('type:N', legend=alt.Legend(title='')),
    )
    .properties(width=600)
    .transform_filter('datum.type == "magneto-optical"')
)

In [14]:
# Render the layer that contains only the "magneto-optical" rows.
second_chart

In [15]:
# Layer for the "atomic" rows only: white fill with a gray outline.
# Opacity is bound to the type so the layer gets its own (untitled) legend entry.
atomic_marks = alt.Chart(table).mark_circle(
    size=60,
    stroke='gray',
    fill='white',
)
third_chart = (
    atomic_marks
    .encode(
        x=alt.X('year:T'),
        y=alt.Y('access:N'),
        opacity=alt.Opacity('type:N', legend=alt.Legend(title='')),
    )
    .properties(width=600)
    .transform_filter('datum.type == "atomic"')
)

In [16]:
# Render the layer that contains only the "atomic" rows.
third_chart

In [17]:
# Layer for the "molecular" rows only: white fill with a black outline.
# Opacity is bound to the type so the layer gets its own (untitled) legend entry.
molecular_marks = alt.Chart(table).mark_circle(
    size=60,
    stroke='black',
    fill='white',
)
fourth_chart = (
    molecular_marks
    .encode(
        x=alt.X('year:T'),
        y=alt.Y('access:N'),
        opacity=alt.Opacity('type:N', legend=alt.Legend(title='')),
    )
    .properties(width=600)
    .transform_filter('datum.type == "molecular"')
)

In [18]:
# Render the layer that contains only the "molecular" rows.
fourth_chart

In [19]:

# Overlay the four charts into one layered chart.
# The base chart is built 900 px wide and the three overlays 600 px wide; recent
# Altair releases raise a ValueError when layers disagree on width, so every
# layer is given the same width here. `.properties()` returns a modified copy,
# so the individual chart objects are left untouched.
final_chart_width = 900
final_chart = alt.layer(
    *(
        layer.properties(width=final_chart_width)
        for layer in (first_chart, second_chart, third_chart, fourth_chart)
    )
)

In [20]:
# Render the combined, layered chart.
final_chart

# 2. Creating chart: x = capacity, y = name

In [72]:
# Bar chart with capacity (treated as ordinal) on x and the medium name on y,
# coloured by storage type. Note that this rebinds the name `first_chart`.
capacity_palette = alt.Scale(
    domain=['magnetic', 'cathode ray tube', 'optical', 'electronic', 'electro-mechanical'],
    range=[blue_custom, pink_custom, gray_custom, orange_custom, red_custom],
)

first_chart = (
    alt.Chart(table)
    .mark_bar()
    .encode(
        x=alt.X('capacity:O'),
        y=alt.Y('name:N'),
        color=alt.Color('type:N', scale=capacity_palette),
    )
)


In [73]:
# Render the capacity bar chart.
first_chart

# 3. Creating chart: x = year to end of usage, y = name

In [21]:
# Display the table to inspect the raw 'usage' column.
table

Unnamed: 0_level_0,year,name,type,usage,capacity,lifespan,access
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1932-01-01,Drum memory,magnetic,1960s,62500,,0.0
2,1946-01-01,Williams-Kilburn Tube,cathode ray tube,1955,1000,,1.0
3,1949-01-01,Magnetic-core memory,magnetic,1970s,100000,,1.0
4,1952-01-01,Magnetic Band (Tape),magnetic,today,1000000,,0.0
5,1956-01-01,Hard Disk Drive,magnetic,today,2000000000000,,1.0
6,1960-01-01,Magnetic stripe card,magnetic,,200,3,0.0
7,1969-01-01,Floppy Disk,magnetic,1990s,1440000,5--30,1.0
8,1970-01-01,Bubble Memory,magnetic,1980s,125000,,0.0
9,1978-01-01,LaserDisc (LD)/Discovision (DV),optical,2001,324000000,,1.0
10,1979-01-01,Solid-State-Drive (SSD),electronic,today,5000000000000,~10,1.0


In [26]:
# Modifying table column "usage"
def fix_usage(year_string, today_year='2021'):
    """Normalise one raw 'usage' value into a plain year string.

    Parameters
    ----------
    year_string : str
        Raw value: a year ('1955'), a value ending in 's' such as a decade
        ('1960s'), or the literal 'today'.
    today_year : str, optional
        Year string substituted for 'today'. Defaults to '2021'.

    Returns
    -------
    str
        * 'today'           -> ``today_year``
        * value ending in 's' -> its last two characters replaced by '5'
          ('1960s' -> '1965')
        * anything else     -> returned unchanged (including '' and
          non-string values, which are left for the caller to handle)
    """
    if year_string == 'today':
        return today_year
    # endswith() is safe on an empty string, unlike indexing with [-1];
    # the isinstance guard lets non-string values (e.g. NaN) pass through.
    if isinstance(year_string, str) and year_string.endswith('s'):
        return year_string[:-2] + '5'
    return year_string

# Convert 'usage' to text, normalise each entry with fix_usage, then parse
# the resulting year strings as timestamps.
usage_as_text = table['usage'].astype(str)
usage_as_year = usage_as_text.apply(fix_usage)
table['usage'] = pd.to_datetime(usage_as_year)
table


Unnamed: 0_level_0,year,name,type,usage,capacity,lifespan,access
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1932-01-01,Drum memory,magnetic,1965-01-01,62500,,0.0
2,1946-01-01,Williams-Kilburn Tube,cathode ray tube,1955-01-01,1000,,1.0
3,1949-01-01,Magnetic-core memory,magnetic,1975-01-01,100000,,1.0
4,1952-01-01,Magnetic Band (Tape),magnetic,2021-01-01,1000000,,0.0
5,1956-01-01,Hard Disk Drive,magnetic,2021-01-01,2000000000000,,1.0
6,1960-01-01,Magnetic stripe card,magnetic,NaT,200,3,0.0
7,1969-01-01,Floppy Disk,magnetic,1995-01-01,1440000,5--30,1.0
8,1970-01-01,Bubble Memory,magnetic,1985-01-01,125000,,0.0
9,1978-01-01,LaserDisc (LD)/Discovision (DV),optical,2001-01-01,324000000,,1.0
10,1979-01-01,Solid-State-Drive (SSD),electronic,2021-01-01,5000000000000,~10,1.0


In [40]:
# One horizontal bar per medium, spanning from 'year' (x) to 'usage' (x2),
# coloured by storage type. Note that this rebinds the name `first_chart`.
usage_palette = alt.Scale(
    domain=['magnetic', 'cathode ray tube', 'optical', 'electronic', 'electro-mechanical'],
    range=[blue_custom, pink_custom, gray_custom, orange_custom, red_custom],
)

first_chart = (
    alt.Chart(table)
    .mark_bar()
    .encode(
        x=alt.X('year:T'),
        x2=alt.X2('usage:T'),
        y=alt.Y('name:N'),
        color=alt.Color('type:N', scale=usage_palette),
    )
)


In [41]:
# Render the year-to-usage bar chart.
first_chart