In [1]:
import math
import collections
import numpy as np
import matplotlib.pyplot as pp
import pandas as pd

%matplotlib inline

In [2]:
# Cap DataFrame displays at 16 rows; longer frames are shown truncated.
pd.set_option('display.max_rows', 16)

In [3]:
# Column names are supplied explicitly via `names`, so the first line of the
# file is read as data rather than as a header.
nobels = pd.read_csv(
    'nobels.csv',
    names=['year', 'discipline', 'nobelist'],
)

In [4]:
# No index column was requested when loading, so the frame carries a default
# positional RangeIndex (one entry per row).
nobels.index

RangeIndex(start=0, stop=950, step=1)

In [5]:
# Use the award year as the row index. set_index returns a new frame, so
# `nobels` itself keeps its original columns for the later cells.
nobels_by_year = nobels.set_index('year')

In [6]:
# Display the year-indexed frame (truncated by the max_rows display option).
nobels_by_year

Unnamed: 0_level_0,discipline,nobelist
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
...,...,...
2019,Medicine,William Kaelin Jr.
2019,Peace,Abiy Ahmed
2019,Physics,Didier Queloz
2019,Physics,James Peebles


In [7]:
# The year index is not unique: each year appears once per laureate.
nobels_by_year.index

Int64Index([1901, 1901, 1901, 1901, 1901, 1901, 1902, 1902, 1902, 1902,
            ...
            2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019],
           dtype='int64', name='year', length=950)

In [8]:
# Label-based lookup: every row whose index label is 1901.
nobels_by_year.loc[1901]

Unnamed: 0_level_0,discipline,nobelist
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
1901,Physics,Wilhelm Röntgen


In [9]:
# Row label plus column label: the 'nobelist' column for 1901, returned as a Series.
nobels_by_year.loc[1901, 'nobelist']

year
1901    Jacobus Henricus van 't Hoff
1901                 Sully Prudhomme
1901          Emil Adolf von Behring
1901                  Frédéric Passy
1901                    Henry Dunant
1901                 Wilhelm Röntgen
Name: nobelist, dtype: object

In [10]:
# Label slice over years 1901 through 1918. Unlike positional slicing, a .loc
# slice includes the end label, so the 1918 rows are part of the result.
nobels_by_year.loc[1901:1918]

Unnamed: 0_level_0,discipline,nobelist
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
...,...,...
1917,Literature,Karl Adolph Gjellerup
1917,Peace,International Committee of the Red Cross
1917,Physics,Charles Glover Barkla
1918,Chemistry,Fritz Haber


In [11]:
# Index by discipline and sort the index, so that label slices over discipline
# names (e.g. 'Medicine':'Peace') can be taken from it.
nobels_by_discipline = nobels.set_index('discipline').sort_index()

In [13]:
# Preview the first five rows of the discipline-indexed frame.
# NOTE(review): the saved output for this cell shows more than five rows, so it
# looks like it was produced by an earlier version of the cell - re-run to refresh.
nobels_by_discipline.head()

Unnamed: 0_level_0,year,nobelist
discipline,Unnamed: 1_level_1,Unnamed: 2_level_1
Chemistry,1901,Jacobus Henricus van 't Hoff
Chemistry,1988,Robert Huber
Chemistry,1932,Irving Langmuir
Chemistry,1988,Johann Deisenhofer
Chemistry,1988,Hartmut Michel
...,...,...
Physics,1999,Gerard 't Hooft
Physics,1922,Niels Bohr
Physics,1998,Robert B. Laughlin
Physics,1921,Albert Einstein


In [14]:
# String labels can be sliced as well: every row from 'Medicine' through 'Peace',
# with the end label included.
nobels_by_discipline.loc['Medicine':'Peace']

Unnamed: 0_level_0,year,nobelist
discipline,Unnamed: 1_level_1,Unnamed: 2_level_1
Medicine,1995,Christiane Nüsslein-Volhard
Medicine,1993,Phillip Allen Sharp
Medicine,1927,Julius Wagner-Jauregg
Medicine,1994,Alfred G. Gilman
Medicine,1993,Richard J. Roberts
...,...,...
Peace,1973,Henry Kissinger
Peace,1995,Pugwash Conferences on Science and World Affairs
Peace,1911,Alfred Hermann Fried
Peace,1973,Le Duc Tho


In [17]:
# Positional indexing: the first ten rows (positions 0-9), whatever their index labels are.
nobels_by_discipline.iloc[0:10]

Unnamed: 0_level_0,year,nobelist
discipline,Unnamed: 1_level_1,Unnamed: 2_level_1
Chemistry,1901,Jacobus Henricus van 't Hoff
Chemistry,1988,Robert Huber
Chemistry,1932,Irving Langmuir
Chemistry,1988,Johann Deisenhofer
Chemistry,1988,Hartmut Michel
Chemistry,1987,Jean-Marie Lehn
Chemistry,1987,Donald J. Cram
Chemistry,1987,Charles J. Pedersen
Chemistry,1986,Yuan T. Lee
Chemistry,1986,John Polanyi


In [18]:
# Build a two-level MultiIndex: year is level 0 and discipline is level 1.
nobels_multi = nobels.set_index(['year','discipline'])

In [19]:
# Display the MultiIndexed frame; 'nobelist' is the only remaining data column.
nobels_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist
year,discipline,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
...,...,...
2019,Medicine,William Kaelin Jr.
2019,Peace,Abiy Ahmed
2019,Physics,Didier Queloz
2019,Physics,James Peebles


In [20]:
# The MultiIndex is a sequence of (year, discipline) tuples, one per row.
nobels_multi.index

MultiIndex([(1901,  'Chemistry'),
            (1901, 'Literature'),
            (1901,   'Medicine'),
            (1901,      'Peace'),
            (1901,      'Peace'),
            (1901,    'Physics'),
            (1902,  'Chemistry'),
            (1902, 'Literature'),
            (1902,   'Medicine'),
            (1902,      'Peace'),
            ...
            (2019,  'Economics'),
            (2019,  'Economics'),
            (2019, 'Literature'),
            (2019,   'Medicine'),
            (2019,   'Medicine'),
            (2019,   'Medicine'),
            (2019,      'Peace'),
            (2019,    'Physics'),
            (2019,    'Physics'),
            (2019,    'Physics')],
           names=['year', 'discipline'], length=950)

In [21]:
# Values of the outer level (level 0, named 'year') for every row.
nobels_multi.index.get_level_values(0)

Int64Index([1901, 1901, 1901, 1901, 1901, 1901, 1902, 1902, 1902, 1902,
            ...
            2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019],
           dtype='int64', name='year', length=950)

In [22]:
# Values of the inner level (level 1, named 'discipline') for every row.
nobels_multi.index.get_level_values(1)

Index(['Chemistry', 'Literature', 'Medicine', 'Peace', 'Peace', 'Physics',
       'Chemistry', 'Literature', 'Medicine', 'Peace',
       ...
       'Economics', 'Economics', 'Literature', 'Medicine', 'Medicine',
       'Medicine', 'Peace', 'Physics', 'Physics', 'Physics'],
      dtype='object', name='discipline', length=950)

In [25]:
# A complete (year, discipline) tuple selects all rows matching both index levels.
nobels_multi.loc[(2019, 'Physics')]

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist
year,discipline,Unnamed: 2_level_1
2019,Physics,Didier Queloz
2019,Physics,James Peebles
2019,Physics,Michel Mayor


In [26]:
# Demonstration of a failure: the start:stop slice notation is not valid Python
# inside a tuple, so this cell raises a SyntaxError before pandas is involved.
# A range of years has to be written with slice(...) instead.
nobels_multi.loc[(2019:2020, 'Physics')]

SyntaxError: invalid syntax (<ipython-input-26-107d411db366>, line 1)

In [29]:
# Demonstration of a failure: slice(2019, 2020) is valid syntax, but without an
# explicit column indexer .loc reads the 2-tuple as (rows, columns) and looks up
# 'Physics' as a column label, which raises KeyError.
nobels_multi.loc[(slice(2019,2020), 'Physics')]

KeyError: 'Physics'

In [30]:
# Passing an explicit column indexer (the trailing ':') removes the ambiguity:
# the tuple is now read as (year range, discipline) on the row MultiIndex.
# pd.IndexSlice allows the year range to be written with ':' slice notation.
nobels_multi.loc[pd.IndexSlice[1901:1910, 'Physics'], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist
year,discipline,Unnamed: 2_level_1
1901,Physics,Wilhelm Röntgen
1902,Physics,Hendrik Lorentz
1902,Physics,Pieter Zeeman
1903,Physics,Henri Becquerel
1903,Physics,Marie Curie
1903,Physics,Pierre Curie
1904,Physics,Lord Rayleigh
1905,Physics,Philipp Lenard
1906,Physics,J. J. Thomson
1907,Physics,Albert Abraham Michelson


In [31]:
# slice(None) is the spelled-out form of a bare ':' - it selects every year,
# so this returns the Physics rows for all years.
nobels_multi.loc[(slice(None), 'Physics'), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist
year,discipline,Unnamed: 2_level_1
1901,Physics,Wilhelm Röntgen
1902,Physics,Hendrik Lorentz
1902,Physics,Pieter Zeeman
1903,Physics,Henri Becquerel
1903,Physics,Marie Curie
...,...,...
2018,Physics,Donna Strickland
2018,Physics,Gérard Mourou
2019,Physics,Didier Queloz
2019,Physics,James Peebles


In [32]:
# Boolean-mask filter on the flat frame: Chemistry prizes awarded from 1901
# through 1910 (between() includes both endpoints by default).
nobels[
    nobels.year.between(1901, 1910)
    & (nobels.discipline == 'Chemistry')
]

Unnamed: 0,year,discipline,nobelist
0,1901,Chemistry,Jacobus Henricus van 't Hoff
6,1902,Chemistry,Hermann Emil Fischer
13,1903,Chemistry,Svante Arrhenius
20,1904,Chemistry,William Ramsay
26,1905,Chemistry,Adolf von Baeyer
31,1906,Chemistry,Henri Moissan
37,1907,Chemistry,Eduard Buchner
43,1908,Chemistry,Ernest Rutherford
50,1909,Chemistry,Wilhelm Ostwald
57,1910,Chemistry,Otto Wallach


In [None]:
# The same three-condition filter as the boolean-mask cell, written as a query
# string that refers to the columns by name.
nobels.query('year >= 1901 and year <= 1910 and discipline == "Chemistry"')