# Explicit Indexes

In [2]:
import pandas as pd
import numpy as np


portfolio = pd.read_csv("/data/workspace_files/investment_portfolio.csv")
portfolio.head()

Unnamed: 0,Product,Symbol/ISIN,Amount,Closing,Local value,Value in GBP
0,ADVANCED MICRO DEVICES,US0079031078,2,95.12,USD 190.24,155.14
1,AIRBUS SE,NL0000235190,3,106.38,EUR 319.14,270.96
2,ALPHABET INC. - CLASS C,US02079K1079,1,2330.31,USD 2330.31,1900.38
3,AMAZON.COM INC. - COM,US0231351067,1,2261.1,USD 2261.10,1843.93
4,BAE SYS.,GB0002634946,20,741.2,GBX 14824.00,148.24


In [3]:
portfolio.columns

Index(['Product', 'Symbol/ISIN', 'Amount', 'Closing', 'Local value',
       'Value in GBP'],
      dtype='object')

In [4]:
portfolio.index

RangeIndex(start=0, stop=91, step=1)

We have seen .columns and .index before. We can more a column to the index using the .set index method 

This can be undone with the .reset_index() function. Data can be removed entirely if we set (drop=True) in this 

Indexes makes subsetting code easier and we can then use .loc() instead of .isin()

In [5]:
portfolio_indexed = portfolio.set_index("Product")

portfolio_indexed

Unnamed: 0_level_0,Symbol/ISIN,Amount,Closing,Local value,Value in GBP
Product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ADVANCED MICRO DEVICES,US0079031078,2,95.12,USD 190.24,155.14
AIRBUS SE,NL0000235190,3,106.38,EUR 319.14,270.96
ALPHABET INC. - CLASS C,US02079K1079,1,2330.31,USD 2330.31,1900.38
AMAZON.COM INC. - COM,US0231351067,1,2261.10,USD 2261.10,1843.93
BAE SYS.,GB0002634946,20,741.20,GBX 14824.00,148.24
...,...,...,...,...,...
UNILEVER,GB00B10RZP78,3,3747.00,GBX 11241.00,112.41
VOLTA FIN,GG00B1GHHH78,600,5.90,EUR 3540.00,3005.60
WALT DISNEY COMPANY (T,US2546871060,3,107.33,USD 321.99,262.58
XTRACKERS MSCI SINGAPORE UCITS ...,LU0659578842,1200,1.27,EUR 1524.48,1294.35


In [6]:
portfolio_indexed.reset_index()

Unnamed: 0,Product,Symbol/ISIN,Amount,Closing,Local value,Value in GBP
0,ADVANCED MICRO DEVICES,US0079031078,2,95.12,USD 190.24,155.14
1,AIRBUS SE,NL0000235190,3,106.38,EUR 319.14,270.96
2,ALPHABET INC. - CLASS C,US02079K1079,1,2330.31,USD 2330.31,1900.38
3,AMAZON.COM INC. - COM,US0231351067,1,2261.10,USD 2261.10,1843.93
4,BAE SYS.,GB0002634946,20,741.20,GBX 14824.00,148.24
...,...,...,...,...,...,...
86,UNILEVER,GB00B10RZP78,3,3747.00,GBX 11241.00,112.41
87,VOLTA FIN,GG00B1GHHH78,600,5.90,EUR 3540.00,3005.60
88,WALT DISNEY COMPANY (T,US2546871060,3,107.33,USD 321.99,262.58
89,XTRACKERS MSCI SINGAPORE UCITS ...,LU0659578842,1200,1.27,EUR 1524.48,1294.35


In [7]:
portfolio_indexed.loc[["AIRBUS SE", "BAE SYS."]]

Unnamed: 0_level_0,Symbol/ISIN,Amount,Closing,Local value,Value in GBP
Product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AIRBUS SE,NL0000235190,3,106.38,EUR 319.14,270.96
BAE SYS.,GB0002634946,20,741.2,GBX 14824.00,148.24


We can set multiple indexes in the index. This is known as hierachal indexes, the inner one will be nested within the outer one 

The data must match up at both levels to be successfully requested with loc

In [8]:
portfolio_indexed_2 = portfolio.set_index(["Symbol/ISIN", "Product"])

portfolio_indexed_2

Unnamed: 0_level_0,Unnamed: 1_level_0,Amount,Closing,Local value,Value in GBP
Symbol/ISIN,Product,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
US0079031078,ADVANCED MICRO DEVICES,2,95.12,USD 190.24,155.14
NL0000235190,AIRBUS SE,3,106.38,EUR 319.14,270.96
US02079K1079,ALPHABET INC. - CLASS C,1,2330.31,USD 2330.31,1900.38
US0231351067,AMAZON.COM INC. - COM,1,2261.10,USD 2261.10,1843.93
GB0002634946,BAE SYS.,20,741.20,GBX 14824.00,148.24
...,...,...,...,...,...
GB00B10RZP78,UNILEVER,3,3747.00,GBX 11241.00,112.41
GG00B1GHHH78,VOLTA FIN,600,5.90,EUR 3540.00,3005.60
US2546871060,WALT DISNEY COMPANY (T,3,107.33,USD 321.99,262.58
LU0659578842,XTRACKERS MSCI SINGAPORE UCITS ...,1200,1.27,EUR 1524.48,1294.35


In [9]:
portfolio_indexed_2.loc[[("US0231351067", "AMAZON.COM INC. - COM"), ("GG00B1GHHH78", "VOLTA FIN")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Amount,Closing,Local value,Value in GBP
Symbol/ISIN,Product,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
US0231351067,AMAZON.COM INC. - COM,1,2261.1,USD 2261.10,1843.93
GG00B1GHHH78,VOLTA FIN,600,5.9,EUR 3540.00,3005.6


We can also sort by index this will sort by outer index, then by inner index 

This can be further refined with the level= and ascending= functions 

In [10]:
portfolio_indexed_2 = portfolio_indexed_2.sort_index(level=["Symbol/ISIN", "Product"], ascending=[False, True])

portfolio_indexed_2

Unnamed: 0_level_0,Unnamed: 1_level_0,Amount,Closing,Local value,Value in GBP
Symbol/ISIN,Product,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
US98419M1009,XYLEM INC. COMMON STOC,4,83.91,USD 335.64,273.72
US8926721064,TRADEWEB MARKETS INC. - CLASS A...,30,67.27,USD 2018.10,1645.77
US8336351056,SOCIEDAD QUIMICA Y MIN,4,83.61,USD 334.44,272.74
US8334451098,SNOWFLAKE INC-CLASS A,1,158.36,USD 158.36,129.14
US78409V1044,S&P GLOBAL INC,1,336.85,USD 336.85,274.70
...,...,...,...,...,...
,ES 3400 P 19DEC25,1,334.50,USD 16725.00,13639.30
,ES 3600 P 17JUN22,-1,27.25,USD -1362.50,-1111.12
,ES 3800 P 16DEC22,-1,218.75,USD -10937.50,-8919.57
,ES 4000 P 18DEC26,-1,553.25,USD -27662.50,-22558.87


In [11]:
portfolio_indexed_2.sort_index(level=0)
portfolio_indexed_2.sort_index(level=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Amount,Closing,Local value,Value in GBP
Symbol/ISIN,Product,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
US0079031078,ADVANCED MICRO DEVICES,2,95.12,USD 190.24,155.14
NL0000235190,AIRBUS SE,3,106.38,EUR 319.14,270.96
US02079K1079,ALPHABET INC. - CLASS C,1,2330.31,USD 2330.31,1900.38
US0231351067,AMAZON.COM INC. - COM,1,2261.10,USD 2261.10,1843.93
GB0002634946,BAE SYS.,20,741.20,GBX 14824.00,148.24
...,...,...,...,...,...
GB00B10RZP78,UNILEVER,3,3747.00,GBX 11241.00,112.41
GG00B1GHHH78,VOLTA FIN,600,5.90,EUR 3540.00,3005.60
US2546871060,WALT DISNEY COMPANY (T,3,107.33,USD 321.99,262.58
LU0659578842,XTRACKERS MSCI SINGAPORE UCITS ...,1200,1.27,EUR 1524.48,1294.35


Indexes are v.useful however can violate some further big data principles i.e. "tidy data"

They are annoying as the syntaxes for working with indexes is different to that with columns, so you have to learn two syntaxes

# Slicing & Subsetting

In regular python we can slice and subset lists. In base python we would do something like this list[1:4], remember slicing starts from 0. 

We can also slice dataframes, but first we need to sort the index otherwise it won't be useful 

To slice at the outer levels of the index we can use loc. This will not work for inner index levels though, be careful as pandas will not throw an error here, so you can think the index is empty when it is really an error! As such we must pass the elements as tuples 

We can slice on rows and columns

One of the key benefits of slicing is to be able to slice by dates. We can slice by partial dates too i.e. "2020":"2022"

In [12]:
portfolio_indexed.loc["BLACKSTONE INC":"CARNIVAL"]

Unnamed: 0_level_0,Symbol/ISIN,Amount,Closing,Local value,Value in GBP
Product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BLACKSTONE INC,US09260D1072,3,107.82,USD 323.46,263.78
BLUEFIELD SOLAR,GG00BB0RDB98,7000,133.0,GBX 931000.00,9310.0
BP,GB0007980591,40,414.7,GBX 16588.00,165.88
BR.LAND,GB0001367019,18,501.4,GBX 9025.20,90.25
CANOPY GROWTH CORPORATION COMMO...,CA1380351009,12,5.92,USD 71.04,57.93
CARNIVAL,GB0031215220,36,1072.0,GBX 38592.00,385.92


In [13]:
# portfolio_indexed_2.index.is_monotonic_increasing

portfolio_indexed_2.loc[("US0079031078", "ADVANCED MICRO DEVICES"):("US0231351067", "AMAZON.COM INC. - COM")]

UnsortedIndexError: UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (0)'

In [73]:
portfolio_indexed_2.loc[:, "ADVANCED MICRO DEVICES", "UNILEVER"]

IndexError: IndexError: list index out of range

In [79]:
portfolio_indexed.loc["BLACKSTONE INC":"CARNIVAL", "Amount":"Local value" ]

Unnamed: 0_level_0,Amount,Closing,Local value
Product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BLACKSTONE INC,3,107.82,USD 323.46
BLUEFIELD SOLAR,7000,133.0,GBX 931000.00
BP,40,414.7,GBX 16588.00
BR.LAND,18,501.4,GBX 9025.20
CANOPY GROWTH CORPORATION COMMO...,12,5.92,USD 71.04
CARNIVAL,36,1072.0,GBX 38592.00


We can also slice datasets using the iloc method, integer loc, this is similar to the format we saw before with lists 

# Pivot Tables

Pivot tables have sorted indexes so we can use functions like loc that we have already seen 

We can calculate values such as the mean by using an axis argument i.e. .mean(axis="index"). If we want to calculate across columns we can use the axis="columns" argument 

In [16]:
port_pivot = portfolio.pivot_table(values="Value in GBP", index="Product")

port_pivot

Unnamed: 0_level_0,Value in GBP
Product,Unnamed: 1_level_1
ADVANCED MICRO DEVICES,155.14
AIRBUS SE,270.96
ALPHABET INC. - CLASS C,1900.38
AMAZON.COM INC. - COM,1843.93
BAE SYS.,148.24
...,...
UNILEVER,112.41
VOLTA FIN,3005.60
WALT DISNEY COMPANY (T,262.58
XTRACKERS MSCI SINGAPORE UCITS ...,1294.35


In [18]:
port_pivot.loc["BAE SYS.":"BLUEFIELD SOLAR"]

Unnamed: 0_level_0,Value in GBP
Product,Unnamed: 1_level_1
BAE SYS.,148.24
BLACKROCK INC. COMMON,501.8
BLACKSTONE GSO£,950.4
BLACKSTONE INC,263.78
BLUEFIELD SOLAR,9310.0


In [19]:
port_pivot.mean(axis="index")