# DATAFRAMES
### DataFrames are the main tool for working with pandas

In [4]:
# SETUP: imports and random seed
import numpy as np
import pandas as pd
from numpy.random import randn
from numpy.random import randint
np.random.seed(101) # Seed NumPy's global random generator so randn() draws are repeatable

## Multi-index and Index Hierarchy

In [5]:
# Index levels: each outer group label is paired with an inner number
outside = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']
inside = [1, 2, 3, 1, 2, 3]
index_pairs = list(zip(outside, inside))  # [('G1', 1), ('G1', 2), ...]
hier_index = pd.MultiIndex.from_tuples(index_pairs)

In [6]:
# Outer-level labels, one per row
outside

['G1', 'G1', 'G1', 'G2', 'G2', 'G2']

In [7]:
# Inner-level labels, one per row
inside

[1, 2, 3, 1, 2, 3]

In [11]:
list(zip(outside,inside)) # Pair each outer label with its inner number as a list of tuples

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [13]:
# Build a MultiIndex from the list of (outer, inner) tuples.
# hier_index is already a MultiIndex by this point, so the raw tuples are
# rebuilt from outside/inside to show the actual tuples -> MultiIndex step.
pd.MultiIndex.from_tuples(list(zip(outside, inside)))

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [14]:
# Inspect the MultiIndex built from the (outside, inside) pairs
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [17]:
# DataFrame with a two-level row index (hier_index) and columns A and B,
# filled with a 6x2 array of randn draws
df = pd.DataFrame(data=randn(6, 2), index=hier_index, columns=['A', 'B'])
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-0.134841,0.390528
G1,2,0.166905,0.184502
G1,3,0.807706,0.07296
G2,1,0.638787,0.329646
G2,2,-0.497104,-0.75407
G2,3,-0.943406,0.484752


In [21]:
# Selecting on the outer level returns every row of that group;
# print everything in G1 first, then everything in G2
for group in ('G1', 'G2'):
    print(df.loc[group])

          A         B
1 -0.134841  0.390528
2  0.166905  0.184502
3  0.807706  0.072960
          A         B
1  0.638787  0.329646
2 -0.497104 -0.754070
3 -0.943406  0.484752


In [40]:
# Select a single row as a Series by chaining .loc:
# outer label 'G1' first, then inner label 1
df.loc['G1'].loc[1]

A   -0.134841
B    0.390528
Name: 1, dtype: float64

In [41]:
# Select a single row as a Series by chaining .loc:
# outer label 'G2' first, then inner label 3
df.loc['G2'].loc[3]

A   -0.943406
B    0.484752
Name: 3, dtype: float64

In [43]:
# Notice that the index levels do not have names yet
df.index.names

FrozenList([None, None])

In [44]:
# Name the two index levels:
# outer level (G1/G2) -> 'Group', inner level (1, 2, 3) -> 'Num'
df.index.names = ['Group','Num']

In [45]:
# Display the frame; the index levels are labelled Group and Num
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.134841,0.390528
G1,2,0.166905,0.184502
G1,3,0.807706,0.07296
G2,1,0.638787,0.329646
G2,2,-0.497104,-0.75407
G2,3,-0.943406,0.484752


# Indexing out the elements

In [46]:
# Full frame, shown for reference before picking out single elements
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.134841,0.390528
G1,2,0.166905,0.184502
G1,3,0.807706,0.07296
G2,1,0.638787,0.329646
G2,2,-0.497104,-0.75407
G2,3,-0.943406,0.484752


In [59]:
# Method 1: step by step, keeping each intermediate result
G2 = df.loc['G2']    # all rows of group G2
loc_2 = G2.loc[2]    # row 2 within G2, as a Series
loc_2['A']           # value in column A

-0.49710402288933153

In [60]:
# Method 2: the same lookup chained into one expression
# (group 'G2' -> row 2 -> column 'A');
# df.loc[('G2', 2), 'A'] gives the same value in a single lookup
df.loc['G2'].loc[2]['A']

-0.49710402288933153

# Cross-section
A cross-section (`xs`) can also select on an inner level of a multi-level index directly, without first selecting the outer level, which makes grabbing that data quick.

In [63]:
# xs with an outer-level key returns the same rows as df.loc['G1']
df.xs('G1')

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.134841,0.390528
2,0.166905,0.184502
3,0.807706,0.07296


In [76]:
# xs can select on an inner level directly, skipping the outer one:
#   df.xs(key, level=<name of the index level the key belongs to>)
# Here: every row whose 'Num' is 1, across all groups
df.xs(1, level='Num')

Unnamed: 0_level_0,A,B
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-0.134841,0.390528
G2,0.638787,0.329646


In [77]:
# Every row whose 'Num' is 2, across all groups
df.xs(2, level='Num')

Unnamed: 0_level_0,A,B
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.166905,0.184502
G2,-0.497104,-0.75407


# END