### Import Libraries

In [1]:
import os

import pandas as pd

### MultiIndex theory

**The list of tuples**

In [2]:
# Each tuple is one address: (street, city, state, zip code).
addresses = [
    ("8809 Flair Square", "Toddside", "IL", "37206"),
    ("9901 Austin Street", "Toddside", "IL", "37206"),
    ("905 Hogan Quarter", "Franklin", "IL", "37206"),
]

**Create a MultiIndex**

In [3]:
# Each tuple becomes one entry of the MultiIndex; the levels are unnamed here.
pd.MultiIndex.from_tuples(addresses)

MultiIndex([( '8809 Flair Square', 'Toddside', 'IL', '37206'),
            ('9901 Austin Street', 'Toddside', 'IL', '37206'),
            ( '905 Hogan Quarter', 'Franklin', 'IL', '37206')],
           )

In [4]:
# Same index as above, this time with a name attached to each of the four levels.
row_index = pd.MultiIndex.from_tuples(
    addresses,
    names=["Street", "City", "State", "Zip"],
)

In [5]:
# Show the index again: the four level names are now part of it.
row_index

MultiIndex([( '8809 Flair Square', 'Toddside', 'IL', '37206'),
            ('9901 Austin Street', 'Toddside', 'IL', '37206'),
            ( '905 Hogan Quarter', 'Franklin', 'IL', '37206')],
           names=['Street', 'City', 'State', 'Zip'])

**Our dataframe**

In [6]:
# One row of grades per address; two values per row (one per column).
data = [
    ["A", "B+"],
    ["C+", "C"],
    ["D-", "A"],
]

In [7]:
# Labels for the two grade columns of the DataFrame built in the next cell.
columns = ["Schools", "Cost of Living"]

In [8]:
# Combine the grade rows with the address MultiIndex (rows) and the column labels.
area_grades = pd.DataFrame(
    data,
    index=row_index,
    columns=columns,
)

In [9]:
# Display the frame: four index levels on the left, two grade columns on the right.
area_grades

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Schools,Cost of Living
Street,City,State,Zip,Unnamed: 4_level_1,Unnamed: 5_level_1
8809 Flair Square,Toddside,IL,37206,A,B+
9901 Austin Street,Toddside,IL,37206,C+,C
905 Hogan Quarter,Franklin,IL,37206,D-,A


In [10]:
# The column axis of this frame is still a plain, single-level Index.
area_grades.columns

Index(['Schools', 'Cost of Living'], dtype='object')

**Create a second dataframe**

In [11]:
# Two-level column index: every subcategory is paired with its parent category,
# yielding (category, subcategory) tuples in the order they are listed here.
column_index = pd.MultiIndex.from_tuples(
    [
        (category, subcategory)
        for category, subcategories in {
            "Culture": ["Restaurants", "Museums"],
            "Services": ["Police", "Schools"],
        }.items()
        for subcategory in subcategories
    ]
)

In [12]:
# Inspect the two-level column index; no level names have been set on it.
column_index

MultiIndex([( 'Culture', 'Restaurants'),
            ( 'Culture',     'Museums'),
            ('Services',      'Police'),
            ('Services',     'Schools')],
           )

In [13]:
# Grades for the second frame: one row per address, four values per row
# (one for each entry of the two-level column index).
data = [
    ["C-", "B+", "B-", "A"],
    ["D+", "C", "A", "C+"],
    ["A-", "A", "D+", "F"],
]

In [14]:
# A frame with a MultiIndex on both axes: addresses as rows, category/subcategory as columns.
pd.DataFrame(
    data,
    index=row_index,
    columns=column_index,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Restaurants,Museums,Police,Schools
Street,City,State,Zip,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
8809 Flair Square,Toddside,IL,37206,C-,B+,B-,A
9901 Austin Street,Toddside,IL,37206,D+,C,A,C+
905 Hogan Quarter,Franklin,IL,37206,A-,A,D+,F


### MultiIndex DataFrames

In [16]:
# Path to the neighborhoods CSV. Set the NEIGHBORHOODS_CSV environment variable
# to load the file from another location; when it is unset, the original
# machine-specific path is used so existing runs behave exactly as before.
neighborhoods_path = os.environ.get(
    "NEIGHBORHOODS_CSV",
    '/Users/ypushiev/Learning/PANDAS IN ACTION/Chapter 7 MultiIndex/Data/neighborhoods.csv',
)

**The `index_col` parameter builds the row MultiIndex from columns 0, 1 and 2, and `header=[0, 1]` uses the first two rows of the file as the two levels of the column header**

In [25]:
# The first three CSV columns become the row MultiIndex and the first two
# rows become the two-level column header.
df_neighborhoods = pd.read_csv(
    neighborhoods_path,
    index_col=[0, 1, 2],
    header=[0, 1],
)
df_neighborhoods.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
MO,Fisherborough,244 Tracy View,C+,F,D-,A+
SD,Port Curtisville,446 Cynthia Inlet,C-,B,B,D+
WV,Jimenezview,432 John Common,A,A+,F,B
AK,Stevenshire,238 Andrew Rue,D-,A,A-,A-
ND,New Joshuaport,877 Walter Neck,D+,C-,B,B


In [28]:
# Names of the three row-index levels read from the CSV.
df_neighborhoods.index.names

FrozenList(['State', 'City', 'Street'])

In [31]:
# The column axis is a two-level MultiIndex.
df_neighborhoods.columns

MultiIndex([( 'Culture', 'Restaurants'),
            ( 'Culture',     'Museums'),
            ('Services',      'Police'),
            ('Services',     'Schools')],
           )

In [29]:
# Summary of the frame: index range, column labels, non-null counts and dtypes.
df_neighborhoods.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 251 entries, ('MO', 'Fisherborough', '244 Tracy View') to ('NE', 'South Kennethmouth', '346 Wallace Pass')
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   (Culture, Restaurants)  251 non-null    object
 1   (Culture, Museums)      251 non-null    object
 2   (Services, Police)      251 non-null    object
 3   (Services, Schools)     251 non-null    object
dtypes: object(4)
memory usage: 27.1+ KB


#### get_level_values method

**`df_neighborhoods.index.get_level_values(1)` returns the values of the second level of our MultiIndex => City**

In [37]:
df_neighborhoods.index.get_level_values(1)  # position 1 is the second row level => City

Index(['Fisherborough', 'Port Curtisville', 'Jimenezview', 'Stevenshire',
       'New Joshuaport', 'Wellsville', 'Jodiburgh', 'Lake Christopher',
       'Port Mike', 'Hardyburgh',
       ...
       'Scottstad', 'Port Willieport', 'Port Linda', 'Kaylamouth',
       'Port Shawnfort', 'North Matthew', 'Chadton', 'Diazmouth', 'Laurentown',
       'South Kennethmouth'],
      dtype='object', name='City', length=251)

**The column levels have no names yet, so let's create names for them**

In [38]:
# Neither of the two column levels has a name yet.
df_neighborhoods.columns.names

FrozenList([None, None])

In [39]:
# Assigning to .names labels the two column levels on the existing frame,
# so later cells can refer to the levels by name.
df_neighborhoods.columns.names = ["Category", "Subcategory"]
# Confirm the new level names.
df_neighborhoods.columns.names

FrozenList(['Category', 'Subcategory'])

**Now our column levels are named - Category: Culture, Services and Subcategory: Restaurants, Museums, Police, Schools**

In [42]:
# print() shows the plain-text repr, where the Category/Subcategory level names are visible.
print(df_neighborhoods.head())

Category                                     Culture         Services        
Subcategory                              Restaurants Museums   Police Schools
State City             Street                                                
MO    Fisherborough    244 Tracy View             C+       F       D-      A+
SD    Port Curtisville 446 Cynthia Inlet          C-       B        B      D+
WV    Jimenezview      432 John Common             A      A+        F       B
AK    Stevenshire      238 Andrew Rue             D-       A       A-      A-
ND    New Joshuaport   877 Walter Neck            D+      C-        B       B


In [46]:
# A level can be selected by its name as well as by its position.
df_neighborhoods.columns.get_level_values('Category')

Index(['Culture', 'Culture', 'Services', 'Services'], dtype='object', name='Category')

In [47]:
# Number of distinct values in each column.
df_neighborhoods.nunique()

Category  Subcategory
Culture   Restaurants    13
          Museums        13
Services  Police         13
          Schools        13
dtype: int64

### Sorting a MultiIndex

In [48]:
# Preview the rows in their original file order before sorting.
df_neighborhoods.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Category,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Subcategory,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
MO,Fisherborough,244 Tracy View,C+,F,D-,A+
SD,Port Curtisville,446 Cynthia Inlet,C-,B,B,D+
WV,Jimenezview,432 John Common,A,A+,F,B
AK,Stevenshire,238 Andrew Rue,D-,A,A-,A-
ND,New Joshuaport,877 Walter Neck,D+,C-,B,B


**The sorting is performed level by level - 1 State, 2 City and 3 Street**

In [49]:
# With no arguments the rows are ordered by every index level, ascending.
# The result is a new frame; df_neighborhoods keeps its original order.
df_neighborhoods.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Category,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Subcategory,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AK,Rowlandchester,386 Rebecca Cove,C-,A-,A+,C
AK,Scottstad,082 Leblanc Freeway,D,C-,D,B+
AK,Scottstad,114 Jones Garden,D-,D-,D,D
AK,Stevenshire,238 Andrew Rue,D-,A,A-,A-
AL,Clarkland,430 Douglas Mission,A,F,C+,B+
...,...,...,...,...,...,...
WY,Lake Nicole,754 Weaver Turnpike,B,D-,B,D
WY,Lake Nicole,933 Jennifer Burg,C,A+,A-,C
WY,Martintown,013 Bell Mills,C-,D,A-,B-
WY,Port Jason,624 Faulkner Orchard,A-,F,C+,C+


In [51]:
# ascending=False reverses the order on every index level.
df_neighborhoods.sort_index(ascending=False).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Category,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Subcategory,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
WY,Reneeshire,717 Patel Square,B,B+,D,A
WY,Port Jason,624 Faulkner Orchard,A-,F,C+,C+
WY,Martintown,013 Bell Mills,C-,D,A-,B-
WY,Lake Nicole,933 Jennifer Burg,C,A+,A-,C
WY,Lake Nicole,754 Weaver Turnpike,B,D-,B,D


#### Multiple sorting

In [54]:
# One flag per index level: State descending, City ascending, Street descending.
df_neighborhoods.sort_index(ascending=[False,True,False]).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Category,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Subcategory,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
WY,Hardyburgh,227 Andrew Parkway,D,C-,D-,C
WY,Lake Nicole,933 Jennifer Burg,C,A+,A-,C
WY,Lake Nicole,754 Weaver Turnpike,B,D-,B,D
WY,Martintown,013 Bell Mills,C-,D,A-,B-
WY,Port Jason,624 Faulkner Orchard,A-,F,C+,C+


#### Sorting by only one or two levels of the MultiIndex

In [55]:
# Use the City level as the primary sort key.
df_neighborhoods.sort_index(level='City').head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Category,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Subcategory,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AR,Allisonland,124 Diaz Brooks,C-,A+,F,C+
GA,Amyburgh,941 Brian Expressway,B,B,D-,C+
IA,Amyburgh,163 Heather Neck,F,D,A+,A-
ID,Andrewshire,952 Ellis Drive,C+,A-,C+,A
UT,Baileyfort,919 Stewart Hills,D+,C+,A,C


In [56]:
# City ascending, then Street descending within each city.
df_neighborhoods.sort_index(level=['City','Street'], ascending=[True,False]).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Category,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Subcategory,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AR,Allisonland,124 Diaz Brooks,C-,A+,F,C+
GA,Amyburgh,941 Brian Expressway,B,B,D-,C+
IA,Amyburgh,163 Heather Neck,F,D,A+,A-
ID,Andrewshire,952 Ellis Drive,C+,A-,C+,A
UT,Baileyfort,919 Stewart Hills,D+,C+,A,C


#### Sorting the columns with the axis parameter

*Services, Culture*

In [61]:
# axis='columns' sorts the column MultiIndex instead of the rows;
# descending order puts Services before Culture.
print(df_neighborhoods.sort_index(axis='columns',ascending=False).head())

Category                                 Services            Culture        
Subcategory                               Schools Police Restaurants Museums
State City             Street                                               
MO    Fisherborough    244 Tracy View          A+     D-          C+       F
SD    Port Curtisville 446 Cynthia Inlet       D+      B          C-       B
WV    Jimenezview      432 John Common          B      F           A      A+
AK    Stevenshire      238 Andrew Rue          A-     A-          D-       A
ND    New Joshuaport   877 Walter Neck          B      B          D+      C-


*Culture,Services*

In [62]:
# Ascending column order: Culture before Services, and Museums before Restaurants.
print(df_neighborhoods.sort_index(axis='columns',ascending=True).head())

Category                                 Culture             Services        
Subcategory                              Museums Restaurants   Police Schools
State City             Street                                                
MO    Fisherborough    244 Tracy View          F          C+       D-      A+
SD    Port Curtisville 446 Cynthia Inlet       B          C-        B      D+
WV    Jimenezview      432 John Common        A+           A        F       B
AK    Stevenshire      238 Andrew Rue          A          D-       A-      A-
ND    New Joshuaport   877 Walter Neck        C-          D+        B       B


#### Sorting columns using the level

*Museums   Police Restaurants  Schools*

In [66]:
# Order the columns by the Subcategory level, so the Category labels end up interleaved.
print(df_neighborhoods.sort_index(axis='columns',level='Subcategory', ascending=True).head())

Category                                 Culture Services     Culture Services
Subcategory                              Museums   Police Restaurants  Schools
State City             Street                                                 
MO    Fisherborough    244 Tracy View          F       D-          C+       A+
SD    Port Curtisville 446 Cynthia Inlet       B        B          C-       D+
WV    Jimenezview      432 John Common        A+        F           A        B
AK    Stevenshire      238 Andrew Rue          A       A-          D-       A-
ND    New Joshuaport   877 Walter Neck        C-        B          D+        B


In [67]:
# Explicit ascending=True gives the same row order as the plain sort_index() call earlier.
df_neighborhoods.sort_index(ascending=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Category,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Subcategory,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AK,Rowlandchester,386 Rebecca Cove,C-,A-,A+,C
AK,Scottstad,082 Leblanc Freeway,D,C-,D,B+
AK,Scottstad,114 Jones Garden,D-,D-,D,D
AK,Stevenshire,238 Andrew Rue,D-,A,A-,A-
AL,Clarkland,430 Douglas Mission,A,F,C+,B+
...,...,...,...,...,...,...
WY,Lake Nicole,754 Weaver Turnpike,B,D-,B,D
WY,Lake Nicole,933 Jennifer Burg,C,A+,A-,C
WY,Martintown,013 Bell Mills,C-,D,A-,B-
WY,Port Jason,624 Faulkner Orchard,A-,F,C+,C+
