## Intro to the MultiIndex Module:

In [1]:
import pandas as pd

In [2]:
bigmac = pd.read_csv("C:/Users/kkher/Desktop/Pandas/bigmac.csv", parse_dates = ["Date"]) ## parse_dates converts the listed columns (here "Date") to datetime values while the file is read
bigmac.head()

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35
3,2016-01-01,Britain,4.22
4,2016-01-01,Canada,4.14


In [3]:
bigmac.dtypes

Date                   datetime64[ns]
Country                        object
Price in US Dollars           float64
dtype: object

In [4]:
bigmac.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652 entries, 0 to 651
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date                 652 non-null    datetime64[ns]
 1   Country              652 non-null    object        
 2   Price in US Dollars  652 non-null    float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 15.4+ KB


## Create a MultiIndex with the .set_index() Method:

In [5]:
bigmac = pd.read_csv("C:/Users/kkher/Desktop/Pandas/bigmac.csv", parse_dates = ["Date"])
bigmac.head()

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35
3,2016-01-01,Britain,4.22
4,2016-01-01,Canada,4.14


In [6]:
bigmac.set_index(keys = "Country") ## .set_index() returns a new DataFrame that uses the "Country" column as its index; the result is only displayed here, bigmac itself is not modified

Unnamed: 0_level_0,Date,Price in US Dollars
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Argentina,2016-01-01,2.39
Australia,2016-01-01,3.74
Brazil,2016-01-01,3.35
Britain,2016-01-01,4.22
Canada,2016-01-01,4.14
...,...,...
Turkey,2010-01-01,3.83
UAE,2010-01-01,2.99
Ukraine,2010-01-01,1.83
United States,2010-01-01,3.58


In [7]:
## MultiIndex is just an index with multiple levels or multiple layers

In [8]:
bigmac.set_index(keys = ["Date", "Country"], inplace = True)

In [9]:
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14


In [10]:
bigmac.sort_index(inplace = True)

In [11]:
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [12]:
bigmac.index.names

FrozenList(['Date', 'Country'])

In [13]:
type(bigmac.index)

pandas.core.indexes.multi.MultiIndex

In [14]:
bigmac.index[0]

(Timestamp('2010-01-01 00:00:00'), 'Argentina')

## The .get_level_values() Method:

In [15]:
bigmac = pd.read_csv("C:/Users/kkher/Desktop/Pandas/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
bigmac.sort_index(inplace = True)
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [16]:
## .get_level_values() is called on the index of a MultiIndex DataFrame to extract the values stored in one of its levels; the level is chosen by position or by name.

In [17]:
## A level can be selected by position or by name. "Date" is the first level (position 0),
## so both calls return the same values; the notebook only displays the last one.
bigmac.index.get_level_values(0)
bigmac.index.get_level_values("Date")

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01'],
              dtype='datetime64[ns]', name='Date', length=652, freq=None)

## The .set_names() Method:

In [18]:
bigmac = pd.read_csv("C:/Users/kkher/Desktop/Pandas/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
bigmac.sort_index(inplace = True)
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [19]:
## Rename the index levels in place: one name per level, in level order.
## "Date" keeps its name and the second level "Country" becomes "Location".
bigmac.index.set_names(["Date", "Location"], inplace = True)

In [20]:
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Location,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


## The .sort_index() Method on a MultiIndex DataFrame:

In [21]:
bigmac = pd.read_csv("C:/Users/kkher/Desktop/Pandas/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
bigmac.sort_index(inplace = True)
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [22]:
## ascending takes one flag per index level: Date is sorted ascending,
## and within each date Country is sorted descending (Z to A).
bigmac.sort_index(ascending = [True, False], inplace = True)

In [23]:
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Uruguay,3.32
2010-01-01,United States,3.58
2010-01-01,Ukraine,1.83
2010-01-01,UAE,2.99
2010-01-01,Turkey,3.83


## Extract Rows from a MultiIndex DataFrame:

In [24]:
bigmac = pd.read_csv("C:/Users/kkher/Desktop/Pandas/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
bigmac.sort_index(inplace = True)
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [25]:
bigmac.loc[("2010-01-01", "Brazil"), "Price in US Dollars"]

Date        Country
2010-01-01  Brazil     4.76
Name: Price in US Dollars, dtype: float64

In [26]:
bigmac.loc[("2015-07-01", "Chile"), "Price in US Dollars"]

Date        Country
2015-07-01  Chile      3.27
Name: Price in US Dollars, dtype: float64

In [27]:
## Select a row by its MultiIndex labels with .loc: the tuple supplies one label per level
## (Date first, then Country). The original `bigmac.i[...]` raised AttributeError because
## DataFrame has no attribute `i` (presumably a truncated `.ix`, an indexer pandas has removed).
bigmac.loc[("2016-01-01", "China")]

## The .transpose() Method and MultiIndex on Column Level:

In [None]:
bigmac = pd.read_csv("C:/Users/kkher/Desktop/Pandas/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
bigmac.sort_index(inplace = True)
bigmac.head()

In [None]:
bigmac = bigmac.transpose() ## transpose swaps the two axes: the row index (here the Date/Country MultiIndex) becomes the column index, and the former columns become the rows

In [None]:
bigmac.head()

## The .swaplevel() Method:

In [None]:
bigmac = pd.read_csv("C:/Users/kkher/Desktop/Pandas/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
bigmac.sort_index(inplace = True)
bigmac.head()

In [None]:
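## .swaplevel() swaps the positions of two levels within a MultiIndex; called with no arguments it swaps the two innermost levels (here Date and Country)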

In [None]:
bigmac = bigmac.swaplevel()

In [None]:
bigmac.head()

## The .stack() Method:

In [None]:
## .stack() takes the column labels (the horizontal index at the top) and moves them into the row index (the vertical index on the left), as a new innermost level

In [None]:
world = pd.read_csv("C:/Users/kkher/Desktop/Pandas/worldstats.csv", index_col = ["country", "year"])
world.head()

In [None]:
world.stack().to_frame()

## The .unstack() Method, Part 1:

In [None]:
world = pd.read_csv("C:/Users/kkher/Desktop/Pandas/worldstats.csv", index_col = ["country", "year"])
world.head()

In [None]:
s = world.stack()
s.head()

In [None]:
s.unstack().unstack().unstack()

## The .unstack() Method, Part 2:

In [None]:
world = pd.read_csv("C:/Users/kkher/Desktop/Pandas/worldstats.csv", index_col = ["country", "year"])
world.head()

In [None]:
s = world.stack()
s.head()

In [None]:
## s has three index levels: country (0), year (1) and the former column labels (2).
## Levels can be counted from the front or, with negative numbers, from the back,
## so each pair below refers to the same level. Only the last expression is displayed.

## innermost level (former column labels)
s.unstack(-1)
s.unstack(2)

## middle level (year)
s.unstack(-2)
s.unstack(1)

## outermost level (country)
s.unstack(-3)
s.unstack(0)

In [None]:
s.unstack("year")

In [None]:
s.unstack("country")

## The .unstack() Method, Part 3:

In [None]:
world = pd.read_csv("C:/Users/kkher/Desktop/Pandas/worldstats.csv", index_col = ["country", "year"])
s = world.stack()
world.head()

In [None]:
s.unstack(level = [1, 0])

In [None]:
s.unstack(level = ["country", "year"])

In [None]:
## fill_value replaces the missing values that unstacking produces (combinations with no data) with 0 instead of NaN
s.unstack("year", fill_value = 0)

In [None]:
s.head()

## The .pivot() Method:

In [None]:
## .pivot() reorients a dataset: it takes the unique values currently stored in one column and turns them into column headers, without aggregating anything

In [None]:
sales = pd.read_csv("C:/Users/kkher/Desktop/Pandas/salesmen.csv", parse_dates = ["Date"])
sales["Salesman"] = sales["Salesman"].astype("category")
sales.head()

In [None]:
len(sales)

In [None]:
sales["Salesman"].value_counts()

In [None]:
sales.pivot(index = "Date", columns = "Salesman", values = "Revenue")

In [None]:
len(sales.pivot(index = "Date", columns = "Salesman", values = "Revenue"))

## The .pivot_table() Method:

In [None]:
## .pivot_table() groups a DataFrame by the given index/columns values and aggregates the remaining values (sum, count, max, ...) to summarize the data as a whole

In [None]:
foods = pd.read_csv("C:/Users/kkher/Desktop/Pandas/foods.csv")
foods.head()

In [None]:
foods.pivot_table(values = "Spend", index = "Gender", aggfunc = "sum")

In [None]:
foods.pivot_table(values = "Spend", index = "Item", aggfunc = "sum")

In [None]:
foods.pivot_table(values = "Spend", index = ["Gender", "Item"], aggfunc = "sum")

In [None]:
foods.pivot_table(values = "Spend", index = ["Gender", "Item"], columns = "City", aggfunc = "sum")

In [None]:
foods.pivot_table(values = "Spend", index = ["Gender", "Item"], columns = ["Frequency", "City"], aggfunc = "sum")

In [None]:
foods.pivot_table(values = "Spend", index = ["Gender", "Item"], columns = "City", aggfunc = "count")

In [None]:
foods.pivot_table(values = "Spend", index = ["Gender", "Item"], columns = "City", aggfunc = "max")

In [None]:
foods.pivot_table(values = "Spend", index = ["Gender", "Item"], columns = "City", aggfunc = "min").head()

In [None]:
pd.pivot_table(data = foods, values = "Spend", index = ["Gender", "Item"], columns = "City", aggfunc = "min")

## The pd.melt() Method:

In [None]:
sales = pd.read_csv("C:/Users/kkher/Desktop/Pandas/quarters.csv")
sales.head()

In [None]:
pd.melt(sales, id_vars = "Salesman", var_name = "Quarter", value_name = "Revenue") ## melt reshapes the DataFrame from wide to long: "Salesman" is kept as an identifier, and every other column becomes rows holding its header ("Quarter") and its value ("Revenue")