## 1 Setup

### 1.1 Import Libraries

In [1]:
from datetime import datetime

import pandas as pd

# Record the pandas version used to produce the outputs shown in this notebook.
print("Pandas version:", pd.__version__)

Pandas version: 2.3.0


### 1.2 Import Data

In [2]:
# Read the CSV from a path relative to the notebook's working directory.
mk_df = pd.read_csv(filepath_or_buffer="data/mckinsey.csv")

# Preview the first three rows to check the columns that were loaded.
mk_df.head(n=3)

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap
0,Afghanistan,1952,8425333,Asia,28.801,779.445314
1,Afghanistan,1957,9240934,Asia,30.332,820.85303
2,Afghanistan,1962,10267083,Asia,31.997,853.10071


In [3]:
# Column-oriented data for a small hand-built employee table.
# Every column has five entries; DOJ values are kept as plain strings.
employee_data = dict(
    Name=["Alex", "Ajax", "Jane", "John", "Anna"],
    Age=[31, 31, 28, 35, 40],
    Role=["Senior SD", "Associate Architect", "Junior SD", "Architect", "V.P."],
    DOJ=["01-06-2021", "01-01-2025", "01-03-2023", "01-12-2022", "01-08-2000"],
)

# One row per employee; no index is given, so pandas assigns the default 0..4 labels.
emp_df = pd.DataFrame(employee_data)
emp_df

Unnamed: 0,Name,Age,Role,DOJ
0,Alex,31,Senior SD,01-06-2021
1,Ajax,31,Associate Architect,01-01-2025
2,Jane,28,Junior SD,01-03-2023
3,John,35,Architect,01-12-2022
4,Anna,40,V.P.,01-08-2000


In [4]:
# (number of rows, number of columns) of the loaded frame.
mk_df.shape

(1704, 6)

### 1.3 Update Index

#### McKinsey `DataFrame`

##### Before update

In [5]:
# Row labels before re-indexing: the default labels start at 0.
mk_df.index.values

array([   0,    1,    2, ..., 1701, 1702, 1703], shape=(1704,))

In [6]:
# Re-label the rows 1..N instead of the default 0..N-1 so that a row's label
# differs from its position; the [] / .loc / .iloc comparisons later in the
# notebook rely on that difference.
# RangeIndex yields the same labels as list(range(...)) without building an
# intermediate Python list.
mk_df.index = pd.RangeIndex(start=1, stop=len(mk_df) + 1)

##### After update

In [7]:
# Row labels after re-indexing: they now run from 1 to the number of rows.
mk_df.index.values

array([   1,    2,    3, ..., 1702, 1703, 1704], shape=(1704,))

#### Employee `DataFrame`

##### Before update

In [8]:
# Row labels of the employee frame before re-indexing (default integers).
emp_df.index.values

array([0, 1, 2, 3, 4])

In [9]:
# Replace the integer labels with zero-padded string IDs: E01, E02, ...
# The labels are generated from the frame's current length, so this cell does
# not fail with a length mismatch if employees are added to or removed from
# employee_data above.
emp_df.index = [f"E{n:02d}" for n in range(1, len(emp_df) + 1)]

##### After update

In [10]:
# Row labels after re-indexing: string employee IDs.
emp_df.index.values

array(['E01', 'E02', 'E03', 'E04', 'E05'], dtype=object)

## 2 Access Rows

### 2.1 The Problem

#### Case #1: Using Single Index

In [11]:
# Plain [] with a scalar looks the key up among the column labels,
# so passing a row label raises KeyError; catch it and show the message.
try:
    mk_df[1]
except KeyError as exc:
    print("KeyError:", exc)

KeyError: 1


In [12]:
# "E05" is a row label, not a column label, so plain [] raises KeyError;
# catch it and show the message.
try:
    emp_df["E05"]
except KeyError as exc:
    print("KeyError:", exc)

KeyError: 'E05'


#### Case #2: Using Multiple Index

In [13]:
# Comma-separated keys inside [] form a single tuple key (1, 2, 3), which is
# looked up as one column label and is not found; show the resulting KeyError.
try:
    mk_df[1, 2, 3]
except KeyError as exc:
    print("KeyError:", exc)

KeyError: (1, 2, 3)


In [14]:
# Comma-separated keys inside [] form a single tuple key, which is looked up
# as one column label and is not found; show the resulting KeyError.
# NOTE(review): the last label is spelled "EO5" with a capital letter O rather
# than a zero — probably a typo for "E05". Correcting it also requires
# re-running the cell so the printed KeyError message matches.
try:
    emp_df["E03", "E04", "EO5"]
except KeyError as err:
    print("KeyError:", err)

KeyError: ('E03', 'E04', 'EO5')


#### Case #3: Using Slicing

In [15]:
# Plain [] with an integer slice selects rows by position, not by label:
# the result starts at position 100 (label 101) and excludes the stop.
mk_df[100:115:5]

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap
101,Bangladesh,1972,70759295,Asia,45.252,630.233627
106,Bangladesh,1997,123315288,Asia,59.412,972.770035
111,Belgium,1962,9218400,Europe,70.25,10991.20676


In [16]:
# Plain [] with a string slice selects rows by label, and the stop label is
# included. The stop must be the real label "E05" (digit zero): a label that
# is not in the index is only tolerated when the index happens to be sorted,
# and raises KeyError otherwise.
emp_df["E03":"E05"]

Unnamed: 0,Name,Age,Role,DOJ
E03,Jane,28,Junior SD,01-03-2023
E04,John,35,Architect,01-12-2022
E05,Anna,40,V.P.,01-08-2000


### 2.2 Solution

### 2.3 Using `loc`

#### Case #1: Using Single Index

In [17]:
# .loc selects by label: the row labelled 1 comes back as a Series whose
# index is the column names.
mk_df.loc[1]

country       Afghanistan
year                 1952
population        8425333
continent            Asia
life_exp           28.801
gdp_cap        779.445314
Name: 1, dtype: object

In [18]:
# Label-based lookup of a single row using a string label.
emp_df.loc["E05"]

Name          Anna
Age             40
Role          V.P.
DOJ     01-08-2000
Name: E05, dtype: object

#### Case #2: Using Multiple Index

In [19]:
# A list of labels (note the inner brackets) returns a DataFrame with those rows.
mk_df.loc[[1, 2, 3]]

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap
1,Afghanistan,1952,8425333,Asia,28.801,779.445314
2,Afghanistan,1957,9240934,Asia,30.332,820.85303
3,Afghanistan,1962,10267083,Asia,31.997,853.10071


In [20]:
# A list of string labels returns a DataFrame with those rows.
emp_df.loc[["E03", "E04", "E05"]]

Unnamed: 0,Name,Age,Role,DOJ
E03,Jane,28,Junior SD,01-03-2023
E04,John,35,Architect,01-12-2022
E05,Anna,40,V.P.,01-08-2000


#### Case #3: Using Slicing

In [21]:
# Label-based slicing includes the stop label: rows labelled 100, 105, 110
# and 115 are all returned (compare with the positional [] slice earlier).
mk_df.loc[100:115:5]

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap
100,Bangladesh,1967,62821884,Asia,43.453,721.186086
105,Bangladesh,1992,113704579,Asia,56.018,837.810164
110,Belgium,1957,8989111,Europe,69.24,9714.960623
115,Belgium,1982,9856303,Europe,73.93,20979.84589


In [22]:
# String label slice; both endpoints "E03" and "E05" are included.
emp_df.loc["E03":"E05"]

Unnamed: 0,Name,Age,Role,DOJ
E03,Jane,28,Junior SD,01-03-2023
E04,John,35,Architect,01-12-2022
E05,Anna,40,V.P.,01-08-2000


### 2.4 Using `iloc`

1. Because the implicit (positional) index supports negative values, `iloc` also accepts negative positions.
2. The end position is exclusive when slicing.

#### Case #1: Using Single Index

In [23]:
# .iloc selects by position: position 0 is the first row, whose label is 1.
mk_df.iloc[0]

country       Afghanistan
year                 1952
population        8425333
continent            Asia
life_exp           28.801
gdp_cap        779.445314
Name: 1, dtype: object

In [24]:
# Position 4 is the fifth row, whose label is "E05".
emp_df.iloc[4]

Name          Anna
Age             40
Role          V.P.
DOJ     01-08-2000
Name: E05, dtype: object

#### Case #2: Using Multiple Index

In [25]:
# A list of positions; position 1703 is the last row (label 1704).
mk_df.iloc[[0, 10, 1703]]

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap
1,Afghanistan,1952,8425333,Asia,28.801,779.445314
11,Afghanistan,2002,25268405,Asia,42.129,726.734055
1704,Zimbabwe,2007,12311143,Africa,43.487,469.709298


In [26]:
# Positions may be listed in any order; rows come back in the order requested.
emp_df.iloc[[4, 1, 3]]

Unnamed: 0,Name,Age,Role,DOJ
E05,Anna,40,V.P.,01-08-2000
E02,Ajax,31,Associate Architect,01-01-2025
E04,John,35,Architect,01-12-2022


#### Case #3: Using Slicing

In [27]:
# Positional slicing excludes the stop: positions 100, 105 and 110 are
# returned, which carry the labels 101, 106 and 111.
mk_df.iloc[100:115:5]

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap
101,Bangladesh,1972,70759295,Asia,45.252,630.233627
106,Bangladesh,1997,123315288,Asia,59.412,972.770035
111,Belgium,1962,9218400,Europe,70.25,10991.20676


With `iloc`, the end position is exclusive and selection works on the implicit (positional) index rather than on the row labels.

In [28]:
# Positions 0, 1 and 2 are returned; the stop position 3 is excluded.
emp_df.iloc[0:3]

Unnamed: 0,Name,Age,Role,DOJ
E01,Alex,31,Senior SD,01-06-2021
E02,Ajax,31,Associate Architect,01-01-2025
E03,Jane,28,Junior SD,01-03-2023


### 2.5 Conclusion

## 3 Access Columns

##### Example #1

In [29]:
# Plain [] with a column label returns that column as a Series that keeps
# the frame's row labels.
emp_df["Name"]

E01    Alex
E02    Ajax
E03    Jane
E04    John
E05    Anna
Name: Name, dtype: object

Explicit row index can be used with Series.

In [30]:
# Select the column first, then index the resulting Series by row label
# to get a single value.
emp_df["Name"]["E05"]

'Anna'

##### Example #2

In [31]:
# Last three values of the "continent" column, shown with their row labels.
mk_df["continent"].tail(3)

1702    Africa
1703    Africa
1704    Africa
Name: continent, dtype: object

Explicit row index can be used with Series.

In [32]:
# 1702 is used as a row label of the Series here (labels run 1..1704),
# not as a position.
mk_df["continent"][1702]

'Africa'

In [33]:
# Show the first three rows again as a reference for the selection below.
mk_df.head(3)

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap
1,Afghanistan,1952,8425333,Asia,28.801,779.445314
2,Afghanistan,1957,9240934,Asia,30.332,820.85303
3,Afghanistan,1962,10267083,Asia,31.997,853.10071


In [34]:
# .loc accepts a row slice and a column slice together; both are label-based
# and include their stop (row label 2, column "continent").
mk_df.loc[:2, "year":"continent"]

Unnamed: 0,year,population,continent
1,1952,8425333,Asia
2,1957,9240934,Asia


```python
# The Series must be bound to a name before it can be indexed.
# Label 2 appears twice in the index, so a[2] returns both matching entries.
a = pd.Series(['a','b','c'], index=[1,2,2])
print(a[2])
```

In [35]:
# Value of the "country" column at row label 1 (the first row).
mk_df["country"][1]

'Afghanistan'

#### Using `loc` and `iloc`

In [36]:
# temp["column name 1"]

In [37]:
# temp.loc["a"]

In [38]:
# temp.loc[["a"]]

In [39]:
# temp.loc[["a", "c"]]

In [40]:
# temp.reset_index(drop=True)

In [41]:
# df.loc[9::-3]

### 9.3 Access Index

In [42]:
# df.head(10)

In [43]:
# pd.DataFrame(df, columns=['country', 'year'])
# df.iloc[:, 0:2]

In [44]:
# df.nunique()

In [45]:
# df.iloc[:20]

## 4 Insert

In [46]:
# Show the employee frame before a row is inserted.
emp_df

Unnamed: 0,Name,Age,Role,DOJ
E01,Alex,31,Senior SD,01-06-2021
E02,Ajax,31,Associate Architect,01-01-2025
E03,Jane,28,Junior SD,01-03-2023
E04,John,35,Architect,01-12-2022
E05,Anna,40,V.P.,01-08-2000


In [47]:
# Assigning through .loc to a label that does not exist yet appends a new row
# with that label; the list supplies one value per column, in column order.
emp_df.loc["E06"] = ["Bob", 30, "Junior SD", "01-07-2025"]
# Display the frame to confirm the new "E06" row.
emp_df

Unnamed: 0,Name,Age,Role,DOJ
E01,Alex,31,Senior SD,01-06-2021
E02,Ajax,31,Associate Architect,01-01-2025
E03,Jane,28,Junior SD,01-03-2023
E04,John,35,Architect,01-12-2022
E05,Anna,40,V.P.,01-08-2000
E06,Bob,30,Junior SD,01-07-2025


In [48]:
# .iloc is purely positional and cannot create rows: the frame has six rows
# (positions 0-5), so assigning to position 6 raises IndexError.
try:
    emp_df.iloc[6] = ["Kavin", 30, "Junior SD", "01-07-2025"]
except IndexError as exc:
    print("IndexError:", exc)

IndexError: iloc cannot enlarge its target object
