In [2]:
people = {
    "first": ["Corey", 'Jane', 'John'],
    "last": ["Schafer", 'Doe', 'Doe'],
    "email": ["CoreyMSchafer@gmail.com", 'JaneDoe@email.com', 'JohnDoe@email.com']
}

In [3]:
import pandas as pd

In [4]:
df = pd.DataFrame(people)

In [5]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


**VIDEO-03**

In [5]:
df['email']

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [7]:
# what if we want to set the email column as index
df.set_index('email')

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [8]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [9]:
# pandas does not modify the original data.
# so you can modify and experiment with the data on your own.
# you can do that by explicitly mentioning that you want those changes to be made to the file.

In [10]:
df.set_index('email', inplace=True)
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [11]:
# now this has changed the original dataset.
df.index

Index(['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com'], dtype='object', name='email')

In [12]:
# here we can see that the first column is email as an index

In [15]:
# in this cell the email is acting as an index
df.loc['CoreyMSchafer@gmail.com']
# so if we want to access the data of that row we can use email as an index
# to find the row.

first      Corey
last     Schafer
Name: CoreyMSchafer@gmail.com, dtype: object

In [17]:
# in this case, we used 'loc' which uses the label name to find the items.
# example:
df.loc['CoreyMSchafer@gmail.com', 'last']
# this way you can find the last name of the email row.

'Schafer'

In [19]:
# now we can use something like this that we did previosly
# df.loc[0]
# this will surely throw an error that loc can not find the 0th row
# because the dataframe is not labeled as 0, 1, 2, 3, 4....
# but the dataframe is labeled through email.

In [21]:
# we can still access the dataframe using integers by using 'iloc'
df.iloc[0]
# and this will give me the data available in the 0th row

first      Corey
last     Schafer
Name: CoreyMSchafer@gmail.com, dtype: object

In [25]:
# but now that i know, in the original data the indexes as changed inplace.
# and after a fruitful discussion i thought of reverting back.
# to the thing that i want to reset the index just like they were.
# we can achieve this by doing something like this.
df.reset_index(inplace=True)
df
# and this code will reset the indexes just like they were.
# and you can go on experimenting on the dataset just like it was in the beginning.

Unnamed: 0,index,email,first,last
0,0,CoreyMSchafer@gmail.com,Corey,Schafer
1,1,JaneDoe@email.com,Jane,Doe
2,2,JohnDoe@email.com,John,Doe


**FILTERING THE DATAFRAME**

In [7]:
# we run th first 4 cells to access the dataframe.

In [8]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [12]:
# this is a type of check that we made.
# so in our column where the value is true for the given assignment that will be true.

filt = (df['last'] == 'Doe')

In [13]:
df[filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [14]:
df.loc[filt, 'email']

1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

In [17]:
# we can use & as AND operator
filt = (df['last'] == 'Doe') & (df['first'] == 'John')

In [18]:
df.loc[filt, 'email']

2    JohnDoe@email.com
Name: email, dtype: object

In [19]:
# we can use | as OR operator
filt = (df['last'] == 'Schafer') | (df['first'] == 'John')

In [20]:
df.loc[filt, 'email']

0    CoreyMSchafer@gmail.com
2          JohnDoe@email.com
Name: email, dtype: object

In [21]:
# we can also use a NEGATION filter.
# negation is like you get the opposite of the result.
# NEGATION can be achieved using the ~ this symbol
# Example
df.loc[~filt, 'email']

1    JaneDoe@email.com
Name: email, dtype: object