In [2]:
import pandas as pd

In [3]:
df = pd.DataFrame({
    'name': ['Robert', 'Felicia', 'Diana'],
    'age': [22, 34, 27],
    'occupation': ['Designer', 'Programmer', 'Data Scientist']
})

df = df.set_index('name')
df['summary'] = df.apply(lambda row: f'Age: {row["age"]}, Occupation: {row["occupation"]}', axis=1)
df

Unnamed: 0_level_0,age,occupation,summary
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Robert,22,Designer,"Age: 22, Occupation: Designer"
Felicia,34,Programmer,"Age: 34, Occupation: Programmer"
Diana,27,Data Scientist,"Age: 27, Occupation: Data Scientist"


In [4]:
df.loc['Jim'] = [None, None, None]
df

Unnamed: 0_level_0,age,occupation,summary
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Robert,22.0,Designer,"Age: 22, Occupation: Designer"
Felicia,34.0,Programmer,"Age: 34, Occupation: Programmer"
Diana,27.0,Data Scientist,"Age: 27, Occupation: Data Scientist"
Jim,,,


In [5]:
df.age > 30 # you can filter data by using boolean operators

name
Robert     False
Felicia     True
Diana      False
Jim        False
Name: age, dtype: bool

In [6]:
df[[False, True, False, False]] # and use them to show only the filtered data 

Unnamed: 0_level_0,age,occupation,summary
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Felicia,34,Programmer,"Age: 34, Occupation: Programmer"


In [7]:
df[df.age > 30] # by creating a list, it will show only the rows that correspond to the filtering

Unnamed: 0_level_0,age,occupation,summary
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Felicia,34,Programmer,"Age: 34, Occupation: Programmer"


In [8]:
(df.age > 30) & (df.occupation.notna()) # filtering using boolean and logical operators

name
Robert     False
Felicia     True
Diana      False
Jim        False
dtype: bool

In [9]:
df[(df.age > 30) & (df.occupation.notna())]

Unnamed: 0_level_0,age,occupation,summary
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Felicia,34,Programmer,"Age: 34, Occupation: Programmer"


In [10]:
(df.age > 30) | (df.occupation.notna())

name
Robert      True
Felicia     True
Diana       True
Jim        False
dtype: bool

In [11]:
df[(df.age > 30) | (df.occupation.notna())]

Unnamed: 0_level_0,age,occupation,summary
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Robert,22,Designer,"Age: 22, Occupation: Designer"
Felicia,34,Programmer,"Age: 34, Occupation: Programmer"
Diana,27,Data Scientist,"Age: 27, Occupation: Data Scientist"


In [12]:
df[-df.occupation.notna()]

Unnamed: 0_level_0,age,occupation,summary
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jim,,,


In [13]:
df = df.reset_index()

In [14]:
df[df.name.str.endswith('a') & (df.age.notna())]

Unnamed: 0,name,age,occupation,summary
1,Felicia,34,Programmer,"Age: 34, Occupation: Programmer"
2,Diana,27,Data Scientist,"Age: 27, Occupation: Data Scientist"


In [15]:
df.age = df.age.fillna(45)

  df.age = df.age.fillna(45)


In [16]:
df

Unnamed: 0,name,age,occupation,summary
0,Robert,22,Designer,"Age: 22, Occupation: Designer"
1,Felicia,34,Programmer,"Age: 34, Occupation: Programmer"
2,Diana,27,Data Scientist,"Age: 27, Occupation: Data Scientist"
3,Jim,45,,


In [17]:
import datetime as dt

df['birthday'] = df['age'].apply(lambda x: dt.datetime.now() - dt.timedelta(days=365*x))
# this lambda expression will create a birthday date for each person on the data frame based on their age

In [18]:
df

Unnamed: 0,name,age,occupation,summary,birthday
0,Robert,22,Designer,"Age: 22, Occupation: Designer",2003-07-16 13:29:45.568518
1,Felicia,34,Programmer,"Age: 34, Occupation: Programmer",1991-07-19 13:29:45.568528
2,Diana,27,Data Scientist,"Age: 27, Occupation: Data Scientist",1998-07-17 13:29:45.568531
3,Jim,45,,,1980-07-21 13:29:45.568533


In [19]:
df[df.birthday.dt.year > 1990] # by adding a birthday, you can also filter data base in the year they were born

Unnamed: 0,name,age,occupation,summary,birthday
0,Robert,22,Designer,"Age: 22, Occupation: Designer",2003-07-16 13:29:45.568518
1,Felicia,34,Programmer,"Age: 34, Occupation: Programmer",1991-07-19 13:29:45.568528
2,Diana,27,Data Scientist,"Age: 27, Occupation: Data Scientist",1998-07-17 13:29:45.568531


In [20]:
ages = [22,45]

df[df.age.isin(ages)] # you can also create a list and use it as a filter

Unnamed: 0,name,age,occupation,summary,birthday
0,Robert,22,Designer,"Age: 22, Occupation: Designer",2003-07-16 13:29:45.568518
3,Jim,45,,,1980-07-21 13:29:45.568533


In [21]:
df.query('age > 25') # or, instead of using the previous syntax, you can also use the query function

Unnamed: 0,name,age,occupation,summary,birthday
1,Felicia,34,Programmer,"Age: 34, Occupation: Programmer",1991-07-19 13:29:45.568528
2,Diana,27,Data Scientist,"Age: 27, Occupation: Data Scientist",1998-07-17 13:29:45.568531
3,Jim,45,,,1980-07-21 13:29:45.568533


In [22]:
df.occupation = df.occupation.fillna('Unemployed')

In [24]:
df

Unnamed: 0,name,age,occupation,summary,birthday
0,Robert,22,Designer,"Age: 22, Occupation: Designer",2003-07-16 13:29:45.568518
1,Felicia,34,Programmer,"Age: 34, Occupation: Programmer",1991-07-19 13:29:45.568528
2,Diana,27,Data Scientist,"Age: 27, Occupation: Data Scientist",1998-07-17 13:29:45.568531
3,Jim,45,Unemployed,,1980-07-21 13:29:45.568533


In [25]:
# Sorting data

df.sort_values('age')

Unnamed: 0,name,age,occupation,summary,birthday
0,Robert,22,Designer,"Age: 22, Occupation: Designer",2003-07-16 13:29:45.568518
2,Diana,27,Data Scientist,"Age: 27, Occupation: Data Scientist",1998-07-17 13:29:45.568531
1,Felicia,34,Programmer,"Age: 34, Occupation: Programmer",1991-07-19 13:29:45.568528
3,Jim,45,Unemployed,,1980-07-21 13:29:45.568533


In [26]:
df.sort_values('age', ascending=False)

Unnamed: 0,name,age,occupation,summary,birthday
3,Jim,45,Unemployed,,1980-07-21 13:29:45.568533
1,Felicia,34,Programmer,"Age: 34, Occupation: Programmer",1991-07-19 13:29:45.568528
2,Diana,27,Data Scientist,"Age: 27, Occupation: Data Scientist",1998-07-17 13:29:45.568531
0,Robert,22,Designer,"Age: 22, Occupation: Designer",2003-07-16 13:29:45.568518


In [27]:
df.sort_values('occupation')

Unnamed: 0,name,age,occupation,summary,birthday
2,Diana,27,Data Scientist,"Age: 27, Occupation: Data Scientist",1998-07-17 13:29:45.568531
0,Robert,22,Designer,"Age: 22, Occupation: Designer",2003-07-16 13:29:45.568518
1,Felicia,34,Programmer,"Age: 34, Occupation: Programmer",1991-07-19 13:29:45.568528
3,Jim,45,Unemployed,,1980-07-21 13:29:45.568533
