In [77]:
import pandas as pd
import numpy as np

In [78]:
data = {'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}

labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

df = pd.DataFrame(data, index=labels)


In [79]:
df.info()
# df.describe()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, a to j
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   animal    10 non-null     object 
 1   age       8 non-null      float64
 2   visits    10 non-null     int64  
 3   priority  10 non-null     object 
dtypes: float64(1), int64(1), object(2)
memory usage: 400.0+ bytes


In [80]:
df.head(10)

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no
f,cat,2.0,3,no
g,snake,4.5,1,no
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


In [81]:
df[['animal', 'age']] #just the 'animal' and 'age' columns from the DataFrame df.

Unnamed: 0,animal,age
a,cat,2.5
b,cat,3.0
c,snake,0.5
d,dog,
e,dog,5.0
f,cat,2.0
g,snake,4.5
h,cat,
i,dog,7.0
j,dog,3.0


In [82]:
df[['animal','age']].iloc[[3,4,8]] #data in rows [3, 4, 8] and in columns ['animal', 'age'].

Unnamed: 0,animal,age
d,dog,
e,dog,5.0
i,dog,7.0


In [83]:
visitsMoreThanThree = df["visits"] > 3
df[visitsMoreThanThree]

Unnamed: 0,animal,age,visits,priority


In [84]:
df[df.age.isna()] #or df[df["age"].isna()] 
# df[df.animal.isnull()]

Unnamed: 0,animal,age,visits,priority
d,dog,,3,yes
h,cat,,1,yes


In [85]:
df[((df.animal == "cat") | (df.animal == "dog")) & (df.age < 3)]
#or
df[(df.animal.isin(["cat", "dog"])) & (df.age)]

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
e,dog,5.0,2,no
f,cat,2.0,3,no
i,dog,7.0,2,no
j,dog,3.0,1,no


In [86]:
df[df['age'].between(2, 4)] #rows the age is between 2 and 4 (inclusive).

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
f,cat,2.0,3,no
j,dog,3.0,1,no


In [87]:
df.loc['f', 'age'] = 1.5 #Change the age in row 'f' to 1.5

In [88]:
df['visits'].sum() #calculate the sum of all visits

np.int64(19)

In [89]:
df.groupby('animal')['age'].mean() #Calculate the mean age for each different animal in `df`.
df.groupby("animal")["visits"].mean() #Calculate the mean visits for each different animal in df.

animal
cat      2.0
dog      2.0
snake    1.5
Name: visits, dtype: float64

In [90]:
df['animal'].value_counts()

animal
cat      4
dog      4
snake    2
Name: count, dtype: int64

In [91]:
df.sort_values(by=['age', 'visits'], ascending=[False, True])

Unnamed: 0,animal,age,visits,priority
i,dog,7.0,2,no
e,dog,5.0,2,no
g,snake,4.5,1,no
j,dog,3.0,1,no
b,cat,3.0,3,yes
a,cat,2.5,1,yes
f,cat,1.5,3,no
c,snake,0.5,2,no
h,cat,,1,yes
d,dog,,3,yes


In [92]:
df['priority'] = df['priority'].map({'yes': True, 'no': False}) #The 'priority' column contains the values 'yes' and 'no'. 
#Replace this column with a column of boolean values: 'yes' should be True and 'no' should be False.
df.head()

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,True
b,cat,3.0,3,True
c,snake,0.5,2,False
d,dog,,3,True
e,dog,5.0,2,False


In [93]:
df.pivot_table(index='animal', columns='visits', values='age', aggfunc='mean')

visits,1,2,3
animal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cat,2.5,,2.25
dog,3.0,6.0,
snake,4.5,0.5,


Beyond the basics

In [98]:
df = pd.DataFrame({'A': [1, 2, 2, 3, 4, 5, 5, 5, 6, 7, 7]})

df.loc[df['A'].shift() != df['A']]

# Alternatively, we could use drop_duplicates() here. Note
# that this removes *all* duplicates though, so it won't
# work as desired if A is [1, 1, 2, 2, 1, 1] for example.

# df.drop_duplicates(subset='A')

Unnamed: 0,A
0,1
1,2
3,3
4,4
5,5
8,6
9,7


In [99]:
df = pd.DataFrame(np.random.random(size=(5, 3)))

df.sub(df.mean(axis=1), axis=0)

Unnamed: 0,0,1,2
0,0.521418,-0.26086,-0.260558
1,0.163279,-0.184752,0.021473
2,0.080808,-0.168105,0.087298
3,-0.270495,0.308287,-0.037792
4,0.232704,-0.043983,-0.188721
