In [1]:
import pandas as pd
import numpy as np

In [2]:
data = {'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

In [3]:
df = pd.DataFrame(data=data, index=labels)

In [4]:
df.head()

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, a to j
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   animal    10 non-null     object 
 1   age       8 non-null      float64
 2   visits    10 non-null     int64  
 3   priority  10 non-null     object 
dtypes: float64(1), int64(1), object(2)
memory usage: 400.0+ bytes


In [6]:
df.head(3)

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no


In [7]:
df[['age','animal']]

Unnamed: 0,age,animal
a,2.5,cat
b,3.0,cat
c,0.5,snake
d,,dog
e,5.0,dog
f,2.0,cat
g,4.5,snake
h,,cat
i,7.0,dog
j,3.0,dog


In [8]:
df[['animal', 'age']].iloc[[3,4,8]]

Unnamed: 0,animal,age
d,dog,
e,dog,5.0
i,dog,7.0


In [9]:
df[df['visits'] > 3]

Unnamed: 0,animal,age,visits,priority


In [10]:
df[df.age.isna()]

Unnamed: 0,animal,age,visits,priority
d,dog,,3,yes
h,cat,,1,yes


In [11]:
df[(df.animal == 'cat') & (df.age<3)]

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
f,cat,2.0,3,no


In [12]:
df[(df.age<4)&(df.age>=2)]

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
f,cat,2.0,3,no
j,dog,3.0,1,no


In [13]:
df.loc['f', 'age'] = 1.5

In [14]:
df['visits'].sum()

np.int64(19)

In [15]:
df.groupby(['animal'])['age'].agg('mean')

animal
cat      2.333333
dog      5.000000
snake    2.500000
Name: age, dtype: float64

In [16]:
df.loc['k'] = ['Horse',3 , 2, 'yes']
df

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no
f,cat,1.5,3,no
g,snake,4.5,1,no
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


In [17]:
df.drop(index=['k'])

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no
f,cat,1.5,3,no
g,snake,4.5,1,no
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


In [18]:
df['count'] = 1
df.groupby(['animal'])['count'].sum()

animal
Horse    1
cat      4
dog      4
snake    2
Name: count, dtype: int64

In [19]:
df.sort_values(by=['age'],ascending=True)

Unnamed: 0,animal,age,visits,priority,count
c,snake,0.5,2,no,1
f,cat,1.5,3,no,1
a,cat,2.5,1,yes,1
b,cat,3.0,3,yes,1
j,dog,3.0,1,no,1
k,Horse,3.0,2,yes,1
g,snake,4.5,1,no,1
e,dog,5.0,2,no,1
i,dog,7.0,2,no,1
d,dog,,3,yes,1


In [20]:
mapped_dict = {'yes': True, 'no': False}
df["priority"] = df.priority.map(mapped_dict)
df
# This can be done with replace method also

Unnamed: 0,animal,age,visits,priority,count
a,cat,2.5,1,True,1
b,cat,3.0,3,True,1
c,snake,0.5,2,False,1
d,dog,,3,True,1
e,dog,5.0,2,False,1
f,cat,1.5,3,False,1
g,snake,4.5,1,False,1
h,cat,,1,True,1
i,dog,7.0,2,False,1
j,dog,3.0,1,False,1


In [21]:
df['animal'].replace({'snake':'python'}, inplace=True)
df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['animal'].replace({'snake':'python'}, inplace=True)


Unnamed: 0,animal,age,visits,priority,count
a,cat,2.5,1,True,1
b,cat,3.0,3,True,1
c,python,0.5,2,False,1
d,dog,,3,True,1
e,dog,5.0,2,False,1
f,cat,1.5,3,False,1
g,python,4.5,1,False,1
h,cat,,1,True,1
i,dog,7.0,2,False,1
j,dog,3.0,1,False,1


In [22]:
df.pivot_table(values='age', index='animal', columns='visits', aggfunc='mean')

visits,1,2,3
animal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Horse,,3.0,
cat,2.5,,2.25
dog,3.0,6.0,
python,4.5,0.5,


In [36]:
df = pd.DataFrame({'A': [1, 2, 2, 3, 4, 5, 5, 5, 6, 7, 7]})

In [37]:
df

Unnamed: 0,A
0,1
1,2
2,2
3,3
4,4
5,5
6,5
7,5
8,6
9,7


In [38]:
df['shifted_A'] = df['A'].shift(1)
df_filtered = df[df['A'] != df['shifted_A']]
df_filtered = df_filtered.drop(columns=['shifted_A'])
df_filtered

Unnamed: 0,A
0,1
1,2
3,3
4,4
5,5
8,6
9,7


In [43]:
df = pd.DataFrame(np.random.random(size=(5,3)))
row_mean = df.mean(axis=1)
df = df.sub(row_mean, axis=0)
df

Unnamed: 0,0,1,2
0,0.445025,-0.36241,-0.082616
1,0.082992,0.360325,-0.443318
2,-0.125784,0.077912,0.047872
3,-0.063891,-0.193336,0.257228
4,-0.151698,0.101971,0.049727
