In [15]:
import pandas as pd
import numpy as np

#### truncate  

In [16]:
# truncate function truncates dataframe/series based on 
# specified index value. 

# this function filters the dataframe/series using the index 
# instead of their content.

In [19]:
import numpy as np
import pandas as pd
df = pd.DataFrame(np.random.randint(50, size=(10, 5)),
                  columns=list("ABCDE"),
                  index=pd.date_range(start="2023-01-01", 
                                      periods=10, freq="D"))
df

Unnamed: 0,A,B,C,D,E
2023-01-01,44,22,29,37,36
2023-01-02,20,7,6,26,11
2023-01-03,14,47,20,35,9
2023-01-04,8,14,48,6,4
2023-01-05,43,39,31,42,23
2023-01-06,26,18,14,1,13
2023-01-07,18,39,11,13,39
2023-01-08,40,34,9,42,17
2023-01-09,34,49,11,0,3
2023-01-10,35,25,28,44,7


In [22]:
# truncate based on row index

df.truncate(before='2023-01-05', after='2023-01-08')

Unnamed: 0,A,B,C,D,E
2023-01-05,43,39,31,42,23
2023-01-06,26,18,14,1,13
2023-01-07,18,39,11,13,39
2023-01-08,40,34,9,42,17


In [24]:
# truncate based on column index

df.truncate(after='D', axis=1)

Unnamed: 0,A,B,C,D
2023-01-01,44,22,29,37
2023-01-02,20,7,6,26
2023-01-03,14,47,20,35
2023-01-04,8,14,48,6
2023-01-05,43,39,31,42
2023-01-06,26,18,14,1
2023-01-07,18,39,11,13
2023-01-08,40,34,9,42
2023-01-09,34,49,11,0
2023-01-10,35,25,28,44


#### Filter

In [30]:
array = np.random.randint(low=50, high=90, size=(4,5))
df = pd.DataFrame(array, columns=['Afrin','Almirr','Erin','Peter','Isabel'],
                 index=['A', 'B', 'C', 'D'])
df

Unnamed: 0,Afrin,Almirr,Erin,Peter,Isabel
A,72,64,68,83,72
B,65,65,87,85,53
C,74,86,85,88,65
D,81,89,84,83,63


In [34]:
# we need only this column which contains letter 'A'

df.filter(like='A', axis=1)

Unnamed: 0,Afrin,Almirr
A,72,64
B,65,65
C,74,86
D,81,89


#### Duplicate

In [36]:
df = pd.DataFrame({'names':['A', 'B', 'M', 'M', 'A'],
                         'socre':[55,58,68,68,60]})
df

Unnamed: 0,names,socre
0,A,55
1,B,58
2,M,68
3,M,68
4,A,60


In [37]:
# same value in all cols

df['is_duplicate'] = df.duplicated()
df

Unnamed: 0,names,socre,is_duplicate
0,A,55,False
1,B,58,False
2,M,68,False
3,M,68,True
4,A,60,False


In [39]:
# we can specify the col name to find duplicate values

df['is_duplicate'] = df.duplicated(subset='names')
df

Unnamed: 0,names,socre,is_duplicate
0,A,55,False
1,B,58,False
2,M,68,False
3,M,68,True
4,A,60,True


#### first 

In [54]:
df = pd.DataFrame(np.random.randint(50, size=(10, 5)),
                  columns=list("ABCDE"),
                  index=pd.date_range(start="2023-01-01", 
                                      periods=10, freq="2D"))
df

Unnamed: 0,A,B,C,D,E
2023-01-01,27,47,46,46,25
2023-01-03,25,8,15,7,40
2023-01-05,42,13,5,6,29
2023-01-07,10,49,34,17,25
2023-01-09,13,26,1,39,5
2023-01-11,41,4,44,3,21
2023-01-13,2,21,4,10,5
2023-01-15,21,33,25,46,28
2023-01-17,18,37,24,32,11
2023-01-19,38,3,15,25,44


In [57]:
# there is a 2 days gap in every row. 
df.first('10D')

Unnamed: 0,A,B,C,D,E
2023-01-01,27,47,46,46,25
2023-01-03,25,8,15,7,40
2023-01-05,42,13,5,6,29
2023-01-07,10,49,34,17,25
2023-01-09,13,26,1,39,5


In [58]:
df.first("1W")

Unnamed: 0,A,B,C,D,E
2023-01-01,27,47,46,46,25


### nlargest

In [67]:
df = pd.DataFrame(np.random.randint(50, size=(10, 5)),
                  columns=list("ABCDE"),
                  index=pd.date_range(start="2023-01-01", 
                                      periods=10, freq="2D"))
df

Unnamed: 0,A,B,C,D,E
2023-01-01,6,18,37,12,48
2023-01-03,23,31,10,9,11
2023-01-05,3,29,25,32,9
2023-01-07,16,8,38,36,46
2023-01-09,25,38,21,27,12
2023-01-11,39,34,49,44,48
2023-01-13,22,28,9,29,12
2023-01-15,19,3,16,20,31
2023-01-17,29,15,0,33,3
2023-01-19,27,26,12,30,6


In [59]:
df.nlargest(3, columns=['A'])

Unnamed: 0,A,B,C,D,E
2023-01-05,42,13,5,6,29
2023-01-11,41,4,44,3,21
2023-01-19,38,3,15,25,44


In [61]:
# different method

df.sort_values(by='A', ascending=False).head(3)

Unnamed: 0,A,B,C,D,E
2023-01-05,42,13,5,6,29
2023-01-11,41,4,44,3,21
2023-01-19,38,3,15,25,44
