In [2]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# selecting entries for series

In [3]:
# Four consecutive integers labelled 'A'..'D'.
my_ser = Series(np.arange(4), index=list('ABCD'))
my_ser

A    0
B    1
C    2
D    3
dtype: int32

In [6]:
# Label-based lookup of a single entry.
my_ser.loc['B']

1

In [7]:
# Position-based lookup of a single entry (second element).
# .iloc is used because my_ser has string labels: a plain integer key in []
# only works through a deprecated "treat the int as a position" fallback.
my_ser.iloc[1]

1

In [13]:
# Positional slice: first two entries (end position is exclusive).
my_ser.iloc[:2]

A    0
B    1
dtype: int32

In [10]:
# Select several entries at once by passing a list of labels.
my_ser.loc[['A', 'B', 'C']]

A    0
B    1
C    2
dtype: int32

In [18]:
# We can also select data with a boolean condition; the condition is evaluated on the values, not on the index.

In [17]:
# Keep only the entries whose value is greater than 1.
my_ser.loc[my_ser.gt(1)]

C    2
D    3
dtype: int32

In [19]:
# Overwrite every value greater than 2 with 5.
# This modifies my_ser in place, so later cells see D == 5.
my_ser.loc[my_ser.gt(2)] = 5
my_ser

A    0
B    1
C    2
D    5
dtype: int32

# data entry selection for DataFrame

In [22]:
# 3x4 frame of standard-normal samples with rows 'A'..'C' and columns 'C1'..'C4'.
# No seed is set, so the values differ on every run.
my_df = DataFrame(
    np.random.randn(3, 4),
    index=['A', 'B', 'C'],
    columns=['C1', 'C2', 'C3', 'C4'],
)
my_df

Unnamed: 0,C1,C2,C3,C4
A,1.995548,-0.671564,-0.441124,-1.058379
B,-1.100996,-0.664008,0.16684,-0.72065
C,0.461748,-0.921403,1.463662,0.997846


In [23]:
# Select a single column by label; the result is a Series.
my_df.loc[:, 'C2']

A   -0.671564
B   -0.664008
C   -0.921403
Name: C2, dtype: float64

In [25]:
# Select a single row by its label; the result is a Series indexed by column name.
my_df.loc['A', :]

C1    1.995548
C2   -0.671564
C3   -0.441124
C4   -1.058379
Name: A, dtype: float64

In [26]:
# Select a single row by its position (second row).
my_df.iloc[1, :]

C1   -1.100996
C2   -0.664008
C3    0.166840
C4   -0.720650
Name: B, dtype: float64

In [27]:
# Select several columns; the result follows the order given in the list.
my_df.loc[:, ['C2', 'C1', 'C3']]

Unnamed: 0,C2,C1,C3
A,-0.671564,1.995548,-0.441124
B,-0.664008,-1.100996,0.16684
C,-0.921403,0.461748,1.463662


In [28]:
# All rows, first two columns (positional slice).
my_df.iloc[:, 0:2]

Unnamed: 0,C1,C2
A,1.995548,-0.671564
B,-1.100996,-0.664008
C,0.461748,-0.921403


In [29]:
# Rows from the second one onward, first two columns (positional slices).
my_df.iloc[1:, 0:2]

Unnamed: 0,C1,C2
B,-1.100996,-0.664008
C,0.461748,-0.921403


In [30]:
# Keep only the rows whose 'C1' value is less than 1.
my_df.loc[my_df['C1'].lt(1)]

Unnamed: 0,C1,C2,C3,C4
B,-1.100996,-0.664008,0.16684,-0.72065
C,0.461748,-0.921403,1.463662,0.997846


In [31]:
my_df.gt(1)  # element-wise comparison; yields a DataFrame of booleans

Unnamed: 0,C1,C2,C3,C4
A,True,False,False,False
B,False,False,False,False
C,False,False,True,False


# data alignment for series

In [34]:
# Eight consecutive integers labelled 'A'..'H'.
my_ser2 = Series(np.arange(8), index=list('ABCDEFGH'))
my_ser2

A    0
B    1
C    2
D    3
E    4
F    5
G    6
H    7
dtype: int32

In [36]:
# Values are matched by index label and then added.
# Labels present in only one of the two Series produce NaN.
my_ser.add(my_ser2)

A    0.0
B    2.0
C    4.0
D    8.0
E    NaN
F    NaN
G    NaN
H    NaN
dtype: float64

# data alignment for dataframe

In [37]:
# 2x5 frame of standard-normal samples with rows 'A', 'C' and columns M, N, C1, C2, C3.
# No seed is set, so the values differ on every run.
my_df2 = DataFrame(
    np.random.randn(2, 5),
    index=['A', 'C'],
    columns=['M', 'N', 'C1', 'C2', 'C3'],
)
my_df2

Unnamed: 0,M,N,C1,C2,C3
A,-0.818659,0.620828,-0.300749,0.449441,0.950789
C,-1.167465,0.969789,0.379758,-1.714346,1.667865


In [38]:
# Aligns on both row and column labels, then adds.
# Only cells present in both frames get a value; every other cell is NaN.
my_df.add(my_df2)

Unnamed: 0,C1,C2,C3,C4,M,N
A,1.694798,-0.222124,0.509665,,,
B,,,,,,
C,0.841506,-2.63575,3.131527,,,


In [39]:
# fill_value=0 stands in for a cell that is missing in only one of the frames,
# so the other frame's value is kept as is. Cells missing from both frames
# (row B under columns M and N) still come out as NaN.
my_df.add(other=my_df2, fill_value=0)

Unnamed: 0,C1,C2,C3,C4,M,N
A,1.694798,-0.222124,0.509665,-1.058379,-0.818659,0.620828
B,-1.100996,-0.664008,0.16684,-0.72065,,
C,0.841506,-2.63575,3.131527,0.997846,-1.167465,0.969789


In [40]:
# Same addition with the operands swapped; the result matches my_df.add(my_df2, fill_value=0).
my_df2.add(other=my_df, fill_value=0)

Unnamed: 0,C1,C2,C3,C4,M,N
A,1.694798,-0.222124,0.509665,-1.058379,-0.818659,0.620828
B,-1.100996,-0.664008,0.16684,-0.72065,,
C,0.841506,-2.63575,3.131527,0.997846,-1.167465,0.969789


In [41]:
# The Series index (A..H) is aligned against the DataFrame's columns (M, N, C1, C2, C3).
# No label is shared, so the result has the union of both as columns and every cell is NaN.
my_df2.sub(my_ser2)

Unnamed: 0,A,B,C,C1,C2,C3,D,E,F,G,H,M,N
A,,,,,,,,,,,,,
C,,,,,,,,,,,,,


# Rank and Sort

In [45]:
# Reorder my_ser2 by label so that its index is no longer alphabetical.
my_ser2 = my_ser2.reindex(['A', 'C', 'D', 'B', 'G', 'E', 'F', 'H'])
my_ser2

A    0
C    2
D    3
B    1
G    6
E    4
F    5
H    7
dtype: int32

In [46]:
# Returns a new Series ordered by index label (ascending).
my_ser2.sort_index(ascending=True)

A    0
B    1
C    2
D    3
E    4
F    5
G    6
H    7
dtype: int32

In [47]:
# Displayed again to show that my_ser2 itself keeps the reindexed order:
# sort_index() above returned a new Series instead of modifying this one.
my_ser2

A    0
C    2
D    3
B    1
G    6
E    4
F    5
H    7
dtype: int32

In [50]:
# Returns a new Series ordered by value (ascending).
my_ser2.sort_values(ascending=True)

A    0
B    1
C    2
D    3
E    4
F    5
G    6
H    7
dtype: int32

In [51]:
# Rank of each value in ascending order, starting at 1; the index order is left unchanged.
my_ser2.rank(method='average')

A    1.0
C    3.0
D    4.0
B    2.0
G    7.0
E    5.0
F    6.0
H    8.0
dtype: float64

In [None]:
# rank() returns each value's 1-based position in ascending order (ties share the average rank); sort_values() arranges the values in that same order.