In [117]:
import pandas as pd
import numpy as np

## Indexing columns of DataFrame and labels of series:

In [224]:
a_series = pd.Series([1,2,3,4,5])


In [227]:
a_series[2]

np.int64(3)

In [233]:
a_dataframe = pd.DataFrame(np.arange(9).reshape(3,3), columns=['a', 'b', 'c'])
a_dataframe

Unnamed: 0,a,b,c
0,0,1,2
1,3,4,5
2,6,7,8


In [231]:
a_dataframe['a']

0    0
1    3
2    6
Name: a, dtype: int64

## Creating a new column in DataFrame:


In [235]:
a_dataframe['new_col_name'] = [9, 10, 11]
a_dataframe

Unnamed: 0,a,b,c,new_col_name
0,0,1,2,9
1,3,4,5,10
2,6,7,8,11


In [237]:
a_series[5] = 6
a_series

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

## slicing ranges:

In [239]:
a_dataframe 

Unnamed: 0,a,b,c,new_col_name
0,0,1,2,9
1,3,4,5,10
2,6,7,8,11


In [257]:
a_dataframe[['a', 'c']] # start, stop, step

Unnamed: 0,a,c
0,0,2
1,3,5
2,6,8


In [258]:
a_series

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [261]:
a_series[:3:2]

0    1
2    3
dtype: int64

## Loc and iloc:

In [262]:
a_dataframe

Unnamed: 0,a,b,c,new_col_name
0,0,1,2,9
1,3,4,5,10
2,6,7,8,11


In [268]:
a_dataframe.loc[0:1]

Unnamed: 0,a,b,c,new_col_name
0,0,1,2,9
1,3,4,5,10


In [270]:
a_dataframe.loc[0:2, 'a':'c']

Unnamed: 0,a,b,c
0,0,1,2
1,3,4,5
2,6,7,8


In [276]:
a_series.loc[0::2]

0    1
2    3
4    5
dtype: int64

In [277]:
a_dataframe.iloc[0:1]

Unnamed: 0,a,b,c,new_col_name
0,0,1,2,9


In [278]:
a_dataframe.iloc[0:2, 0:2]

Unnamed: 0,a,b
0,0,1
1,3,4


## isin():

In [281]:
a_dataframe

Unnamed: 0,a,b,c,new_col_name
0,0,1,2,9
1,3,4,5,10
2,6,7,8,11


In [285]:
a_dataframe.isin([1,2,3])

Unnamed: 0,a,b,c,new_col_name
0,False,True,True,False
1,True,False,False,False
2,False,False,False,False


## where():

In [295]:
eg_1 = np.arange(9).reshape(3,3)
df = pd.DataFrame(eg_1)

df.where (df % 2 == 0, 'odd') ## In pandas where adds 'odd' to the other dataframe rather than to those where logic is applied.


Unnamed: 0,0,1,2
0,0,odd,2
1,odd,4,odd
2,6,odd,8


In [287]:
eg_2 = np.arange(9).reshape(3,3)
eg_2

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [294]:
np.where(eg_2% 2 == 0, 'even', 'odd') # In numpy paramerers = (condition, do this if true, do this if false)

array([['even', 'odd', 'even'],
       ['odd', 'even', 'odd'],
       ['even', 'odd', 'even']], dtype='<U4')

In [None]:
eg_3 = np.arange(-5, 4).reshape(3,3)
eg_3

In [296]:
cond_list = [
    eg_3 < 0,
    eg_3 % 2 == 0,
    eg_3 % 2 != 0
]

choice_list = [
    "negative",
    "Even",
    "Odd"
]

np.select(cond_list, choice_list, default = 'wrong')



array([['negative', 'negative', 'negative'],
       ['negative', 'negative', 'Even'],
       ['Odd', 'Even', 'Odd']], dtype='<U8')

## set_index():

In [144]:
a = np.arange(-5, 10).reshape(5, 3)

df = pd.DataFrame(a, index=[chr(i) for i in range(97, 102)], columns= [0, 1, 2])
df

Unnamed: 0,0,1,2
a,-5,-4,-3
b,-2,-1,0
c,1,2,3
d,4,5,6
e,7,8,9


In [201]:
df2 = df.set_index([0, 1],drop=False, append=True)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,2
Unnamed: 0_level_1,0,1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,-5,-4,-5,-4,-3
1,-2,-1,-2,-1,0
2,1,2,1,2,3
3,4,5,4,5,6
4,7,8,7,8,9


In [168]:
df # No change in df

Unnamed: 0,0,1,2
a,-5,-4,-3
b,-2,-1,0
c,1,2,3
d,4,5,6
e,7,8,9


In [207]:
df3 = df.set_index([0, 1],drop=False, append=True, inplace=True, verify_integrity=True) # As much times as you run this, the process happens and changes are made in original df only
df3

In [212]:
df # As if such mistake occurs than we can't undo it easily. So, the best practice is to make at least a copy before performing set_index( inplace = True ).

Unnamed: 0,0,1,2
0,-5,-4,-3
1,-2,-1,0
2,1,2,3
3,4,5,6
4,7,8,9


## reset_index():

In [213]:
df.reset_index( drop=True, inplace=True) # All indexes are discarded and replaced by [0,1,2,....,n] number of indexes.

In [214]:
df

Unnamed: 0,0,1,2
0,-5,-4,-3
1,-2,-1,0
2,1,2,3
3,4,5,6
4,7,8,9


## Assigning a custom indexing:

In [217]:
df4 = pd.DataFrame(np.arange(-5, 10).reshape(5, 3))
df4

Unnamed: 0,0,1,2
0,-5,-4,-3
1,-2,-1,0
2,1,2,3
3,4,5,6
4,7,8,9


In [219]:
df4.index #checking indexes

RangeIndex(start=0, stop=5, step=1)

In [223]:
df4.index = pd.Index([1, 2, 3, 4, 5], name='Index')
df4

Unnamed: 0_level_0,0,1,2
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,-5,-4,-3
2,-2,-1,0
3,1,2,3
4,4,5,6
5,7,8,9
