## Indexing

In [1]:
# importing the necessary libraries

import pandas as pd
import numpy as np

In [2]:
# creating a time series using the date_range method from pandas
time_series = pd.date_range('22/10/2021', periods=20)

df = pd.DataFrame(np.random.randn(20, 5),   # 20 refers to the number of rows
                                            # 5 refers to the number of columns
                  index=time_series,
                  columns=['Column 1', 'Column 2', 'Column 3', 'Column 4', 'Column 5'])

In [3]:
# lets check the dataframe that we have just created 

df

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2021-10-22,0.932633,1.055944,0.426332,2.057782,-0.027959
2021-10-23,1.349068,0.149944,0.549795,-0.633415,-0.082379
2021-10-24,0.171142,-0.084648,-1.032822,-1.195666,-1.193039
2021-10-25,0.844117,-0.780893,-0.878101,0.354678,0.267783
2021-10-26,-0.348569,0.176687,-0.784619,-0.583037,-0.103379
2021-10-27,-0.034624,-0.419274,0.018454,-0.41317,-0.771652
2021-10-28,0.29697,-0.213428,-0.180496,-0.138012,-1.275658
2021-10-29,1.561555,-1.067341,-0.346785,-1.218965,0.119665
2021-10-30,0.622849,-0.483543,0.658197,-0.663745,-1.176723
2021-10-31,0.615584,0.672485,-0.034213,1.888471,1.089482


In [4]:
df['Column 1']

2021-10-22    0.932633
2021-10-23    1.349068
2021-10-24    0.171142
2021-10-25    0.844117
2021-10-26   -0.348569
2021-10-27   -0.034624
2021-10-28    0.296970
2021-10-29    1.561555
2021-10-30    0.622849
2021-10-31    0.615584
2021-11-01   -0.131229
2021-11-02   -0.254065
2021-11-03   -0.481678
2021-11-04   -0.756467
2021-11-05   -0.053304
2021-11-06    0.391909
2021-11-07   -1.136353
2021-11-08    0.305579
2021-11-09   -2.069315
2021-11-10   -0.387857
Freq: D, Name: Column 1, dtype: float64

In [5]:
df[['Column 1', 'Column 2', 'Column 3']]

Unnamed: 0,Column 1,Column 2,Column 3
2021-10-22,0.932633,1.055944,0.426332
2021-10-23,1.349068,0.149944,0.549795
2021-10-24,0.171142,-0.084648,-1.032822
2021-10-25,0.844117,-0.780893,-0.878101
2021-10-26,-0.348569,0.176687,-0.784619
2021-10-27,-0.034624,-0.419274,0.018454
2021-10-28,0.29697,-0.213428,-0.180496
2021-10-29,1.561555,-1.067341,-0.346785
2021-10-30,0.622849,-0.483543,0.658197
2021-10-31,0.615584,0.672485,-0.034213


In [6]:
# timeseries specific indexing

col1 = df['Column 1']
col1[time_series[3]]

0.8441171054067895

In [7]:
# creating a dataframe using the dictionary structure
x = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})

# selection by position
# specifying new values to the row number 1
x.loc[1] = {'x': 9, 'y': 99}

# printing the resultant dataframe
print(x)

   x   y
0  1   3
1  9  99
2  3   5


## Slicing

In [8]:
df.iloc[:5, 0:2]  # :5 is index & :2 is column

Unnamed: 0,Column 1,Column 2
2021-10-22,0.932633,1.055944
2021-10-23,1.349068,0.149944
2021-10-24,0.171142,-0.084648
2021-10-25,0.844117,-0.780893
2021-10-26,-0.348569,0.176687


In [9]:
#Striding
df[::3]

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2021-10-22,0.932633,1.055944,0.426332,2.057782,-0.027959
2021-10-25,0.844117,-0.780893,-0.878101,0.354678,0.267783
2021-10-28,0.29697,-0.213428,-0.180496,-0.138012,-1.275658
2021-10-31,0.615584,0.672485,-0.034213,1.888471,1.089482
2021-11-03,-0.481678,-0.269261,-1.508401,-0.364649,-0.790399
2021-11-06,0.391909,-0.975457,1.129673,-0.813889,-0.069928
2021-11-09,-2.069315,0.101747,1.483164,0.507182,1.892603


## Filtering

In [10]:
df[(df['Column 3'] < 0)]

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2021-10-24,0.171142,-0.084648,-1.032822,-1.195666,-1.193039
2021-10-25,0.844117,-0.780893,-0.878101,0.354678,0.267783
2021-10-26,-0.348569,0.176687,-0.784619,-0.583037,-0.103379
2021-10-28,0.29697,-0.213428,-0.180496,-0.138012,-1.275658
2021-10-29,1.561555,-1.067341,-0.346785,-1.218965,0.119665
2021-10-31,0.615584,0.672485,-0.034213,1.888471,1.089482
2021-11-01,-0.131229,-1.198331,-1.078865,1.611416,-0.599404
2021-11-02,-0.254065,1.303257,-1.269357,-0.062391,0.400228
2021-11-03,-0.481678,-0.269261,-1.508401,-0.364649,-0.790399
2021-11-05,-0.053304,0.220094,-0.582145,2.018124,1.045749


In [11]:
df[(df['Column 1'] < 0) & (df['Column 2'] > 0)][['Column 4', 'Column 5']]

Unnamed: 0,Column 4,Column 5
2021-10-26,-0.583037,-0.103379
2021-11-02,-0.062391,0.400228
2021-11-05,2.018124,1.045749
2021-11-07,1.467695,-0.404883
2021-11-09,0.507182,1.892603
