## Indexing

In [1]:
# importing the necessary libraries

import pandas as pd
import numpy as np

In [2]:
# Build the demo frame used throughout the notebook: 20 consecutive daily
# timestamps as the index and 5 columns of standard-normal noise.

# Seeded generator so that "Restart Kernel -> Run All" reproduces the same
# numbers on every run (an unseeded generator yields a new frame each time).
rng = np.random.default_rng(42)

# creating a time series using the date_range method from pandas
# (ISO-8601 date string avoids any month/day ambiguity; default frequency is daily)
time_series = pd.date_range('2020-01-01', periods=20)

df = pd.DataFrame(rng.standard_normal((20, 5)),  # 20 rows x 5 columns
                  index=time_series,
                  columns=['Column 1', 'Column 2', 'Column 3', 'Column 4', 'Column 5'])

In [3]:
# Let's check the DataFrame that we have just created

df

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2020-01-01,-1.789321,-0.278597,-0.229798,-1.096811,1.136031
2020-01-02,-0.243208,1.466849,-0.235352,1.058109,0.824187
2020-01-03,1.968548,-1.983476,0.41151,0.279251,-0.180419
2020-01-04,1.436399,0.468874,0.700868,-0.881337,1.597997
2020-01-05,1.014153,-0.94693,-0.255071,-1.066207,-0.52348
2020-01-06,-1.152094,0.274068,1.845283,-0.701014,-1.645433
2020-01-07,1.490099,-1.141983,-0.803758,0.334796,-0.210032
2020-01-08,0.996368,0.37711,-1.709778,1.060681,1.243241
2020-01-09,-1.250499,-1.542394,-1.024098,0.075323,0.773572
2020-01-10,-0.877473,2.397848,1.658575,-1.429273,-1.318977


In [4]:
# Selecting a single column by its label with [] returns a Series
df['Column 1']

2020-01-01   -1.789321
2020-01-02   -0.243208
2020-01-03    1.968548
2020-01-04    1.436399
2020-01-05    1.014153
2020-01-06   -1.152094
2020-01-07    1.490099
2020-01-08    0.996368
2020-01-09   -1.250499
2020-01-10   -0.877473
2020-01-11    2.428282
2020-01-12   -0.528421
2020-01-13    0.992141
2020-01-14   -0.099093
2020-01-15    0.925970
2020-01-16    0.453023
2020-01-17    2.211618
2020-01-18    0.543534
2020-01-19    0.026867
2020-01-20    0.038628
Freq: D, Name: Column 1, dtype: float64

In [5]:
# Passing a list of column labels to [] returns a DataFrame with just those columns
df[['Column 1', 'Column 2', 'Column 3']]

Unnamed: 0,Column 1,Column 2,Column 3
2020-01-01,-1.789321,-0.278597,-0.229798
2020-01-02,-0.243208,1.466849,-0.235352
2020-01-03,1.968548,-1.983476,0.41151
2020-01-04,1.436399,0.468874,0.700868
2020-01-05,1.014153,-0.94693,-0.255071
2020-01-06,-1.152094,0.274068,1.845283
2020-01-07,1.490099,-1.141983,-0.803758
2020-01-08,0.996368,0.37711,-1.709778
2020-01-09,-1.250499,-1.542394,-1.024098
2020-01-10,-0.877473,2.397848,1.658575


In [7]:
# Time-series-specific indexing: a Series with a DatetimeIndex can be looked up
# by timestamp label. time_series[3] is the fourth date in the index (2020-01-04).

col1 = df['Column 1']
col1[time_series[3]]

1.4363990181616693

In [8]:
# creating a dataframe using the dictionary structure
x = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})

# Selection by position: .iloc[1] addresses the second row (positions are 0-based).
# (.loc[1] would be label-based and only coincides with position 1 here because
# the frame has the default 0..n-1 integer index.)
# Assigning a dict writes each value into the column named by its key.
x.iloc[1] = {'x': 9, 'y': 99}

# printing the resultant dataframe
print(x)

   x   y
0  1   3
1  9  99
2  3   5


## Slicing

In [9]:
# Position-based slicing with .iloc: first five rows, first two columns
df.iloc[:5, 0:2]

Unnamed: 0,Column 1,Column 2
2020-01-01,-1.789321,-0.278597
2020-01-02,-0.243208,1.466849
2020-01-03,1.968548,-1.983476
2020-01-04,1.436399,0.468874
2020-01-05,1.014153,-0.94693


In [10]:
# Striding: a step of 3 in the row slice keeps every third row, starting with the first
df[::3]

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2020-01-01,-1.789321,-0.278597,-0.229798,-1.096811,1.136031
2020-01-04,1.436399,0.468874,0.700868,-0.881337,1.597997
2020-01-07,1.490099,-1.141983,-0.803758,0.334796,-0.210032
2020-01-10,-0.877473,2.397848,1.658575,-1.429273,-1.318977
2020-01-13,0.992141,0.647536,0.612202,1.499981,0.403181
2020-01-16,0.453023,-2.094457,-1.582662,-1.006039,-0.974619
2020-01-19,0.026867,0.379394,0.624902,0.046261,3.715867


## Filtering

In [11]:
# Boolean filtering: keep only the rows where 'Column 3' is negative
df[(df['Column 3'] < 0)]

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2020-01-01,-1.789321,-0.278597,-0.229798,-1.096811,1.136031
2020-01-02,-0.243208,1.466849,-0.235352,1.058109,0.824187
2020-01-05,1.014153,-0.94693,-0.255071,-1.066207,-0.52348
2020-01-07,1.490099,-1.141983,-0.803758,0.334796,-0.210032
2020-01-08,0.996368,0.37711,-1.709778,1.060681,1.243241
2020-01-09,-1.250499,-1.542394,-1.024098,0.075323,0.773572
2020-01-12,-0.528421,-0.113945,-0.020237,0.488454,0.544267
2020-01-15,0.92597,1.709762,-0.369189,-1.224123,2.377181
2020-01-16,0.453023,-2.094457,-1.582662,-1.006039,-0.974619
2020-01-17,2.211618,0.49855,-0.123761,0.113161,-0.570776


In [12]:
# Combined filter: rows where 'Column 1' is negative AND 'Column 2' is positive,
# showing only 'Column 4' and 'Column 5'.
# A single .loc call selects rows and columns in one step, instead of chaining
# two [] lookups through an intermediate filtered frame.
df.loc[(df['Column 1'] < 0) & (df['Column 2'] > 0), ['Column 4', 'Column 5']]

Unnamed: 0,Column 4,Column 5
2020-01-02,1.058109,0.824187
2020-01-06,-0.701014,-1.645433
2020-01-10,-1.429273,-1.318977
