# Data Cleansing

In [1]:
import pandas as pd

In [2]:
# Load the raw indicators table; the path is relative, so the notebook must be
# run from the directory that contains the `data/` folder.
df = pd.read_csv(filepath_or_buffer='data/us_indicators_raw.csv')

In [3]:
# Show the freshly loaded frame for a first look at its columns and values.
df

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
0,31/1/2001,1.32,1.5,89.24,147.07,inrange,ease
1,28/2/2001,1.44,1.5,90.47,145.49,inrange,ease
2,31/3/2001,1.44,-0.5,89.33,140.02,inrange,ease
3,30/4/2001,2.52,-1.0,87.32,138.88,inrange,ease
4,31/5/2001,2.76,1.5,87.16,139.49,inrange,ease
...,...,...,...,...,...,...,...
278,29/2/2024,-0.77,2.5,118.51,718.27,outrrange,tight
279,31/3/2024,-0.47,2.5,118.11,711.58,outrrange,tight
280,30/4/2024,0.19,2.5,116.80,695.69,outrrange,tight
281,31/5/2024,1.54,2.5,117.21,702.68,inrange,tight


## Data Type

In [4]:
# Experiment on an independent deep copy so the dtype demos leave `df` untouched.
modified_df = df.copy(deep=True)

### Number

In [5]:
# Check the dtype of policy_rate before casting (float64 as loaded).
modified_df['policy_rate'].dtype

dtype('float64')

In [6]:
# Cast policy_rate to integer.
# NOTE: the float -> int cast discards the fractional part (truncation toward
# zero), so 1.5 becomes 1 and -0.5 becomes 0. The conversion is lossy.
modified_df = modified_df.astype({'policy_rate': int})

In [7]:
# Confirm the column is now integer-typed.
modified_df['policy_rate'].dtype

dtype('int64')

In [8]:
# Display the frame: policy_rate now holds whole numbers only (e.g. 1.5 -> 1).
modified_df

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
0,31/1/2001,1.32,1,89.24,147.07,inrange,ease
1,28/2/2001,1.44,1,90.47,145.49,inrange,ease
2,31/3/2001,1.44,0,89.33,140.02,inrange,ease
3,30/4/2001,2.52,-1,87.32,138.88,inrange,ease
4,31/5/2001,2.76,1,87.16,139.49,inrange,ease
...,...,...,...,...,...,...,...
278,29/2/2024,-0.77,2,118.51,718.27,outrrange,tight
279,31/3/2024,-0.47,2,118.11,711.58,outrrange,tight
280,30/4/2024,0.19,2,116.80,695.69,outrrange,tight
281,31/5/2024,1.54,2,117.21,702.68,inrange,tight


In [9]:
# Cast back to float. The fractions removed by the earlier integer cast are
# not recovered; the values simply become 1.0, 0.0, 2.0, ...
modified_df = modified_df.astype({'policy_rate': float})

In [10]:
# Confirm the column is float-typed again.
modified_df['policy_rate'].dtype

dtype('float64')

In [11]:
# Display again: the values are floats, but the original fractional parts are gone.
modified_df

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
0,31/1/2001,1.32,1.0,89.24,147.07,inrange,ease
1,28/2/2001,1.44,1.0,90.47,145.49,inrange,ease
2,31/3/2001,1.44,0.0,89.33,140.02,inrange,ease
3,30/4/2001,2.52,-1.0,87.32,138.88,inrange,ease
4,31/5/2001,2.76,1.0,87.16,139.49,inrange,ease
...,...,...,...,...,...,...,...
278,29/2/2024,-0.77,2.0,118.51,718.27,outrrange,tight
279,31/3/2024,-0.47,2.0,118.11,711.58,outrrange,tight
280,30/4/2024,0.19,2.0,116.80,695.69,outrrange,tight
281,31/5/2024,1.54,2.0,117.21,702.68,inrange,tight


### Text

In [12]:
# money_supply starts out numeric (float64).
modified_df['money_supply'].dtype

dtype('float64')

In [13]:
# Multiply every money_supply value by 1000.
# NOTE(review): presumably a unit rescale ahead of the " MUSD" label added
# further down -- confirm the unit of the raw column.
modified_df['money_supply'] = modified_df['money_supply'].mul(1000)

In [14]:
# Check the scaled money_supply values.
modified_df

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
0,31/1/2001,1.32,1.0,89.24,147070.0,inrange,ease
1,28/2/2001,1.44,1.0,90.47,145490.0,inrange,ease
2,31/3/2001,1.44,0.0,89.33,140020.0,inrange,ease
3,30/4/2001,2.52,-1.0,87.32,138880.0,inrange,ease
4,31/5/2001,2.76,1.0,87.16,139490.0,inrange,ease
...,...,...,...,...,...,...,...
278,29/2/2024,-0.77,2.0,118.51,718270.0,outrrange,tight
279,31/3/2024,-0.47,2.0,118.11,711580.0,outrrange,tight
280,30/4/2024,0.19,2.0,116.80,695690.0,outrrange,tight
281,31/5/2024,1.54,2.0,117.21,702680.0,inrange,tight


In [15]:
# Turn the numbers into their string form so text can be appended to them.
modified_df = modified_df.astype({'money_supply': str})

In [16]:
# dtype('O') (object) confirms the column now stores Python strings.
modified_df['money_supply'].dtype

dtype('O')

In [17]:
# Append the unit label to every entry (element-wise string concatenation).
modified_df['money_supply'] += " MUSD"

In [18]:
# Every money_supply entry now carries the " MUSD" suffix.
modified_df

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
0,31/1/2001,1.32,1.0,89.24,147070.0 MUSD,inrange,ease
1,28/2/2001,1.44,1.0,90.47,145490.0 MUSD,inrange,ease
2,31/3/2001,1.44,0.0,89.33,140020.0 MUSD,inrange,ease
3,30/4/2001,2.52,-1.0,87.32,138880.0 MUSD,inrange,ease
4,31/5/2001,2.76,1.0,87.16,139490.0 MUSD,inrange,ease
...,...,...,...,...,...,...,...
278,29/2/2024,-0.77,2.0,118.51,718270.0 MUSD,outrrange,tight
279,31/3/2024,-0.47,2.0,118.11,711580.0 MUSD,outrrange,tight
280,30/4/2024,0.19,2.0,116.80,695690.0 MUSD,outrrange,tight
281,31/5/2024,1.54,2.0,117.21,702680.0 MUSD,inrange,tight


### Date Time

In [19]:
# From here on the notebook works on `df` itself rather than `modified_df`.
# The date column is still plain text (object dtype) at this point.
df['date'].dtype

dtype('O')

In [20]:
# Sorting while `date` is text compares the strings character by character, so
# every "28/2/..." row comes first: the order is not chronological.
df.sort_values('date')

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
1,28/2/2001,1.44,1.50,90.47,145.49,inrange,ease
14,28/2/2002,0.30,2.00,92.56,149.90,outrrange,tight
13,28/2/2002,0.36,2.00,92.56,149.90,outrrange,tight
26,28/2/2003,100.00,1.75,87.85,157.63,inrange,ease
50,28/2/2005,2.48,2.00,90.58,198.82,inrange,tight
...,...,...,...,...,...,...,...
224,31/8/2019,0.52,1.50,125.18,664.44,outrrange,ease
236,31/8/2020,-0.50,0.50,122.21,724.00,outrrange,ease
248,31/8/2021,-0.02,0.50,114.27,724.77,outrrange,ease
260,31/8/2022,7.86,0.75,114.34,672.19,outrrange,ease


In [21]:
# Parse the day/month/year strings into real datetimes.
# This changes `df` itself, and every later section relies on the parsed dates.
df = df.assign(date=pd.to_datetime(df['date'], format='%d/%m/%Y'))

See the list of format codes at https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior. Here `%d/%m/%Y` means day/month/four-digit year.

In [22]:
# '<M8[ns]' is NumPy's notation for datetime64 with nanosecond resolution.
df['date'].dtype

dtype('<M8[ns]')

In [23]:
# With a datetime column the same sort is chronological.
df.sort_values('date')

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
0,2001-01-31,1.32,1.5,89.24,147.07,inrange,ease
1,2001-02-28,1.44,1.5,90.47,145.49,inrange,ease
2,2001-03-31,1.44,-0.5,89.33,140.02,inrange,ease
3,2001-04-30,2.52,-1.0,87.32,138.88,inrange,ease
4,2001-05-31,2.76,1.5,87.16,139.49,inrange,ease
...,...,...,...,...,...,...,...
278,2024-02-29,-0.77,2.5,118.51,718.27,outrrange,tight
279,2024-03-31,-0.47,2.5,118.11,711.58,outrrange,tight
280,2024-04-30,0.19,2.5,116.80,695.69,outrrange,tight
281,2024-05-31,1.54,2.5,117.21,702.68,inrange,tight


## Data Filtering

### Filter Column

#### By Name

In [24]:
# Column names reused by the selection examples below.
policy_cols = [
    'date',
    'policy_rate',
    'money_supply',
    'type_of_monetary_policy',
]

In [25]:
# Select a subset of columns by passing a list of names.
df[policy_cols]

Unnamed: 0,date,policy_rate,money_supply,type_of_monetary_policy
0,2001-01-31,1.5,147.07,ease
1,2001-02-28,1.5,145.49,ease
2,2001-03-31,-0.5,140.02,ease
3,2001-04-30,-1.0,138.88,ease
4,2001-05-31,1.5,139.49,ease
...,...,...,...,...
278,2024-02-29,2.5,718.27,tight
279,2024-03-31,2.5,711.58,tight
280,2024-04-30,2.5,695.69,tight
281,2024-05-31,2.5,702.68,tight


In [26]:
# The opposite approach: name the columns to discard and keep the rest.
# drop() returns a new frame; `df` is not modified.
df.drop(['cpi', 'inflation_target'], axis='columns')

Unnamed: 0,date,policy_rate,neer,money_supply,type_of_monetary_policy
0,2001-01-31,1.5,89.24,147.07,ease
1,2001-02-28,1.5,90.47,145.49,ease
2,2001-03-31,-0.5,89.33,140.02,ease
3,2001-04-30,-1.0,87.32,138.88,ease
4,2001-05-31,1.5,87.16,139.49,ease
...,...,...,...,...,...
278,2024-02-29,2.5,118.51,718.27,tight
279,2024-03-31,2.5,118.11,711.58,tight
280,2024-04-30,2.5,116.80,695.69,tight
281,2024-05-31,2.5,117.21,702.68,tight


In [27]:
# Same selection via .loc: ':' keeps every row, the list picks the columns.
df.loc[:, policy_cols]

Unnamed: 0,date,policy_rate,money_supply,type_of_monetary_policy
0,2001-01-31,1.5,147.07,ease
1,2001-02-28,1.5,145.49,ease
2,2001-03-31,-0.5,140.02,ease
3,2001-04-30,-1.0,138.88,ease
4,2001-05-31,1.5,139.49,ease
...,...,...,...,...
278,2024-02-29,2.5,718.27,tight
279,2024-03-31,2.5,711.58,tight
280,2024-04-30,2.5,695.69,tight
281,2024-05-31,2.5,702.68,tight


#### By Index

In [28]:
# Positional selection: all rows, columns at positions 0-3 (stop position 4 is excluded).
df.iloc[:, :4]

Unnamed: 0,date,cpi,policy_rate,neer
0,2001-01-31,1.32,1.5,89.24
1,2001-02-28,1.44,1.5,90.47
2,2001-03-31,1.44,-0.5,89.33
3,2001-04-30,2.52,-1.0,87.32
4,2001-05-31,2.76,1.5,87.16
...,...,...,...,...
278,2024-02-29,-0.77,2.5,118.51
279,2024-03-31,-0.47,2.5,118.11
280,2024-04-30,0.19,2.5,116.80
281,2024-05-31,1.54,2.5,117.21


### Filter Row

#### By Index

In [29]:
# Label-based row slice from the start up to label 100.
# Unlike Python list slicing, .loc includes the end label, so row 100 is returned.
df.loc[:100]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
0,2001-01-31,1.32,1.50,89.24,147.07,inrange,ease
1,2001-02-28,1.44,1.50,90.47,145.49,inrange,ease
2,2001-03-31,1.44,-0.50,89.33,140.02,inrange,ease
3,2001-04-30,2.52,-1.00,87.32,138.88,inrange,ease
4,2001-05-31,2.76,1.50,87.16,139.49,inrange,ease
...,...,...,...,...,...,...,...
96,2008-12-31,0.44,2.75,96.45,286.35,outrrange,tight
97,2009-01-31,-0.48,2.00,97.22,287.25,outrrange,tight
98,2009-02-28,-0.12,1.50,97.97,282.11,outrrange,ease
99,2009-03-31,-0.24,1.50,97.62,288.33,outrrange,ease


In [30]:
# Label-based row slice from label 100 to the end (row 100 included).
df.loc[100:]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
100,2009-04-30,-0.93,1.25,97.14,291.01,outrrange,ease
101,2009-05-31,-3.40,1.25,97.07,300.03,outrrange,ease
102,2009-06-30,-3.93,1.25,97.50,297.26,outrrange,ease
103,2009-07-31,-4.36,1.25,97.32,294.04,outrrange,ease
104,2009-08-31,-1.15,1.25,96.89,297.15,outrrange,ease
...,...,...,...,...,...,...,...
278,2024-02-29,-0.77,2.50,118.51,718.27,outrrange,tight
279,2024-03-31,-0.47,2.50,118.11,711.58,outrrange,tight
280,2024-04-30,0.19,2.50,116.80,695.69,outrrange,tight
281,2024-05-31,1.54,2.50,117.21,702.68,inrange,tight


In [31]:
# Label-based row slice; both end labels (10 and 30) are included.
df.loc[10:30]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
10,2001-11-30,,2.5,89.42,148.71,outrrange,tight
11,2001-12-31,0.72,2.25,91.11,148.43,outrrange,tight
12,2002-01-31,0.84,2.0,91.88,147.27,outrrange,tight
13,2002-02-28,0.36,2.0,92.56,149.9,outrrange,tight
14,2002-02-28,0.3,2.0,92.56,149.9,outrrange,tight
15,2002-03-31,0.6,2.0,92.89,150.91,outrrange,tight
16,2002-04-30,0.47,2.0,92.36,152.66,outrrange,tight
17,2002-05-31,0.24,2.0,92.02,155.95,outrrange,tight
18,2002-06-30,0.36,2.0,92.03,158.75,outrrange,tight
19,2002-07-31,0.24,2.0,92.41,157.59,outrrange,tight


#### By Logic

In [32]:
# Keep only the rows whose policy stance is exactly 'ease' (boolean mask).
df.loc[df['type_of_monetary_policy'].eq('ease')]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
0,2001-01-31,1.32,1.50,89.24,147.07,inrange,ease
1,2001-02-28,1.44,1.50,90.47,145.49,inrange,ease
2,2001-03-31,1.44,-0.50,89.33,140.02,inrange,ease
3,2001-04-30,2.52,-1.00,87.32,138.88,inrange,ease
4,2001-05-31,2.76,1.50,87.16,139.49,inrange,ease
...,...,...,...,...,...,...,...
264,2022-12-31,5.89,1.25,118.42,721.26,outrrange,ease
265,2023-01-31,5.02,1.50,121.91,760.44,outrrange,ease
266,2023-02-28,3.79,1.50,119.92,713.86,outrrange,ease
267,2023-03-31,2.83,1.75,119.20,739.60,inrange,ease


In [33]:
# Keep only the rows where neer is strictly greater than 100.
df.loc[df['neer'].gt(100)]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
79,2007-07-31,1.73,3.25,101.04,266.82,inrange,tight
87,2008-03-31,5.35,3.25,100.51,298.91,outrrange,tight
112,2010-04-30,2.97,1.25,100.22,334.93,inrange,ease
113,2010-05-31,3.41,1.25,101.54,338.40,outrrange,ease
114,2010-06-30,3.32,1.25,101.79,334.56,outrrange,ease
...,...,...,...,...,...,...,...
278,2024-02-29,-0.77,2.50,118.51,718.27,outrrange,tight
279,2024-03-31,-0.47,2.50,118.11,711.58,outrrange,tight
280,2024-04-30,0.19,2.50,116.80,695.69,outrrange,tight
281,2024-05-31,1.54,2.50,117.21,702.68,inrange,tight


In [34]:
# Combine both conditions: '&' is the element-wise AND, so a row must satisfy
# the policy test and the neer test at the same time.
df.loc[df['type_of_monetary_policy'].eq('ease') & df['neer'].gt(100)]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
112,2010-04-30,2.97,1.25,100.22,334.93,inrange,ease
113,2010-05-31,3.41,1.25,101.54,338.40,outrrange,ease
114,2010-06-30,3.32,1.25,101.79,334.56,outrrange,ease
115,2010-07-31,3.47,1.50,100.37,337.53,outrrange,ease
116,2010-08-31,3.36,1.75,101.12,350.76,outrrange,ease
...,...,...,...,...,...,...,...
264,2022-12-31,5.89,1.25,118.42,721.26,outrrange,ease
265,2023-01-31,5.02,1.50,121.91,760.44,outrrange,ease
266,2023-02-28,3.79,1.50,119.92,713.86,outrrange,ease
267,2023-03-31,2.83,1.75,119.20,739.60,inrange,ease


## Missing Value

### Detect

In [35]:
# List the rows whose cpi value is missing (NaN).
df[df['cpi'].isna()]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
5,2001-06-30,,2.5,88.3,139.53,inrange,tight
6,2001-07-31,,2.5,87.81,138.87,inrange,tight
7,2001-08-31,,2.5,87.32,144.93,inrange,tight
8,2001-09-30,,2.5,88.12,144.44,inrange,tight
9,2001-10-31,,2.5,88.26,144.56,inrange,tight
10,2001-11-30,,2.5,89.42,148.71,outrrange,tight


### Handling

In [36]:
# Remember the index labels of the rows with missing cpi so that each
# handling strategy below can be inspected on the same rows.
missing_index = df.index[df['cpi'].isna().to_numpy()]
missing_index

Index([5, 6, 7, 8, 9, 10], dtype='int64')

#### Fill

##### Fixed Value

In [37]:
# Separate deep copy for the constant-fill strategy; `df` keeps its NaNs.
fill_df = df.copy(deep=True)

In [38]:
# Replace every missing cpi with the constant 0.
fill_df = fill_df.assign(cpi=fill_df['cpi'].fillna(0))

In [39]:
# Inspect the previously missing rows: cpi is now 0.0 in each.
fill_df.loc[missing_index]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
5,2001-06-30,0.0,2.5,88.3,139.53,inrange,tight
6,2001-07-31,0.0,2.5,87.81,138.87,inrange,tight
7,2001-08-31,0.0,2.5,87.32,144.93,inrange,tight
8,2001-09-30,0.0,2.5,88.12,144.44,inrange,tight
9,2001-10-31,0.0,2.5,88.26,144.56,inrange,tight
10,2001-11-30,0.0,2.5,89.42,148.71,outrrange,tight


##### Fill Forward Value

In [40]:
# Separate deep copy for the forward-fill strategy.
ffill_df = df.copy(deep=True)

In [41]:
# Row 4 is the last row before the gap (rows 5-10); its cpi is the value that
# forward fill carries into the missing rows.
ffill_df.loc[4]

date                       2001-05-31 00:00:00
cpi                                       2.76
policy_rate                                1.5
neer                                     87.16
money_supply                            139.49
inflation_target                       inrange
type_of_monetary_policy                   ease
Name: 4, dtype: object

In [42]:
# Forward fill: each missing cpi takes the most recent non-missing value above it.
ffill_df = ffill_df.assign(cpi=ffill_df['cpi'].ffill())

In [43]:
# Inspect the previously missing rows: each now repeats row 4's cpi (2.76).
ffill_df.loc[missing_index]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
5,2001-06-30,2.76,2.5,88.3,139.53,inrange,tight
6,2001-07-31,2.76,2.5,87.81,138.87,inrange,tight
7,2001-08-31,2.76,2.5,87.32,144.93,inrange,tight
8,2001-09-30,2.76,2.5,88.12,144.44,inrange,tight
9,2001-10-31,2.76,2.5,88.26,144.56,inrange,tight
10,2001-11-30,2.76,2.5,89.42,148.71,outrrange,tight


##### Fill Backward Value

In [44]:
# Separate deep copy for the backward-fill strategy.
bfill_df = df.copy(deep=True)

In [45]:
# Look at row 11 of the frame being back-filled (not the forward-filled one):
# it is the first row after the gap (rows 5-10), so its cpi is the value that
# bfill() will copy backwards into the missing rows.
bfill_df.loc[11, :]

date                       2001-12-31 00:00:00
cpi                                       0.72
policy_rate                               2.25
neer                                     91.11
money_supply                            148.43
inflation_target                     outrrange
type_of_monetary_policy                  tight
Name: 11, dtype: object

In [46]:
# Backward fill: each missing cpi takes the next non-missing value below it.
bfill_df = bfill_df.assign(cpi=bfill_df['cpi'].bfill())

In [47]:
# Inspect the previously missing rows: each now repeats row 11's cpi (0.72).
bfill_df.loc[missing_index]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
5,2001-06-30,0.72,2.5,88.3,139.53,inrange,tight
6,2001-07-31,0.72,2.5,87.81,138.87,inrange,tight
7,2001-08-31,0.72,2.5,87.32,144.93,inrange,tight
8,2001-09-30,0.72,2.5,88.12,144.44,inrange,tight
9,2001-10-31,0.72,2.5,88.26,144.56,inrange,tight
10,2001-11-30,0.72,2.5,89.42,148.71,outrrange,tight


##### Imputation

In [48]:
# Separate deep copy for the mean-imputation strategy.
imputation_df = df.copy(deep=True)

In [49]:
# Mean of the observed cpi values; mean() skips NaN by default.
# NOTE(review): the raw data contains a cpi of 100.00 (row 26, 2003-02-28),
# far from the other values shown -- it looks like an outlier that would pull
# this mean upwards; confirm before using the mean for imputation.
imputation_df['cpi'].mean()

2.345198555956679

In [50]:
# Replace every missing cpi with the mean of the observed cpi values.
imputation_df = imputation_df.assign(
    cpi=lambda frame: frame['cpi'].fillna(frame['cpi'].mean())
)

In [51]:
# Inspect the previously missing rows: each now holds the column mean.
imputation_df.loc[missing_index]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
5,2001-06-30,2.345199,2.5,88.3,139.53,inrange,tight
6,2001-07-31,2.345199,2.5,87.81,138.87,inrange,tight
7,2001-08-31,2.345199,2.5,87.32,144.93,inrange,tight
8,2001-09-30,2.345199,2.5,88.12,144.44,inrange,tight
9,2001-10-31,2.345199,2.5,88.26,144.56,inrange,tight
10,2001-11-30,2.345199,2.5,89.42,148.71,outrrange,tight


##### Interpolation

In [52]:
# Separate deep copy for the interpolation strategy.
interpolation_df = df.copy(deep=True)

In [53]:
# Rows 4-11 bracket the gap: cpi is known at rows 4 and 11, missing from 5 to 10.
interpolation_df.loc[4:11]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
4,2001-05-31,2.76,1.5,87.16,139.49,inrange,ease
5,2001-06-30,,2.5,88.3,139.53,inrange,tight
6,2001-07-31,,2.5,87.81,138.87,inrange,tight
7,2001-08-31,,2.5,87.32,144.93,inrange,tight
8,2001-09-30,,2.5,88.12,144.44,inrange,tight
9,2001-10-31,,2.5,88.26,144.56,inrange,tight
10,2001-11-30,,2.5,89.42,148.71,outrrange,tight
11,2001-12-31,0.72,2.25,91.11,148.43,outrrange,tight


In [54]:
# Linear interpolation: missing values are placed on a straight line between
# the neighbouring observed values. method='linear' treats rows as equally
# spaced and ignores the index (and the dates).
interpolation_df = interpolation_df.assign(
    cpi=interpolation_df['cpi'].interpolate(method='linear')
)

In [55]:
# The gap is now filled with evenly stepped values between 2.76 (row 4) and 0.72 (row 11).
interpolation_df.loc[4:11]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
4,2001-05-31,2.76,1.5,87.16,139.49,inrange,ease
5,2001-06-30,2.468571,2.5,88.3,139.53,inrange,tight
6,2001-07-31,2.177143,2.5,87.81,138.87,inrange,tight
7,2001-08-31,1.885714,2.5,87.32,144.93,inrange,tight
8,2001-09-30,1.594286,2.5,88.12,144.44,inrange,tight
9,2001-10-31,1.302857,2.5,88.26,144.56,inrange,tight
10,2001-11-30,1.011429,2.5,89.42,148.71,outrrange,tight
11,2001-12-31,0.72,2.25,91.11,148.43,outrrange,tight


#### Remove

In [56]:
# Separate deep copy for the row-removal strategy.
remove_df = df.copy(deep=True)

In [57]:
# Before removal: rows 5-10 have no cpi value.
remove_df.loc[4:11]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
4,2001-05-31,2.76,1.5,87.16,139.49,inrange,ease
5,2001-06-30,,2.5,88.3,139.53,inrange,tight
6,2001-07-31,,2.5,87.81,138.87,inrange,tight
7,2001-08-31,,2.5,87.32,144.93,inrange,tight
8,2001-09-30,,2.5,88.12,144.44,inrange,tight
9,2001-10-31,,2.5,88.26,144.56,inrange,tight
10,2001-11-30,,2.5,89.42,148.71,outrrange,tight
11,2001-12-31,0.72,2.25,91.11,148.43,outrrange,tight


In [58]:
# Drop rows that contain a missing value.
# NOTE: with these (default) settings a row is removed if ANY column is NaN,
# not only cpi; use subset=['cpi'] to restrict the check to that column.
remove_df = remove_df.dropna(axis='index', how='any')

In [59]:
# Labels 5-10 are gone; the surviving rows keep their original index labels.
remove_df.loc[4:11]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
4,2001-05-31,2.76,1.5,87.16,139.49,inrange,ease
11,2001-12-31,0.72,2.25,91.11,148.43,outrrange,tight


In [60]:
# Renumber the rows 0..n-1; drop=True discards the old labels instead of
# keeping them as a new column.
remove_df = remove_df.reset_index(drop=True)

In [61]:
# After the reset, labels 4-11 are contiguous again (they now point at later dates).
remove_df.loc[4:11]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
4,2001-05-31,2.76,1.5,87.16,139.49,inrange,ease
5,2001-12-31,0.72,2.25,91.11,148.43,outrrange,tight
6,2002-01-31,0.84,2.0,91.88,147.27,outrrange,tight
7,2002-02-28,0.36,2.0,92.56,149.9,outrrange,tight
8,2002-02-28,0.3,2.0,92.56,149.9,outrrange,tight
9,2002-03-31,0.6,2.0,92.89,150.91,outrrange,tight
10,2002-04-30,0.47,2.0,92.36,152.66,outrrange,tight
11,2002-05-31,0.24,2.0,92.02,155.95,outrrange,tight


## Duplicate Data

### Detect

In [62]:
# Rows whose date has already appeared earlier in the frame. keep='first' (the
# default) leaves the first occurrence unflagged, so only the repeats are listed.
df[df.duplicated(subset='date', keep='first')]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
14,2002-02-28,0.3,2.0,92.56,149.9,outrrange,tight


In [63]:
# keep=False flags every member of a duplicate group, so the conflicting rows
# can be compared side by side.
df[df.duplicated(subset='date', keep=False)]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
13,2002-02-28,0.36,2.0,92.56,149.9,outrrange,tight
14,2002-02-28,0.3,2.0,92.56,149.9,outrrange,tight


### Handling

#### Keep First

In [64]:
# Separate deep copy for the keep-first strategy.
keep_first_df = df.copy(deep=True)

In [65]:
# Before: rows 13 and 14 share the date 2002-02-28 but differ in cpi.
keep_first_df.loc[12:15]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
12,2002-01-31,0.84,2.0,91.88,147.27,outrrange,tight
13,2002-02-28,0.36,2.0,92.56,149.9,outrrange,tight
14,2002-02-28,0.3,2.0,92.56,149.9,outrrange,tight
15,2002-03-31,0.6,2.0,92.89,150.91,outrrange,tight


In [66]:
# For each date keep only the first row seen, then renumber the rows so the
# index has no holes.
keep_first_df = (
    keep_first_df
    .drop_duplicates(subset='date', keep='first')
    .reset_index(drop=True)
)

In [67]:
# After: the single 2002-02-28 row is the first occurrence (cpi 0.36).
keep_first_df.loc[12:15]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
12,2002-01-31,0.84,2.0,91.88,147.27,outrrange,tight
13,2002-02-28,0.36,2.0,92.56,149.9,outrrange,tight
14,2002-03-31,0.6,2.0,92.89,150.91,outrrange,tight
15,2002-04-30,0.47,2.0,92.36,152.66,outrrange,tight


#### Keep Last

In [68]:
# Separate deep copy for the keep-last strategy.
keep_last_df = df.copy(deep=True)

In [69]:
# Before: rows 13 and 14 share the date 2002-02-28 but differ in cpi.
keep_last_df.loc[12:15]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
12,2002-01-31,0.84,2.0,91.88,147.27,outrrange,tight
13,2002-02-28,0.36,2.0,92.56,149.9,outrrange,tight
14,2002-02-28,0.3,2.0,92.56,149.9,outrrange,tight
15,2002-03-31,0.6,2.0,92.89,150.91,outrrange,tight


In [70]:
# For each date keep only the last row seen, then renumber the rows so the
# index has no holes.
keep_last_df = (
    keep_last_df
    .drop_duplicates(subset='date', keep='last')
    .reset_index(drop=True)
)

In [71]:
# After: the single 2002-02-28 row is the last occurrence (cpi 0.30).
keep_last_df.loc[12:15]

Unnamed: 0,date,cpi,policy_rate,neer,money_supply,inflation_target,type_of_monetary_policy
12,2002-01-31,0.84,2.0,91.88,147.27,outrrange,tight
13,2002-02-28,0.3,2.0,92.56,149.9,outrrange,tight
14,2002-03-31,0.6,2.0,92.89,150.91,outrrange,tight
15,2002-04-30,0.47,2.0,92.36,152.66,outrrange,tight
