### Importing the libraries and scraping the data table

In [1]:
import pandas as pd
import numpy as np

# Page that hosts the raw data table.
SOURCE_URL = 'https://media.pearsoncmg.com/ph/bp/bp_studenmund_econometrics_7/exercises/IE1/table1.htm'

# pd.read_html parses the page and hands back the tables it finds as a list.
table_PD = pd.read_html(SOURCE_URL)

In [2]:
# Report how many tables were found on the scraped page.
print(f'Total tables: {len(table_PD)}')

Total tables: 1


In [3]:
# Inspect the container type that holds the scraped tables.
type(table_PD)

list

### Viewing the scraped table

In [4]:
# Stack the rows of every scraped table into a single array ...
stacked_rows = np.concatenate(table_PD)

# ... and wrap that array in one DataFrame. Built this way, the frame carries
# default integer column labels, which the display below confirms.
df = pd.DataFrame(stacked_rows)
df.columns

RangeIndex(start=0, stop=7, step=1)

In [5]:
# Names for the seven scraped columns, in their left-to-right order.
column_names = ['obs', 'QDPASS', 'QYDUS', 'QYPERM', 'BRANCH', 'QRTB3Y', 'EXPINF']
df.columns = column_names

In [6]:
# Rescale the BRANCH value in row 13 by a factor of 1/1000.
# NOTE(review): presumably this corrects an entry in the source table that is
# 1000 times too large — confirm against the source page.
# NOTE: not idempotent — re-running this cell divides the value again.
#
# The write goes through a single .loc call so that it is applied to df itself.
# The chained form df["BRANCH"][13] = ... assigns into an intermediate Series,
# which triggers SettingWithCopyWarning and may leave df unchanged.
df.loc[13, "BRANCH"] = df.loc[13, "BRANCH"] / 1000

In [7]:
# Preview the first rows of the frame with its named columns.
df.head()

Unnamed: 0,obs,QDPASS,QYDUS,QYPERM,BRANCH,QRTB3Y,EXPINF
0,1970:1,84312,671.5,646.39,8498,7.50124,4.8
1,1970:2,83141,692.4,659.41,8498,6.96499,5.8
2,1970:3,82754,705.8,675.3,8498,6.56905,4.9
3,1970:4,84120,711.5,690.33,8372,5.50735,3.5
4,1971:1,85525,732.7,701.97,8722,3.95562,5.9


### Wrangling the data

In [8]:
# Check the dtype of every column before converting anything.
df.dtypes

obs       object
QDPASS    object
QYDUS     object
QYPERM    object
BRANCH    object
QRTB3Y    object
EXPINF    object
dtype: object

In [9]:
# Rewrite the period labels from "YYYY:Q" to "YYYY-Q".
df['obs'] = df['obs'].str.replace(':', '-')

# Parse each data column into a numeric dtype.
# downcast="float" asks pandas for the smallest float dtype that can hold the
# values, so some decimals show rounding noise afterwards
# (e.g. 646.39 is displayed as 646.390015).
for column in ("QDPASS", "QYDUS", "QYPERM", "BRANCH", "QRTB3Y", "EXPINF"):
    df[column] = pd.to_numeric(df[column], downcast="float")

In [10]:
# Expose each row's position as an explicit "index" column; later cells compare
# against it to decide which quarters fall before or after a cut-off.
# The guard keeps the cell idempotent: without it, every re-run would insert
# one more index column into the frame.
if "index" not in df.columns:
    df = df.reset_index()
df.head()

Unnamed: 0,index,obs,QDPASS,QYDUS,QYPERM,BRANCH,QRTB3Y,EXPINF
0,0,1970-1,84312.0,671.5,646.390015,8498.0,7.501241,4.8
1,1,1970-2,83141.0,692.400024,659.409973,8498.0,6.964994,5.8
2,2,1970-3,82754.0,705.799988,675.299988,8498.0,6.569049,4.9
3,3,1970-4,84120.0,711.5,690.330017,8372.0,5.507346,3.5
4,4,1971-1,85525.0,732.700012,701.969971,8722.0,3.955616,5.9


`MMCDUM` is a dummy variable equal to zero before the third-quarter 1978 legalization of money market certificates and equal to one from that quarter onward.

In [11]:
# Find the row label of the quarter in which money market certificates were legalized.
legalization_rows = df[df["obs"] == "1978-3"]
print(legalization_rows.index)

Int64Index([34], dtype='int64')


In [12]:
# Dummy flag per row: 0 for rows positioned before 34, 1 from row 34 onward.
# Row 34 is the position of the "1978-3" observation.
mm = [0 if i < 34 else 1 for i in df["index"]]

df["MMCDUM"] = mm

`QRDPASS` was 5 percent until the fourth quarter of 1973, when it changed to 5.25 percent; it remained there until the third quarter of 1979, when it rose to 5.50 percent.

In [13]:
# Find the row labels of the two quarters in which the passbook rate changed.
for quarter in ("1973-4", "1979-3"):
    print(df[df["obs"] == quarter].index)

Int64Index([15], dtype='int64')
Int64Index([38], dtype='int64')


In [14]:
# Rate per row, in three regimes split at row positions 15 and 38
# (the positions of the "1973-4" and "1979-3" observations):
#   rows before 15 -> 5, rows 15..37 -> 5.25, rows 38 onward -> 5.50
qp = [5 if i < 15 else 5.25 if i < 38 else 5.50 for i in df["index"]]

df["QRDPASS"] = qp

In [15]:
# SPREAD is the element-wise difference QRDPASS minus QRTB3Y.
df["SPREAD"] = df["QRDPASS"].sub(df["QRTB3Y"])

In [16]:
# Final column order for the dataset.
column_order = [
    "index",
    "obs",
    "QDPASS",
    "QYDUS",
    "QYPERM",
    "QRDPASS",
    "QRTB3Y",
    "SPREAD",
    "MMCDUM",
    "EXPINF",
    "BRANCH",
]
df = df[column_order]

In [17]:
# Promote the "index" column to be the frame's row index.
df = df.set_index(keys="index")

### Viewing the final dataset

In [18]:
# Display the assembled dataset.
df

Unnamed: 0_level_0,obs,QDPASS,QYDUS,QYPERM,QRDPASS,QRTB3Y,SPREAD,MMCDUM,EXPINF,BRANCH
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,1970-1,84312.0,671.5,646.390015,5.0,7.501241,-2.501241,0,4.8,8498.0
1,1970-2,83141.0,692.400024,659.409973,5.0,6.964994,-1.964994,0,5.8,8498.0
2,1970-3,82754.0,705.799988,675.299988,5.0,6.569049,-1.569049,0,4.9,8498.0
3,1970-4,84120.0,711.5,690.330017,5.0,5.507346,-0.507346,0,3.5,8372.0
4,1971-1,85525.0,732.700012,701.969971,5.0,3.955616,1.044384,0,5.9,8722.0
5,1971-2,89286.0,749.299988,716.929993,5.0,4.310242,0.689758,0,5.7,8722.0
6,1971-3,90618.0,757.599976,732.409973,5.0,5.186687,-0.186687,0,5.1,8722.0
7,1971-4,92310.0,767.400024,745.52002,5.0,4.339589,0.660411,0,4.1,8862.0
8,1972-1,95112.0,782.200012,757.369995,5.0,3.513554,1.486446,0,3.2,8862.0
9,1972-2,97361.0,794.5,769.549988,5.0,3.836743,1.163257,0,5.4,8862.0


### Saving as a Stata `.dta` file

In [19]:
# Write the dataset to a Stata file in the current working directory.
output_path = 'Problem 1 Dataset.dta'
df.to_stata(output_path)