# Load Dataset

In [1]:
# Mount Google Drive inside the Colab VM at /content/MyDrive.
from google.colab import drive
drive.mount('/content/MyDrive')

Mounted at /content/MyDrive


In [2]:
# Quietly extract the dataset archive from the mounted Drive into the "dataset" folder.
!unzip -q "/content/MyDrive/MyDrive/UMHackathon/UMHackathon 2021 Finance Dataset.zip" -d "dataset" 
# Alternative archive location, kept disabled:
#!unzip -q "/content/MyDrive/MyDrive/UMHackathon 2021 Finance Dataset.zip" -d "dataset" 

In [3]:
# Install Selenium plus a Chromium driver for the web scraping done later in the notebook.
# NOTE(review): selenium is installed unpinned, so the version depends on when this cell runs.
!pip install selenium -q
!apt-get update 
!apt install chromium-chromedriver
# Copy the driver binary into /usr/bin.
!cp /usr/lib/chromium-browser/chromedriver /usr/bin

[K     |████████████████████████████████| 904 kB 4.3 MB/s 
Get:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]
Ign:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Ign:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Hit:5 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease
Get:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  Release [696 B]
Hit:7 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Get:8 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  Release.gpg [836 B]
Hit:9 http://archive.ubuntu.com/ubuntu bionic InRelease
Get:10 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Hit:11 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease
Get:

In [4]:
import pandas as pd
import glob
import os
import dateutil.parser
import functools
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
import datetime
import random

import sys
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select
from dateutil.relativedelta import relativedelta
import requests
from bs4 import BeautifulSoup

  import pandas.util.testing as tm


# Data Cleaning

In [5]:
# Folder whose sub-directories hold the BPAM CSV files read below.
dataset_dir = "/content/dataset/BPAM Evaluated Prices Rated Range Prices (LT)"

# Merge both files (BPAMERP & BPAMERS) together as a single pandas frame
def read_all_csvs(dataset_dir):
  """Load every BPAMERP / BPAMERS CSV below ``dataset_dir`` and join them.

  Each sub-directory of ``dataset_dir`` is scanned for files whose name
  contains "BPAMERP" or "BPAMERS". Within one sub-directory the last file of
  each kind is inner-joined on STOCK CODE / ISIN CODE / STOCK NAME.

  Args:
    dataset_dir: path of the folder that contains the sub-directories.

  Returns:
    A tuple ``(df_BPAMERP, df_BPAMERS, df_joined)`` of DataFrames with a fresh
    0..n-1 index: all BPAMERP rows, all BPAMERS rows, and the per-folder joins
    sorted by VALUE DATE. A frame is empty when nothing was found for it.
  """
  merge_keys = ['STOCK CODE','ISIN CODE','STOCK NAME']
  erp_frames = []
  ers_frames = []
  joined_frames = []
  # sorted() makes the load order reproducible; os.listdir order is arbitrary.
  for sub_dir in sorted(os.listdir(dataset_dir)):
    full_path = os.path.join(dataset_dir, sub_dir)
    # Test the full path: a bare entry name would be resolved against the
    # current working directory, so stray files were never filtered out.
    if not os.path.isdir(full_path):
      continue
    # Reset per folder so a folder missing one of the two files can never be
    # merged with a frame left over from a previous folder.
    temp_df_BPAMERP = None
    temp_df_BPAMERS = None
    for file_name in sorted(os.listdir(full_path)):
      full_file_name = os.path.join(full_path, file_name)
      if "BPAMERP" in file_name:
        temp_df_BPAMERP = pd.read_csv(full_file_name, parse_dates=True)
        erp_frames.append(temp_df_BPAMERP)
      elif "BPAMERS" in file_name:
        temp_df_BPAMERS = pd.read_csv(full_file_name, parse_dates=True)
        ers_frames.append(temp_df_BPAMERS)
    if temp_df_BPAMERP is not None and temp_df_BPAMERS is not None:
      joined_frames.append(temp_df_BPAMERP.merge(temp_df_BPAMERS, on = merge_keys, how = 'inner'))

  # Concatenate once at the end instead of growing a frame inside the loop.
  df_BPAMERP = pd.concat(erp_frames) if erp_frames else pd.DataFrame()
  df_BPAMERS = pd.concat(ers_frames) if ers_frames else pd.DataFrame()
  if joined_frames:
    df_joined = pd.concat(joined_frames).sort_values(by=['VALUE DATE'])
  else:
    df_joined = pd.DataFrame()
  return df_BPAMERP.reset_index(drop=True), df_BPAMERS.reset_index(drop=True), df_joined.reset_index(drop=True)

# Load the raw frames, then show the joined frame's columns, dtypes and deep memory usage.
df_BPAMERP, df_BPAMERS, df_joined = read_all_csvs(dataset_dir)
df_joined.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34206 entries, 0 to 34205
Data columns (total 49 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   STOCK CODE                                34206 non-null  object 
 1   ISIN CODE                                 34206 non-null  object 
 2   STOCK NAME                                34206 non-null  object 
 3   VALUE DATE                                34206 non-null  object 
 4   EVAL UPPER THRESHOLD YIELD                34206 non-null  float64
 5   EVAL MID YIELD                            34206 non-null  float64
 6   EVAL LOWER THRESHOLD YIELD                34206 non-null  float64
 7   EVAL LOWER THRESHOLD PRICE                34206 non-null  float64
 8   EVAL MID PRICE                            34206 non-null  float64
 9   EVAL UPPER THRESHOLD PRICE                34206 non-null  float64
 10  MODIFIED DURATION                 

In [6]:
# Convert every column whose name contains "DATE" into datetime values.
date_columns = list(filter(lambda name: "DATE" in name, df_joined.columns))
for date_column in date_columns:
  df_joined[date_column] = pd.to_datetime(df_joined[date_column], infer_datetime_format=True)

# Turn the Y/N flag columns into booleans.
d = dict(Y=True, N=False)
for flag_column in ("CALLABLE/PUTTABLE", "CONVERTIBLE/EXCHANGABLE"):
  df_joined[flag_column] = df_joined[flag_column].map(d)

# Store whatever is still an object column as a pandas categorical.
obj_columns = df_joined.select_dtypes(include='object').columns
for obj_column in obj_columns:
  df_joined[obj_column] = df_joined[obj_column].astype('category')

# Report dtypes and deep memory usage after the conversions.
df_joined.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34206 entries, 0 to 34205
Data columns (total 49 columns):
 #   Column                                    Non-Null Count  Dtype         
---  ------                                    --------------  -----         
 0   STOCK CODE                                34206 non-null  category      
 1   ISIN CODE                                 34206 non-null  category      
 2   STOCK NAME                                34206 non-null  category      
 3   VALUE DATE                                34206 non-null  datetime64[ns]
 4   EVAL UPPER THRESHOLD YIELD                34206 non-null  float64       
 5   EVAL MID YIELD                            34206 non-null  float64       
 6   EVAL LOWER THRESHOLD YIELD                34206 non-null  float64       
 7   EVAL LOWER THRESHOLD PRICE                34206 non-null  float64       
 8   EVAL MID PRICE                            34206 non-null  float64       
 9   EVAL UPPER THRESHOLD PRICE  

In [7]:
# Count the missing values in each column, smallest count first.
df_joined.isna().sum().sort_values()

STOCK CODE                                      0
PRINCIPLE                                       0
BOND TYPE                                       0
BOND CLASS                                      0
CONVERTIBLE/EXCHANGABLE                         0
ISSUE DATE                                      0
MATURITY DATE                                   0
DAY COUNT BASIS                                 0
ISSUER NAME                                     0
FACILITY AMOUNT/FACILITY LIMIT(MYR MIL)         0
BOND ISSUE AMOUNT(MYR MIL)                      0
BOND CURRENT OUTSTANDING AMOUNT(MYR MIL)        0
REMAINING TENURE                                0
ISSUER FACILITY LIMIT(MYR MIL)                  0
ISSUER OUTSTANDING AMOUNT(MYR MIL)              0
SECTOR                                          0
CALLABLE/PUTTABLE                               0
FACILITY OUTSTANDING AMOUNT(MYR MIL)            0
FACILITY CODE                                   0
RATING                                          0


In [8]:
# impute null value
# Assign the filled Series back instead of calling fillna(inplace=True) on a
# column selection: the in-place form acts on an intermediate object and is not
# guaranteed to write through to df_joined.
for numeric_column in ("COUPON FREQUENCY", "NEXT COUPON RATE", "PREVIOUS COUPON RATE"):
  df_joined[numeric_column] = df_joined[numeric_column].fillna(0)

# A categorical column only accepts a fill value that is one of its categories.
# The membership test keeps this cell re-runnable, because add_categories raises
# when the category already exists.
for category_column, placeholder in (
    ("ISLAMIC CONCEPT", "CONVENTIONAL"),
    ("RATING AGENCY", "NOT RATED"),
    ("RATING ACTION", "NOT RATED"),
):
  column_values = df_joined[category_column]
  if placeholder not in column_values.cat.categories:
    column_values = column_values.cat.add_categories(placeholder)
  df_joined[category_column] = column_values.fillna(placeholder)

In [9]:
# Count the missing values in each column again, smallest count first.
df_joined.isna().sum().sort_values()

STOCK CODE                                      0
CONVERTIBLE/EXCHANGABLE                         0
RATING AGENCY                                   0
ISSUE DATE                                      0
MATURITY DATE                                   0
DAY COUNT BASIS                                 0
COUPON FREQUENCY                                0
PREVIOUS COUPON RATE                            0
NEXT COUPON RATE                                0
BOND CLASS                                      0
FACILITY AMOUNT/FACILITY LIMIT(MYR MIL)         0
BOND ISSUE AMOUNT(MYR MIL)                      0
BOND CURRENT OUTSTANDING AMOUNT(MYR MIL)        0
REMAINING TENURE                                0
ISSUER FACILITY LIMIT(MYR MIL)                  0
ISSUER OUTSTANDING AMOUNT(MYR MIL)              0
ISLAMIC CONCEPT                                 0
SECTOR                                          0
CALLABLE/PUTTABLE                               0
FACILITY OUTSTANDING AMOUNT(MYR MIL)            0


# Feature Engineering

In [10]:
# Derive the reporting month from [VALUE DATE].
# A value date refers to the month before the one it falls in (e.g. VALUE DATE
# 2020-04-01 refers to March), so the monthly period is shifted back by one.
df_joined["VALUE DATE MONTH"] = df_joined["VALUE DATE"].dt.to_period('M') - 1
df_joined["VALUE DATE MONTH"]

0        2020-03
1        2020-03
2        2020-03
3        2020-03
4        2020-03
          ...   
34201    2020-04
34202    2020-04
34203    2020-04
34204    2020-04
34205    2020-04
Name: VALUE DATE MONTH, Length: 34206, dtype: period[M]

In [11]:
# Maturity duration: whole days between ISSUE DATE and MATURITY DATE.
df_joined["MATURITY DURATION"] = (df_joined["MATURITY DATE"] - df_joined["ISSUE DATE"]).dt.days
df_joined["MATURITY DURATION"]

0         7305
1         9129
2         9130
3         8766
4        10958
         ...  
34201     9130
34202     9129
34203     9129
34204    10956
34205     2555
Name: MATURITY DURATION, Length: 34206, dtype: int64

In [12]:
# calculate accrued interest
# assume ACTACT and ACTBOTH are the same
def cal_accured_interest(bond):
  """Compute the accrued interest of one bond row on a notional of 100.

  Args:
    bond: row-like mapping with the keys "PREVIOUS PAYMENT DATE", "ISSUE DATE",
      "VALUE DATE", "NEXT PAYMENT DATE", "DAY COUNT BASIS", "COUPON FREQUENCY"
      and "NEXT COUPON RATE" (rate in percent).

  Returns:
    The accrued interest. ACTACT / ACTBOTH prorate one coupon over the current
    coupon period and return 0 when the coupon frequency is 0. ACT365 prorates
    the annual coupon over 365 days. NaN is returned for any other day count
    basis and when the coupon period is not a positive number of days.
  """
  # Accrual starts at the previous payment date, or at issue when there is none.
  # pd.isna also catches None / NaN, which an identity test against pd.NaT misses.
  prev_date = bond["PREVIOUS PAYMENT DATE"]
  if pd.isna(prev_date):
    prev_date = bond["ISSUE DATE"]
  num_of_days_since_last_coupon_payment = float((bond["VALUE DATE"] - prev_date).days)
  day_count_basis = bond["DAY COUNT BASIS"]
  annual_coupon = 100 * (bond["NEXT COUPON RATE"] / 100)

  if day_count_basis in ("ACTACT", "ACTBOTH"):
    # Check the frequency first: a bond without coupons needs no payment period.
    if bond["COUPON FREQUENCY"] == 0:
      return 0
    payment_period = float((bond["NEXT PAYMENT DATE"] - prev_date).days)
    # `not (x > 0)` is also true for NaN (missing next payment date); this
    # avoids a ZeroDivisionError on a zero-length period.
    if not payment_period > 0:
      return float("nan")
    return annual_coupon / bond["COUPON FREQUENCY"] * num_of_days_since_last_coupon_payment / payment_period
  if day_count_basis == "ACT365":
    return annual_coupon * num_of_days_since_last_coupon_payment / 365
  # Unsupported day count basis: make the missing value explicit.
  return float("nan")

# Evaluate the accrued interest row by row and store it as a new column.
df_joined["ACCRUED INTEREST"] = df_joined.apply(cal_accured_interest, axis=1)
df_joined["ACCRUED INTEREST"]

0        2.302934
1        1.930685
2        0.574247
3        2.366466
4        2.460630
           ...   
34201    1.048000
34202    2.402630
34203    2.293699
34204    2.462466
34205    1.780822
Name: ACCRUED INTEREST, Length: 34206, dtype: float64

In [13]:
def opr_extract():
  """Scrape the OPR decision table from the Bank Negara Malaysia website.

  Returns:
    dict mapping the date text of a table row to the rate (float) of that row.
    Within a run of consecutive rows that carry the same rate only the last
    row of the run is kept.

  Raises:
    requests.RequestException: when the page cannot be fetched (network error,
      timeout or HTTP error status).
  """
  URL = "https://www.bnm.gov.my/opr-decision-and-statement"
  # A timeout stops a stalled connection from hanging the notebook, and
  # raise_for_status avoids silently parsing an error page.
  page = requests.get(URL, timeout=30)
  page.raise_for_status()
  soup = BeautifulSoup(page.content,'lxml')
  # The first four <td> cells are skipped. After that the cells are read in
  # groups of four: offset 0 holds the date and offset 2 the rate.
  content = soup.find_all('td')[4:]
  dates = [str(cell).replace("<td>","").replace("</td>","") for cell in content[0::4]]
  rates = [float(str(cell).replace("<td>\n\t\t","").replace("\n\t</td>","")) for cell in content[2::4]]

  # Collapse runs of identical consecutive rates, dropping the earlier entry
  # of each equal pair.
  i = 0
  while i < len(rates) - 1:
    if rates[i] == rates[i+1]:
      del rates[i]
      del dates[i]
    else:
      i += 1
  return dict(zip(dates, rates))

def assign_opr(value_date):
  """Return the OPR that applies on ``value_date``.

  Walks the module-level ``OPR_movement`` (date text, rate) pairs in their
  stored order and returns the rate of the first pair whose parsed date is not
  after ``value_date``. Returns None when no pair qualifies.
  """
  matching_rates = (
      rate
      for date_text, rate in OPR_movement
      if value_date >= dateutil.parser.parse(date_text)
  )
  return next(matching_rates, None)

# set opr movement
OPR_movement = opr_extract()
# sort the opr dict according to the date in descending order
# (sorted() turns the dict into a list of (date text, rate) pairs, newest first,
# which is the form assign_opr iterates over)
OPR_movement = sorted(OPR_movement.items(), key=lambda item: dateutil.parser.parse(item[0]), reverse=True)
# look up the rate for every row's VALUE DATE
df_joined["OPR MOVEMENT"] = df_joined["VALUE DATE"].apply(assign_opr)
df_joined["OPR MOVEMENT"]

0        2.5
1        2.5
2        2.5
3        2.5
4        2.5
        ... 
34201    2.5
34202    2.5
34203    2.5
34204    2.5
34205    2.5
Name: OPR MOVEMENT, Length: 34206, dtype: float64

In [15]:
def inflation_rate_extract():
  """Scrape Malaysia's inflation rates from the FX Empire bar chart.

  A headless Chrome session opens the page, picks the first entry of the range
  drop-down, hovers over every bar of the chart and reads the rate from the
  text shown for that bar.

  Returns:
    dict mapping a month-end date string (YYYY-MM-DD) to the rate (float).
    The last bar is assigned to the last day of the previous month (relative
    to today) and each earlier bar to the month-end before it.
    NOTE(review): this assumes the bars are ordered oldest to newest - confirm.
  """
  chrome_options = webdriver.ChromeOptions()
  chrome_options.add_argument('--headless')
  chrome_options.add_argument('--no-sandbox')
  chrome_options.add_argument('--disable-dev-shm-usage')
  chrome_options.add_argument("start-maximized")
  chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
  chrome_options.add_experimental_option('useAutomationExtension', False)

  # `options=` replaces the deprecated `chrome_options=` keyword. The driver
  # binary is looked up on PATH under its default name "chromedriver".
  wd = webdriver.Chrome(options=chrome_options)
  try:
    wd.get("https://www.fxempire.com/macro/malaysia/inflation-rate")

    #Most recent three years
    # find_element(By.XPATH, ...) replaces the find_element_by_xpath helper,
    # which newer Selenium releases no longer provide.
    wd.find_element(By.XPATH, '//div[@class=" css-16ycfp3"]').click()
    wd.find_element(By.XPATH, "//div[@class=' css-14rzzno-menu']//*[name()='div']").click()

    chart = WebDriverWait(wd, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='recharts-wrapper']")))
    wd.execute_script("return arguments[0].scrollIntoView(true);", chart)
    elements = WebDriverWait(wd, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//div[@class='recharts-wrapper']//*[name()='svg']//*[name()='g' and @class='recharts-layer recharts-bar']//*[name()='g']//*[name()='g']//*[name()='g']")))
    ir=[]
    for element in elements:
      # Hover over the bar, then read the number from the end of the chart text.
      ActionChains(wd).move_to_element(element).perform()
      mouseover = WebDriverWait(wd, 5).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='recharts-wrapper']")))
      ir.append(float(mouseover.text.strip().replace("%","")[-6:].replace("\n","")))
  finally:
    # Always shut the browser down so failed runs do not leak Chrome processes.
    wd.quit()

  # Subtracting the day-of-month lands on the last day of the previous month.
  today = datetime.date.today()
  current_date = today - relativedelta(days=today.day)
  inflation_dict={}
  for rate in reversed(ir):
    inflation_dict[str(current_date)] = rate
    current_date -= relativedelta(days=current_date.day)
  # NOTE(review): hard-coded values that overwrite whatever was scraped for
  # these two months; the reason is not visible here.
  inflation_dict['2021-06-30']=3.4
  inflation_dict['2021-05-31']=4.4
  return inflation_dict

def assign_ir(value_date):
  """Return the inflation rate that applies on ``value_date``.

  Walks the module-level ``inflation_dict`` (date text, rate) pairs in their
  stored order and returns the rate of the first pair whose parsed date is not
  after ``value_date``. Returns None when no pair qualifies.
  """
  matching_rates = (
      rate
      for date_text, rate in inflation_dict
      if value_date >= dateutil.parser.parse(date_text)
  )
  return next(matching_rates, None)

#set inflation rate
inflation_dict=inflation_rate_extract()
# sort the ir dict according to the date in descending order
# (sorted() turns the dict into a list of (date text, rate) pairs, newest first,
# which is the form assign_ir iterates over)
inflation_dict = sorted(inflation_dict.items(), key=lambda item: dateutil.parser.parse(item[0]), reverse=True)
# look up the rate for every row's VALUE DATE
df_joined["INFLATION RATE"]=df_joined["VALUE DATE"].apply(assign_ir)
df_joined

  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,STOCK CODE,ISIN CODE,STOCK NAME,VALUE DATE,EVAL UPPER THRESHOLD YIELD,EVAL MID YIELD,EVAL LOWER THRESHOLD YIELD,EVAL LOWER THRESHOLD PRICE,EVAL MID PRICE,EVAL UPPER THRESHOLD PRICE,MODIFIED DURATION,CONVEXITY,EVAL UPPER THRESHOLD YIELD CHANGE,EVAL MID YIELD CHANGE,EVAL LOWER THRESHOLD YIELD CHANGE,EVAL LOWER THRESHOLD PRICE CHANGE,EVAL MID PRICE CHANGE,EVAL UPPER THRESHOLD PRICE CHANGE,COMPOSITE LIQUIDITY SCORE (T-1),FACILITY CODE,ISSUER NAME,PRINCIPLE,BOND TYPE,BOND CLASS,RATING,RATING AGENCY,ISSUE DATE,MATURITY DATE,EXPECTED MATURITY DATE,DAY COUNT BASIS,COUPON FREQUENCY,FIRST PAYMENT DATE,PREVIOUS PAYMENT DATE,PREVIOUS COUPON RATE,NEXT PAYMENT DATE,NEXT COUPON RATE,FACILITY AMOUNT/FACILITY LIMIT(MYR MIL),FACILITY OUTSTANDING AMOUNT(MYR MIL),BOND ISSUE AMOUNT(MYR MIL),BOND CURRENT OUTSTANDING AMOUNT(MYR MIL),REMAINING TENURE,ISSUER FACILITY LIMIT(MYR MIL),ISSUER OUTSTANDING AMOUNT(MYR MIL),ISLAMIC CONCEPT,SECTOR,RATING EFFECTIVE DATE,CALLABLE/PUTTABLE,CONVERTIBLE/EXCHANGABLE,RATING ACTION,VALUE DATE MONTH,MATURITY DURATION,ACCRUED INTEREST,OPR MOVEMENT,INFLATION RATE
0,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2020-04-01,4.023,4.006,3.989,109.039,109.259,109.480,11.618,175.207,-0.043,-0.044,-0.045,0.545,0.560,0.574,3.7,199600011,Government of Malaysia,CONVENTIONAL,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-10-07,4.762,2020-04-07,4.762,1000000,401633,12500,12500,20Y,9100000,793533,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2020-03,7305,2.302934,2.5,1.3
1,VZ170377,MYBVZ1703777,DANAINFRA IMTN 5.220% 14.11.2042 - Tranche No 71,2020-04-01,4.495,4.387,4.279,110.223,111.868,113.546,13.513,252.626,-0.058,-0.060,-0.062,0.870,0.918,0.967,1.7,201200042,DanaInfra Nasional Berhad,ISLAMIC,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2017-11-16,2042-11-14,NaT,ACT365,2.0,2018-05-16,2019-11-18,5.220,2020-05-18,5.220,61000,55820,600,600,25Y,74000,63820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-03,9129,1.930685,2.5,1.3
2,VZ180073,MYBVZ1800730,DANAINFRA IMTN 5.240% 20.02.2043 - Tranche No 76,2020-04-01,4.509,4.396,4.283,110.364,112.098,113.868,13.759,259.435,-0.058,-0.060,-0.062,0.875,0.925,0.975,0.0,201200042,DanaInfra Nasional Berhad,ISLAMIC,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2018-02-21,2043-02-20,NaT,ACT365,2.0,2018-08-21,2020-02-21,5.240,2020-08-21,5.240,61000,55820,1000,1000,25Y,74000,63820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-03,9130,0.574247,2.5,1.3
3,VZ190073,MYBVZ1900738,DANAINFRA IMTN 4.720% 01.04.2043 - Tranche No 89,2020-04-01,4.498,4.400,4.302,103.161,104.600,106.066,13.855,265.353,-0.059,-0.061,-0.063,0.853,0.899,0.945,3.0,201200042,DanaInfra Nasional Berhad,ISLAMIC,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2019-04-01,2043-04-01,NaT,ACT365,2.0,2019-10-01,2019-10-01,4.720,2020-04-01,4.720,61000,55820,500,500,25Y,74000,63820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-03,8766,2.366466,2.5,1.3
4,VZ140103,MYBVZ1401034,DANAINFRA IMTN 5.510% 21.04.2044 - Tranche No 19,2020-04-01,4.550,4.437,4.324,113.946,115.765,117.623,13.761,267.815,-0.057,-0.059,-0.061,0.901,0.954,1.007,0.0,201200042,DanaInfra Nasional Berhad,ISLAMIC,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2014-04-21,2044-04-21,NaT,ACT365,2.0,2014-10-21,2019-10-21,5.510,2020-04-21,5.510,61000,55820,400,400,25Y,74000,63820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-03,10958,2.460630,2.5,1.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34201,VZ180073,MYBVZ1800730,DANAINFRA IMTN 5.240% 20.02.2043 - Tranche No 76,2020-05-04,3.895,3.781,3.667,120.194,122.153,124.156,14.101,268.667,-0.013,-0.012,-0.011,0.215,0.201,0.188,0.0,201200042,DanaInfra Nasional Berhad,ISLAMIC,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2018-02-21,2043-02-20,NaT,ACT365,2.0,2018-08-21,2020-02-21,5.240,2020-08-21,5.240,61000,56820,1000,1000,25Y,74000,64820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-04,9130,1.048000,2.5,-0.2
34202,VZ170377,MYBVZ1703777,DANAINFRA IMTN 5.220% 14.11.2042 - Tranche No 71,2020-05-04,3.882,3.773,3.664,119.970,121.828,123.724,13.854,261.640,-0.013,-0.012,-0.011,0.213,0.200,0.186,1.3,201200042,DanaInfra Nasional Berhad,ISLAMIC,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2017-11-16,2042-11-14,NaT,ACT365,2.0,2018-05-16,2019-11-18,5.220,2020-05-18,5.220,61000,56820,600,600,25Y,74000,64820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-04,9129,2.402630,2.5,-0.2
34203,VZ170163,MYBVZ1701631,DANAINFRA IMTN 5.200% 23.05.2042 - Tranche No 66,2020-05-04,3.862,3.758,3.654,119.740,121.487,123.269,13.704,254.602,-0.013,-0.012,-0.011,0.211,0.197,0.183,2.0,201200042,DanaInfra Nasional Berhad,ISLAMIC,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2017-05-25,2042-05-23,NaT,ACT365,2.0,2017-11-27,2019-11-25,5.200,2020-05-27,5.200,61000,56820,780,780,25Y,74000,64820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-04,9129,2.293699,2.5,-0.2
34204,VZ170378,MYBVZ1703785,DANAINFRA IMTN 5.350% 15.11.2047 - Tranche No 72,2020-05-04,4.040,3.936,3.832,121.643,123.639,125.681,15.386,337.621,-0.007,-0.006,-0.005,0.128,0.112,0.094,2.7,201200042,DanaInfra Nasional Berhad,ISLAMIC,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2017-11-16,2047-11-15,NaT,ACT365,2.0,2018-05-16,2019-11-18,5.350,2020-05-18,5.350,61000,56820,900,900,25Y+,74000,64820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-04,10956,2.462466,2.5,-0.2


In [16]:
def mgs_extract(max_lookback_days=31):
  """Scrape government securities yields from the BNM website for every value date.

  For each distinct VALUE DATE in the module-level ``df_joined`` the yield page
  of the previous calendar day is fetched. While that page shows no yield, the
  date is moved back one more day (weekends / public holidays).

  Args:
    max_lookback_days: upper bound on how many extra days to step back before
      giving up, so a page that never shows data cannot loop forever.

  Returns:
    ``(conv_MGS_movement, islam_MGS_movement)``: two dicts keyed by the value
    date (YYYY-MM-DD), each mapping to a dict with the keys "3-year",
    "5-year", "7-year" and "10-year" holding the yields read from the first
    and the second table slice of the page respectively.

  Raises:
    requests.RequestException: when a page cannot be fetched.
    RuntimeError: when no yield is found within ``max_lookback_days``.
  """
  base_url = "https://www.bnm.gov.my/government-securities-yield?p_p_id=my_gov_bnm_yield_display_portlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&_my_gov_bnm_yield_display_portlet_tradingDateTxt="

  def fetch_cells(trading_date):
    # Download the page for one trading date and return all of its <td> cells.
    page = requests.get(base_url + str(trading_date), timeout=30)
    page.raise_for_status()
    return BeautifulSoup(page.content,'html.parser').find_all('td')

  def cell_text(cell):
    # Strip the markup and all whitespace from one table cell.
    return str(cell).strip().replace('<td align="center">\n','').replace('</td>','').replace('\n','').replace(' ','')

  def to_tenor_dict(yields):
    # Label the first four yields of a table with their tenor.
    return {"3-year": yields[0], "5-year": yields[1], "7-year": yields[2], "10-year": yields[3]}

  value_dates=df_joined["VALUE DATE"].unique()
  conv_MGS_movement={}
  islam_MGS_movement={}
  for date in value_dates:
    date=str(date)[:10]
    prev_date=datetime.datetime.strptime(date, "%Y-%m-%d").date()-relativedelta(days=1)
    cells=fetch_cells(prev_date)
    content=cells[11:43]
    #Get from earlier dates in the case of weekends/public holidays
    lookback=0
    while cell_text(content[5])=='':
      lookback+=1
      if lookback>max_lookback_days:
        raise RuntimeError("No yield data found within %d days before %s" % (max_lookback_days, date))
      prev_date-=relativedelta(days=1)
      cells=fetch_cells(prev_date)
      content=cells[11:43]
    # First table slice: rows of 8 cells, yield at offset 5 of each row.
    conv_yields=[float(cell_text(cell).replace('*','')) for cell in content[5::8]]
    conv_MGS_movement[date]=to_tenor_dict(conv_yields)
    # Second table slice: rows of 7 cells, yield at offset 4 of each row.
    islam_yields=[float(cell_text(cell).replace('*','')) for cell in cells[53:80][4::7]]
    islam_MGS_movement[date]=to_tenor_dict(islam_yields)
  return conv_MGS_movement,islam_MGS_movement

def assign_mgs(islamic,value_date,remaining_tenure):
  """Pick the benchmark yield that matches a bond's remaining tenure.

  Args:
    islamic: truthy to read from ``islam_MGS_movement``, falsy to read from
      ``conv_MGS_movement`` (both are module-level dicts).
    value_date: value date; the first 10 characters of its string form are
      used as the lookup key.
    remaining_tenure: tenure bucket label such as "3Y", "5Y" or "7Y".

  Returns:
    The 3-year yield for buckets up to "3Y", the 5-year yield for "5Y", the
    7-year yield for "7Y" and the 10-year yield for every other bucket.
  """
  date=str(value_date)[:10]
  mgs_dict=islam_MGS_movement if islamic else conv_MGS_movement
  if remaining_tenure in ["3M","6M","1Y","2Y","3Y"]:
    benchmark="3-year"
  elif remaining_tenure=="5Y":
    benchmark="5-year"
  # Was compared with "&Y" (a typo for "7Y"), which sent every 7-year bond to
  # the 10-year benchmark.
  elif remaining_tenure=="7Y":
    benchmark="7-year"
  else:
    benchmark="10-year"
  return mgs_dict[date][benchmark]

#set mgs movement
conv_MGS_movement, islam_MGS_movement=mgs_extract()
# encode PRINCIPLE as 0 (CONVENTIONAL) / 1 (ISLAMIC); assign_mgs uses it as its truthy `islamic` flag
df_joined["PRINCIPLE"]=df_joined["PRINCIPLE"].replace("CONVENTIONAL",0).replace("ISLAMIC",1)
# benchmark yield for each row, chosen by principle, value date and remaining tenure
df_joined["MGS"]=df_joined[["PRINCIPLE","VALUE DATE","REMAINING TENURE"]].apply(lambda x: assign_mgs(x["PRINCIPLE"],x["VALUE DATE"],x["REMAINING TENURE"]),axis=1)
# credit spread = the bond's evaluated mid yield minus its benchmark yield
df_joined["CREDIT SPREAD"]=df_joined["EVAL MID YIELD"]-df_joined["MGS"]
df_joined

Unnamed: 0,STOCK CODE,ISIN CODE,STOCK NAME,VALUE DATE,EVAL UPPER THRESHOLD YIELD,EVAL MID YIELD,EVAL LOWER THRESHOLD YIELD,EVAL LOWER THRESHOLD PRICE,EVAL MID PRICE,EVAL UPPER THRESHOLD PRICE,MODIFIED DURATION,CONVEXITY,EVAL UPPER THRESHOLD YIELD CHANGE,EVAL MID YIELD CHANGE,EVAL LOWER THRESHOLD YIELD CHANGE,EVAL LOWER THRESHOLD PRICE CHANGE,EVAL MID PRICE CHANGE,EVAL UPPER THRESHOLD PRICE CHANGE,COMPOSITE LIQUIDITY SCORE (T-1),FACILITY CODE,ISSUER NAME,PRINCIPLE,BOND TYPE,BOND CLASS,RATING,RATING AGENCY,ISSUE DATE,MATURITY DATE,EXPECTED MATURITY DATE,DAY COUNT BASIS,COUPON FREQUENCY,FIRST PAYMENT DATE,PREVIOUS PAYMENT DATE,PREVIOUS COUPON RATE,NEXT PAYMENT DATE,NEXT COUPON RATE,FACILITY AMOUNT/FACILITY LIMIT(MYR MIL),FACILITY OUTSTANDING AMOUNT(MYR MIL),BOND ISSUE AMOUNT(MYR MIL),BOND CURRENT OUTSTANDING AMOUNT(MYR MIL),REMAINING TENURE,ISSUER FACILITY LIMIT(MYR MIL),ISSUER OUTSTANDING AMOUNT(MYR MIL),ISLAMIC CONCEPT,SECTOR,RATING EFFECTIVE DATE,CALLABLE/PUTTABLE,CONVERTIBLE/EXCHANGABLE,RATING ACTION,VALUE DATE MONTH,MATURITY DURATION,ACCRUED INTEREST,OPR MOVEMENT,INFLATION RATE,MGS,CREDIT SPREAD
0,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2020-04-01,4.023,4.006,3.989,109.039,109.259,109.480,11.618,175.207,-0.043,-0.044,-0.045,0.545,0.560,0.574,3.7,199600011,Government of Malaysia,0,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-10-07,4.762,2020-04-07,4.762,1000000,401633,12500,12500,20Y,9100000,793533,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2020-03,7305,2.302934,2.5,1.3,3.35,0.656
1,VZ170377,MYBVZ1703777,DANAINFRA IMTN 5.220% 14.11.2042 - Tranche No 71,2020-04-01,4.495,4.387,4.279,110.223,111.868,113.546,13.513,252.626,-0.058,-0.060,-0.062,0.870,0.918,0.967,1.7,201200042,DanaInfra Nasional Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2017-11-16,2042-11-14,NaT,ACT365,2.0,2018-05-16,2019-11-18,5.220,2020-05-18,5.220,61000,55820,600,600,25Y,74000,63820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-03,9129,1.930685,2.5,1.3,3.45,0.937
2,VZ180073,MYBVZ1800730,DANAINFRA IMTN 5.240% 20.02.2043 - Tranche No 76,2020-04-01,4.509,4.396,4.283,110.364,112.098,113.868,13.759,259.435,-0.058,-0.060,-0.062,0.875,0.925,0.975,0.0,201200042,DanaInfra Nasional Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2018-02-21,2043-02-20,NaT,ACT365,2.0,2018-08-21,2020-02-21,5.240,2020-08-21,5.240,61000,55820,1000,1000,25Y,74000,63820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-03,9130,0.574247,2.5,1.3,3.45,0.946
3,VZ190073,MYBVZ1900738,DANAINFRA IMTN 4.720% 01.04.2043 - Tranche No 89,2020-04-01,4.498,4.400,4.302,103.161,104.600,106.066,13.855,265.353,-0.059,-0.061,-0.063,0.853,0.899,0.945,3.0,201200042,DanaInfra Nasional Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2019-04-01,2043-04-01,NaT,ACT365,2.0,2019-10-01,2019-10-01,4.720,2020-04-01,4.720,61000,55820,500,500,25Y,74000,63820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-03,8766,2.366466,2.5,1.3,3.45,0.950
4,VZ140103,MYBVZ1401034,DANAINFRA IMTN 5.510% 21.04.2044 - Tranche No 19,2020-04-01,4.550,4.437,4.324,113.946,115.765,117.623,13.761,267.815,-0.057,-0.059,-0.061,0.901,0.954,1.007,0.0,201200042,DanaInfra Nasional Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2014-04-21,2044-04-21,NaT,ACT365,2.0,2014-10-21,2019-10-21,5.510,2020-04-21,5.510,61000,55820,400,400,25Y,74000,63820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-03,10958,2.460630,2.5,1.3,3.45,0.987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34201,VZ180073,MYBVZ1800730,DANAINFRA IMTN 5.240% 20.02.2043 - Tranche No 76,2020-05-04,3.895,3.781,3.667,120.194,122.153,124.156,14.101,268.667,-0.013,-0.012,-0.011,0.215,0.201,0.188,0.0,201200042,DanaInfra Nasional Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2018-02-21,2043-02-20,NaT,ACT365,2.0,2018-08-21,2020-02-21,5.240,2020-08-21,5.240,61000,56820,1000,1000,25Y,74000,64820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-04,9130,1.048000,2.5,-0.2,2.84,0.941
34202,VZ170377,MYBVZ1703777,DANAINFRA IMTN 5.220% 14.11.2042 - Tranche No 71,2020-05-04,3.882,3.773,3.664,119.970,121.828,123.724,13.854,261.640,-0.013,-0.012,-0.011,0.213,0.200,0.186,1.3,201200042,DanaInfra Nasional Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2017-11-16,2042-11-14,NaT,ACT365,2.0,2018-05-16,2019-11-18,5.220,2020-05-18,5.220,61000,56820,600,600,25Y,74000,64820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-04,9129,2.402630,2.5,-0.2,2.84,0.933
34203,VZ170163,MYBVZ1701631,DANAINFRA IMTN 5.200% 23.05.2042 - Tranche No 66,2020-05-04,3.862,3.758,3.654,119.740,121.487,123.269,13.704,254.602,-0.013,-0.012,-0.011,0.211,0.197,0.183,2.0,201200042,DanaInfra Nasional Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2017-05-25,2042-05-23,NaT,ACT365,2.0,2017-11-27,2019-11-25,5.200,2020-05-27,5.200,61000,56820,780,780,25Y,74000,64820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-04,9129,2.293699,2.5,-0.2,2.84,0.918
34204,VZ170378,MYBVZ1703785,DANAINFRA IMTN 5.350% 15.11.2047 - Tranche No 72,2020-05-04,4.040,3.936,3.832,121.643,123.639,125.681,15.386,337.621,-0.007,-0.006,-0.005,0.128,0.112,0.094,2.7,201200042,DanaInfra Nasional Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2017-11-16,2047-11-15,NaT,ACT365,2.0,2018-05-16,2019-11-18,5.350,2020-05-18,5.350,61000,56820,900,900,25Y+,74000,64820,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-04,10956,2.462466,2.5,-0.2,2.84,1.096


In [17]:
# calculate: 
# 1. changes In EVAL MID Price/EVAL LOWER THRESHOLD PRICE/EVAL UPPER THRESHOLD PRICE	
# 2. changes in EVAL MID YIELD/EVAL LOWER THRESHOLD YIELD/EVAL UPPER THRESHOLD YIELD
def transform_eval_mid_price(df_joined):
  """Add row-over-row change columns for the EVAL price and yield fields, per stock.

  For every distinct "STOCK CODE", that stock's rows are ordered by
  "VALUE DATE MONTH" and each EVAL column is differenced against the previous
  row of the same stock, producing a matching "CHANGES IN <column>" column.
  The first row of each stock has no predecessor, so its change values are NaN.

  Args:
    df_joined: DataFrame containing "STOCK CODE", "VALUE DATE MONTH" and the
      six EVAL MID / LOWER THRESHOLD / UPPER THRESHOLD PRICE and YIELD columns.
      It is not modified.

  Returns:
    A new DataFrame with the six "CHANGES IN ..." columns appended. Rows are
    grouped by stock (in order of first appearance) and sorted by
    "VALUE DATE MONTH" within each stock; original index labels are kept.
    Rows whose "STOCK CODE" is missing are not included. If no rows remain,
    an empty frame that still carries every column is returned.
  """
  # Source columns, in the order their "CHANGES IN ..." counterparts are appended.
  diff_sources = [
    "EVAL MID PRICE",
    "EVAL LOWER THRESHOLD PRICE",
    "EVAL UPPER THRESHOLD PRICE",
    "EVAL MID YIELD",
    "EVAL LOWER THRESHOLD YIELD",
    "EVAL UPPER THRESHOLD YIELD",
  ]

  per_stock_frames = []
  # sort=False keeps the stocks in first-appearance order.
  for _, stock_rows in df_joined.groupby("STOCK CODE", sort=False):
    # sort_values returns a new frame, so the caller's data is left untouched.
    stock_rows = stock_rows.sort_values("VALUE DATE MONTH")
    for source_col in diff_sources:
      stock_rows["CHANGES IN " + source_col] = stock_rows[source_col].diff()
    per_stock_frames.append(stock_rows)

  if not per_stock_frames:
    # pd.concat([]) raises; return an empty frame with the expected schema instead.
    empty_result = df_joined.iloc[0:0].copy()
    for source_col in diff_sources:
      empty_result["CHANGES IN " + source_col] = pd.Series(dtype="float64")
    return empty_result

  # Concatenate once at the end rather than growing a frame inside the loop.
  return pd.concat(per_stock_frames)

# Add the per-stock "CHANGES IN ..." columns and rebind df_joined to the result.
df_joined = transform_eval_mid_price(df_joined)
# Bare expression so the notebook displays the transformed frame.
df_joined

Unnamed: 0,STOCK CODE,ISIN CODE,STOCK NAME,VALUE DATE,EVAL UPPER THRESHOLD YIELD,EVAL MID YIELD,EVAL LOWER THRESHOLD YIELD,EVAL LOWER THRESHOLD PRICE,EVAL MID PRICE,EVAL UPPER THRESHOLD PRICE,MODIFIED DURATION,CONVEXITY,EVAL UPPER THRESHOLD YIELD CHANGE,EVAL MID YIELD CHANGE,EVAL LOWER THRESHOLD YIELD CHANGE,EVAL LOWER THRESHOLD PRICE CHANGE,EVAL MID PRICE CHANGE,EVAL UPPER THRESHOLD PRICE CHANGE,COMPOSITE LIQUIDITY SCORE (T-1),FACILITY CODE,ISSUER NAME,PRINCIPLE,BOND TYPE,BOND CLASS,RATING,RATING AGENCY,ISSUE DATE,MATURITY DATE,EXPECTED MATURITY DATE,DAY COUNT BASIS,COUPON FREQUENCY,FIRST PAYMENT DATE,PREVIOUS PAYMENT DATE,PREVIOUS COUPON RATE,NEXT PAYMENT DATE,NEXT COUPON RATE,FACILITY AMOUNT/FACILITY LIMIT(MYR MIL),FACILITY OUTSTANDING AMOUNT(MYR MIL),BOND ISSUE AMOUNT(MYR MIL),BOND CURRENT OUTSTANDING AMOUNT(MYR MIL),REMAINING TENURE,ISSUER FACILITY LIMIT(MYR MIL),ISSUER OUTSTANDING AMOUNT(MYR MIL),ISLAMIC CONCEPT,SECTOR,RATING EFFECTIVE DATE,CALLABLE/PUTTABLE,CONVERTIBLE/EXCHANGABLE,RATING ACTION,VALUE DATE MONTH,MATURITY DURATION,ACCRUED INTEREST,OPR MOVEMENT,INFLATION RATE,MGS,CREDIT SPREAD,CHANGES IN EVAL MID PRICE,CHANGES IN EVAL LOWER THRESHOLD PRICE,CHANGES IN EVAL UPPER THRESHOLD PRICE,CHANGES IN EVAL MID YIELD,CHANGES IN EVAL LOWER THRESHOLD YIELD,CHANGES IN EVAL UPPER THRESHOLD YIELD
3572,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2019-08-01,4.005,3.997,3.989,109.522,109.629,109.736,12.018,187.161,0.016,0.016,0.016,-0.212,-0.213,-0.213,3.7,199600011,Government of Malaysia,0,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-04-08,4.762,2019-10-07,4.762,1000000,403852,12500,12500,20Y,9100000,771952,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2019-07,7305,1.504478,3.00,1.5,3.57,0.427,,,,,,
31399,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2019-09-03,3.651,3.642,3.633,114.330,114.456,114.583,12.080,188.335,0.000,0.000,0.000,0.001,0.001,0.001,3.7,199600011,Government of Malaysia,0,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-04-08,4.762,2019-10-07,4.762,1000000,407852,12500,12500,20Y,9100000,775952,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2019-08,7305,1.936198,3.00,1.4,3.32,0.322,4.827,4.808,4.847,-0.355,-0.356,-0.354
11346,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2019-10-01,3.650,3.640,3.630,114.293,114.433,114.573,12.006,186.501,-0.114,-0.115,-0.116,1.569,1.585,1.601,3.7,199600011,Government of Malaysia,0,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-04-08,4.762,2019-10-07,4.762,1000000,410852,12500,12500,20Y,9100000,775452,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2019-09,7305,2.302505,3.00,1.5,3.32,0.320,-0.023,-0.037,-0.010,-0.002,-0.003,-0.001
9185,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2019-11-01,3.815,3.805,3.795,111.971,112.107,112.243,12.106,186.832,-0.043,-0.043,-0.043,0.582,0.582,0.583,3.7,199600011,Government of Malaysia,0,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-10-07,4.762,2020-04-07,4.762,1000000,405552,12500,12500,20Y,9100000,771652,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2019-10,7305,0.325273,3.00,1.1,3.41,0.395,-2.326,-2.322,-2.330,0.165,0.165,0.165
17705,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2019-12-02,3.736,3.726,3.716,113.005,113.142,113.279,12.055,185.488,-0.006,-0.006,-0.006,0.074,0.074,0.073,3.7,199600011,Government of Malaysia,0,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-10-07,4.762,2020-04-07,4.762,1000000,391133,12500,12500,20Y,9100000,764733,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2019-11,7305,0.728612,3.00,1.1,3.42,0.306,1.035,1.034,1.036,-0.079,-0.079,-0.079
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25416,UI200244,MYBUI2002441,TOYOTA CAP MTN 1794D 26.9.2025 - MTN 8,2020-11-02,3.098,3.050,3.002,100.010,100.227,100.445,4.514,23.472,-0.001,0.000,0.001,0.004,-0.001,-0.005,0.0,201700026,Toyota Capital Malaysia Sdn Berhad,0,Fixed Rate Bond,Corporate(G),AAA (S),RAM,2020-10-28,2025-09-26,NaT,ACT365,2.0,2021-04-28,NaT,0.000,2021-04-28,3.100,2500,700,50,50,5Y,14600,700,CONVENTIONAL,FINANCIAL SERVICES,2019-12-04,False,False,Reaffirm,2020-10,1794,0.042466,1.75,-1.4,2.00,1.050,,,,,,
25442,VN200219,MYBVN2002191,MAYBANK IMTN 2.900% 09.10.2030,2020-11-02,2.956,2.906,2.856,99.743,99.971,100.200,4.558,23.895,-0.002,0.000,0.002,0.009,0.000,-0.010,2.0,201700006_1,Malayan Banking Berhad,1,Callable Bond,Financial,AA1,RAM,2020-10-09,2030-10-09,2025-10-09,ACT365,2.0,2021-04-09,NaT,0.000,2021-04-09,2.900,10000,6700,2300,2300,10Y,95110,12280,MURABAHAH,FINANCIAL SERVICES,2020-01-23,True,False,Reaffirm,2020-10,3652,0.190685,1.75,-1.4,2.59,0.316,,,,,,
25444,VP200220,MYBVP2002204,MAYBANK IMTN 3.100% 08.10.2032,2020-11-02,3.157,3.097,3.037,99.646,100.017,100.390,6.182,43.748,-0.002,0.000,0.002,0.012,0.000,-0.013,0.0,201700006_1,Malayan Banking Berhad,1,Callable Bond,Financial,AA1,RAM,2020-10-09,2032-10-08,2027-10-08,ACT365,2.0,2021-04-09,NaT,0.000,2021-04-09,3.100,10000,6700,700,700,15Y,95110,12280,MURABAHAH,FINANCIAL SERVICES,2020-01-23,True,False,Reaffirm,2020-10,4382,0.203836,1.75,-1.4,2.59,0.507,,,,,,
25475,VL200226,MYBVL2002260,PRASARANA SUKUK MURABAHAH 2.780% 20.10.2028 - S18,2020-11-02,2.780,2.752,2.724,99.999,100.198,100.398,7.096,57.211,-0.011,-0.011,-0.011,0.078,0.078,0.078,2.3,201700055,Prasarana Malaysia Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2020-10-22,2028-10-20,NaT,ACT365,2.0,2021-04-22,NaT,0.000,2021-04-22,2.780,10000,10000,700,700,10Y,50436,37000,MURABAHAH + TAWARRUQ,TRANSPORTATION,NaT,False,False,NOT RATED,2020-10,2920,0.083781,1.75,-1.4,2.59,0.162,,,,,,


In [18]:
# Drop rows that have no computed change in EVAL MID PRICE, i.e. the first
# observation of each stock (it has no previous row to difference against).
# Last rows are kept: no next-month target column is built here.
# Rebinding instead of inplace=True keeps the step explicit.
df_joined = df_joined.dropna(subset=["CHANGES IN EVAL MID PRICE"])
df_joined

Unnamed: 0,STOCK CODE,ISIN CODE,STOCK NAME,VALUE DATE,EVAL UPPER THRESHOLD YIELD,EVAL MID YIELD,EVAL LOWER THRESHOLD YIELD,EVAL LOWER THRESHOLD PRICE,EVAL MID PRICE,EVAL UPPER THRESHOLD PRICE,MODIFIED DURATION,CONVEXITY,EVAL UPPER THRESHOLD YIELD CHANGE,EVAL MID YIELD CHANGE,EVAL LOWER THRESHOLD YIELD CHANGE,EVAL LOWER THRESHOLD PRICE CHANGE,EVAL MID PRICE CHANGE,EVAL UPPER THRESHOLD PRICE CHANGE,COMPOSITE LIQUIDITY SCORE (T-1),FACILITY CODE,ISSUER NAME,PRINCIPLE,BOND TYPE,BOND CLASS,RATING,RATING AGENCY,ISSUE DATE,MATURITY DATE,EXPECTED MATURITY DATE,DAY COUNT BASIS,COUPON FREQUENCY,FIRST PAYMENT DATE,PREVIOUS PAYMENT DATE,PREVIOUS COUPON RATE,NEXT PAYMENT DATE,NEXT COUPON RATE,FACILITY AMOUNT/FACILITY LIMIT(MYR MIL),FACILITY OUTSTANDING AMOUNT(MYR MIL),BOND ISSUE AMOUNT(MYR MIL),BOND CURRENT OUTSTANDING AMOUNT(MYR MIL),REMAINING TENURE,ISSUER FACILITY LIMIT(MYR MIL),ISSUER OUTSTANDING AMOUNT(MYR MIL),ISLAMIC CONCEPT,SECTOR,RATING EFFECTIVE DATE,CALLABLE/PUTTABLE,CONVERTIBLE/EXCHANGABLE,RATING ACTION,VALUE DATE MONTH,MATURITY DURATION,ACCRUED INTEREST,OPR MOVEMENT,INFLATION RATE,MGS,CREDIT SPREAD,CHANGES IN EVAL MID PRICE,CHANGES IN EVAL LOWER THRESHOLD PRICE,CHANGES IN EVAL UPPER THRESHOLD PRICE,CHANGES IN EVAL MID YIELD,CHANGES IN EVAL LOWER THRESHOLD YIELD,CHANGES IN EVAL UPPER THRESHOLD YIELD
31399,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2019-09-03,3.651,3.642,3.633,114.330,114.456,114.583,12.080,188.335,0.000,0.000,0.000,0.001,0.001,0.001,3.7,199600011,Government of Malaysia,0,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-04-08,4.762,2019-10-07,4.762,1000000,407852,12500,12500,20Y,9100000,775952,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2019-08,7305,1.936198,3.00,1.4,3.32,0.322,4.827,4.808,4.847,-0.355,-0.356,-0.354
11346,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2019-10-01,3.650,3.640,3.630,114.293,114.433,114.573,12.006,186.501,-0.114,-0.115,-0.116,1.569,1.585,1.601,3.7,199600011,Government of Malaysia,0,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-04-08,4.762,2019-10-07,4.762,1000000,410852,12500,12500,20Y,9100000,775452,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2019-09,7305,2.302505,3.00,1.5,3.32,0.320,-0.023,-0.037,-0.010,-0.002,-0.003,-0.001
9185,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2019-11-01,3.815,3.805,3.795,111.971,112.107,112.243,12.106,186.832,-0.043,-0.043,-0.043,0.582,0.582,0.583,3.7,199600011,Government of Malaysia,0,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-10-07,4.762,2020-04-07,4.762,1000000,405552,12500,12500,20Y,9100000,771652,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2019-10,7305,0.325273,3.00,1.1,3.41,0.395,-2.326,-2.322,-2.330,0.165,0.165,0.165
17705,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2019-12-02,3.736,3.726,3.716,113.005,113.142,113.279,12.055,185.488,-0.006,-0.006,-0.006,0.074,0.074,0.073,3.7,199600011,Government of Malaysia,0,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-10-07,4.762,2020-04-07,4.762,1000000,391133,12500,12500,20Y,9100000,764733,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2019-11,7305,0.728612,3.00,1.1,3.42,0.306,1.035,1.034,1.036,-0.079,-0.079,-0.079
19880,MX170003,MYBMX1700033,MGS 3/2017 4.762% 07.04.2037,2020-01-02,3.641,3.630,3.619,114.264,114.416,114.569,12.010,184.293,-0.018,-0.018,-0.018,0.239,0.239,0.240,3.0,199600011,Government of Malaysia,0,Fixed Rate Bond,Govt,NR(LT),NOT RATED,2017-04-07,2037-04-07,NaT,ACTACT,2.0,2017-10-07,2019-10-07,4.762,2020-04-07,4.762,1000000,394133,12500,12500,20Y,9100000,764233,CONVENTIONAL,PUBLIC FINANCE,NaT,False,False,NOT RATED,2019-12,7305,1.131951,3.00,0.9,3.30,0.330,1.274,1.259,1.290,-0.096,-0.097,-0.095
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23690,VZ200201,MYBVZ2002013,DANAINFRA IMTN 4.010% 23.09.2050 - Tranche No 107,2020-11-02,4.278,4.240,4.202,95.499,96.120,96.746,16.979,406.846,-0.001,-0.001,-0.001,0.016,0.017,0.017,0.0,201200042,DanaInfra Nasional Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2020-09-23,2050-09-23,NaT,ACT365,2.0,2021-03-23,NaT,0.000,2021-03-23,4.010,71000,63320,1200,1200,25Y+,84000,72320,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-10,10957,0.439452,1.75,-1.4,2.59,1.650,0.036,0.376,-0.313,-0.002,0.019,-0.023
23677,VX200199,MYBVX2001994,DANAINFRA IMTN 3.720% 21.09.2040 - Tranche No 105,2020-11-02,3.772,3.744,3.716,99.274,99.662,100.053,13.894,245.135,0.000,0.000,0.000,0.000,-0.001,0.000,2.3,201200042,DanaInfra Nasional Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2020-09-23,2040-09-21,NaT,ACT365,2.0,2021-03-23,NaT,0.000,2021-03-23,3.720,71000,63320,600,600,20Y,84000,72320,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-10,7303,0.407671,1.75,-1.4,2.59,1.154,-0.127,0.305,-0.565,0.009,0.040,-0.022
23702,VZ200200,MYBVZ2002005,DANAINFRA IMTN 3.870% 22.09.2045 - Tranche No 106,2020-11-02,3.997,3.959,3.921,98.006,98.596,99.191,15.744,331.265,0.000,0.000,0.000,0.000,0.000,0.000,0.0,201200042,DanaInfra Nasional Berhad,1,Fixed Rate Bond,Quasi-Govt,NR(LT),NOT RATED,2020-09-23,2045-09-22,NaT,ACT365,2.0,2021-03-23,NaT,0.000,2021-03-23,3.870,71000,63320,600,600,25Y,84000,72320,MURABAHAH,FINANCIAL SERVICES,NaT,False,False,NOT RATED,2020-10,9130,0.424110,1.75,-1.4,2.59,1.369,-0.266,0.064,-0.601,0.017,0.038,-0.004
14889,VG200169,MYBVG2001692,MRCB20PERP IMTN 3.850% 14.08.2023,2020-10-01,3.688,3.611,3.534,100.434,100.642,100.852,2.682,8.740,0.001,0.000,-0.001,-0.003,-0.001,0.002,2.0,202000027,Malaysian Resources Corporation Berhad,1,Fixed Rate Bond,Corporate,AA- IS,MARC,2020-08-14,2023-08-14,NaT,ACT365,2.0,2021-02-15,NaT,0.000,2021-02-15,3.850,5000,600,250,250,3Y,5680,600,MURABAHAH + TAWARRUQ,PROPERTY AND REAL ESTATE,2020-07-30,False,False,Initial,2020-09,1095,0.506301,1.75,-1.4,2.04,1.571,0.170,0.195,0.146,-0.068,-0.061,-0.075


In [20]:
# Count missing values per column and show the ten columns with the most gaps
# (ascending, so the worst column is listed last).
(
  df_joined.isna()
  .sum()
  .sort_values()
  .tail(10)
)

BOND TYPE                                0
EVAL LOWER THRESHOLD PRICE CHANGE        0
RATING                                   0
RATING AGENCY                            0
ISSUE DATE                               0
NEXT PAYMENT DATE                      291
FIRST PAYMENT DATE                    1784
PREVIOUS PAYMENT DATE                 2019
RATING EFFECTIVE DATE                 6508
EXPECTED MATURITY DATE               29331
dtype: int64

# Train Test Split

In [33]:
# Number of most recent months held out as the test period.
test_size = 3
# Sort before de-duplicating: unique() keeps first-appearance order, which is
# only chronological if the frame happens to be ordered that way.
test_months = df_joined["VALUE DATE MONTH"].dropna().sort_values().unique()[-test_size:]
test_months

<PeriodArray>
['2020-08', '2020-09', '2020-10']
Length: 3, dtype: period[M]

In [34]:
# Flag the rows whose month belongs to the held-out test months, then split on that flag.
is_test_month = df_joined["VALUE DATE MONTH"].isin(test_months)
train_dataset = df_joined.loc[~is_test_month]
test_dataset = df_joined.loc[is_test_month]

# Report how many rows landed in each split.
print("Number of train set: ", len(train_dataset), sep="")
print("Number of test set: ", len(test_dataset), sep="")

Number of train set: 25300
Number of test set: 6387


In [35]:
# Write both splits to CSV in the working directory, omitting the row index.
for csv_name, split_frame in (
  ("train_dataset.csv", train_dataset),
  ("test_dataset.csv", test_dataset),
):
  split_frame.to_csv(csv_name, index=False)