# MD files for regression prepration

In [89]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

from collections import Counter

import re
import string 

import nltk 
nltk.download('twitter_samples')
from nltk.corpus import twitter_samples
from nltk.corpus import stopwords          # module for stop words that come with NLTK
nltk.download('stopwords')
from nltk.stem import PorterStemmer        # module for stemming
from nltk.tokenize import TweetTokenizer   # module for tokenizing strings

# ignore warning
import warnings
from pandas.core.common import SettingWithCopyWarning
warnings.filterwarnings('ignore')

# for stock price
import yfinance as yf
from yahoofinancials import YahooFinancials
pd.set_option('display.max_columns', None)

[nltk_data] Downloading package twitter_samples to
[nltk_data]     /Users/timliu/nltk_data...
[nltk_data]   Package twitter_samples is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/timliu/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [90]:
# read the csv file 
topic_modelling_df_MD = pd.read_csv('./main_df_output/MD12_topic_modelling_df.csv')

# Define Functions

In [91]:
# clean text
def process_text(text):
    stemmer = PorterStemmer()
    stopwords_english = stopwords.words('english')
    #text = text.str
    text = str(text)
    text = re.sub(r'\$\w*', '', text)
    text = re.sub(r'^RT[\s]+', '', text)
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
    text = re.sub(r'#', '', text)
    tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True,reduce_len=True)
    text_tokens = tokenizer.tokenize(text)

    text_clean = []
    for word in text_tokens:
        if (word not in stopwords_english and  
                word not in string.punctuation): 
            stem_word = stemmer.stem(word)  # stemming word
            text_clean.append(stem_word)
            
    sentence = ' '.join(text_clean)
    
    return sentence

In [92]:
# calculate the stock price change
def stock_price_change(stock_list, start_date, end_date):
    # function for find price change
    def price_change(aapl_df, n_day):
        before = []
        after = []
        for i in range(len(aapl_df)):
            # price on that date
            prc_tdy = aapl_df['Close'][i]

            if ((i >= n_day) & 
                (i < (len(aapl_df)-n_day))):
                # price change before n_day
                prc_before = aapl_df['Close'][i-n_day]
                prc_change_before = (prc_tdy - prc_before)/prc_before
                # price change after n_day
                prc_after = aapl_df['Close'][i+n_day]
                prc_change_after = (prc_after - prc_tdy)/prc_tdy
            else:
                prc_change_before = 0
                prc_change_after = 0
            # append into list
            before.append(prc_change_before)
            after.append(prc_change_after)
        return before, after
        
    # tickers and its closing stock price
    stock_df = pd.DataFrame()
    for i in stock_list:
        aapl_df = yf.download(i, 
                        start= start_date,  # start='2010-12-01', # start 1 month before
                        end = end_date,   # end='2022-01-30', # end 1 month later
                        progress=False,)
        aapl_df = aapl_df.reset_index(drop=False)
        # add ticker name
        aapl_df['ticker']=i
        # add price change
        # aapl_df['D0'] = aapl_df['Close']
        aapl_df['D-1'],aapl_df['D+1'] = price_change(aapl_df, 1)
        aapl_df['D-2'],aapl_df['D+2'] = price_change(aapl_df, 2)
        aapl_df['D-3'],aapl_df['D+3'] = price_change(aapl_df, 3)
        aapl_df['D-5'],aapl_df['D+5'] = price_change(aapl_df, 5)
        aapl_df['D-10'],aapl_df['D+10'] = price_change(aapl_df, 10)
        aapl_df['D-15'],aapl_df['D+15'] = price_change(aapl_df, 15)
        # aapl_df['D-16'],aapl_df['D+16'] = price_change(aapl_df, 16)
        # aapl_df['D-17'],aapl_df['D+17'] = price_change(aapl_df, 17)
        # aapl_df['D-18'],aapl_df['D+18'] = price_change(aapl_df, 18)
        # aapl_df['D-19'],aapl_df['D+19'] = price_change(aapl_df, 19)
        # aapl_df['D-20'],aapl_df['D+20'] = price_change(aapl_df, 20)
        # aapl_df['D-21'],aapl_df['D+21'] = price_change(aapl_df, 21)
        # aapl_df['D-22'],aapl_df['D+22'] = price_change(aapl_df, 22)
        # aapl_df['D-23'],aapl_df['D+23'] = price_change(aapl_df, 23)
        # aapl_df['D-24'],aapl_df['D+24'] = price_change(aapl_df, 24)
        # aapl_df['D-25'],aapl_df['D+25'] = price_change(aapl_df, 25)
        # aapl_df['D-26'],aapl_df['D+26'] = price_change(aapl_df, 26)
        # aapl_df['D-27'],aapl_df['D+27'] = price_change(aapl_df, 27)
        # aapl_df['D-28'],aapl_df['D+28'] = price_change(aapl_df, 28)
        # aapl_df['D-29'],aapl_df['D+29'] = price_change(aapl_df, 29)
        # aapl_df['D-30'],aapl_df['D+30'] = price_change(aapl_df, 30)
        # append into one dataframe 
        stock_df = stock_df.append(aapl_df)
        
    # drop redundancy columns
    stock_price_df = stock_df.drop(columns=['Open', 'High','Low','Adj Close','Volume'])
    stock_price_df = stock_price_df.rename(columns={'Date': 'date'})
    # drop duplicated rows
    stock_price_df = stock_price_df.drop_duplicates()
    return stock_price_df

# Clean text and splitting to sentence

In [93]:
df = topic_modelling_df_MD.drop(['participants','idx','company_paticipants_yes','other_paticipants_yes',
                'paraghrph_noun', 'word_count', 'char_count', 'sentence_count',
                'avg_word_length', 'avg_sentence_length'], axis = 1)
df = df.rename(columns = {'paraghrph':'paraghraph','paraghrph_clean':'paraghraph_clean'})
df = df.sort_values(by=['file_name'])
df = df.reset_index(drop=True)

# splotting into the sentences
df['paraghraph'] = df['paraghraph'].apply(lambda r: r.replace("Mr. ","Mr."))
df['sentence'] = df['paraghraph'].apply(lambda r: r.split(". "))
df = df.explode('sentence')

# Apply the function to clean the text
df['clean_text'] = df['sentence'].apply(lambda r: process_text(r))
df.head(5)

df.head(5)

Unnamed: 0,file_name,date,company_name,paraghraph,paraghraph_clean,token,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,topic_11,topic_12,sentence,clean_text
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.07949,"3910673 Good afternoon, ladies and gentlemen",3910673 good afternoon ladi gentlemen
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.07949,Welcome to MAPFRE's full-year 2010 results pre...,welcom mapfre' full-year 2010 result present
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.07949,"As usual, I'll give you an overview of the res...",usual i'll give overview result main busi deve...
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.07949,Later Mr.Lubelli will explain the financials i...,later mr.lubelli explain financi greater detail
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.07949,"Finally, we will take your questions",final take question


# Weighted topic Probability by files

In [94]:
topic_df = df.copy()
# cal len paragraph
topic_df['len_para'] = topic_df['paraghraph'].apply(lambda r: len(r))

In [95]:
cal_len_docs = topic_df[['file_name','len_para']]
# Use GroupBy() to compute the sum of the document
cal_len_docs = cal_len_docs.groupby('file_name').sum()
cal_len_docs = cal_len_docs.rename(columns={"len_para":"len_docs"})
cal_len_docs = cal_len_docs.reset_index(drop=False)
cal_len_docs.head(5)

Unnamed: 0,file_name,len_docs
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259
1,20110202_LEGAL_-_GEN_GRP-_Guidance_Call_2011-2...,324354
2,20110203_Markel_Corp-_Earnings_Call_2011-2-3_S...,803392
3,20110208_Beazley_PLC-_Earnings_Call_2011-2-8_S...,1286394
4,20110209_INTACT_FINANCIAL-_Earnings_Call_2011-...,490919


In [96]:
# calculate weighted average for topic probability
# -----> merge the cal_len_docs
merge_df = cal_len_docs.merge(topic_df, how='inner', on='file_name')
# -----> caculate the weight of the sentiment
merge_df['weighted']=merge_df['len_para']/merge_df['len_docs']
merge_df

Unnamed: 0,file_name,len_docs,date,company_name,paraghraph,paraghraph_clean,token,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,topic_11,topic_12,sentence,clean_text,len_para,weighted
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.079490,"3910673 Good afternoon, ladies and gentlemen",3910673 good afternoon ladi gentlemen,6915,0.005102
1,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.079490,Welcome to MAPFRE's full-year 2010 results pre...,welcom mapfre' full-year 2010 result present,6915,0.005102
2,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.079490,"As usual, I'll give you an overview of the res...",usual i'll give overview result main busi deve...,6915,0.005102
3,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.079490,Later Mr.Lubelli will explain the financials i...,later mr.lubelli explain financi greater detail,6915,0.005102
4,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.079490,"Finally, we will take your questions",final take question,6915,0.005102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395693,20220427_GJENSIDIGE_FORSI-_Earnings_Call_2022-...,1853590,2022-04-27,GJENSIDIGE FORSI-,"3974076 Thank you. Good morning, everyone. A...",Thank you Good morning everyone And welcome...,['highlight'],0.067120,0.031754,0.073851,0.044324,0.033697,0.044729,0.107323,0.056996,0.079961,0.041421,0.392546,0.026278,And welcome to the ﬁrst quarter presentation o...,welcom ﬁrst quarter present gjensidig,430,0.000232
395694,20220427_GJENSIDIGE_FORSI-_Earnings_Call_2022-...,1853590,2022-04-27,GJENSIDIGE FORSI-,"3974076 Thank you. Good morning, everyone. A...",Thank you Good morning everyone And welcome...,['highlight'],0.067120,0.031754,0.073851,0.044324,0.033697,0.044729,0.107323,0.056996,0.079961,0.041421,0.392546,0.026278,My name is Mitra Negard and I'm Head of Invest...,name mitra negard i'm head investor relat,430,0.000232
395695,20220427_GJENSIDIGE_FORSI-_Earnings_Call_2022-...,1853590,2022-04-27,GJENSIDIGE FORSI-,"3974076 Thank you. Good morning, everyone. A...",Thank you Good morning everyone And welcome...,['highlight'],0.067120,0.031754,0.073851,0.044324,0.033697,0.044729,0.107323,0.056996,0.079961,0.041421,0.392546,0.026278,"As always, we will start with our CEO, Helge L...",alway start ceo helg leiro baastad give highli...,430,0.000232
395696,20220427_GJENSIDIGE_FORSI-_Earnings_Call_2022-...,1853590,2022-04-27,GJENSIDIGE FORSI-,"3974076 Thank you. Good morning, everyone. A...",Thank you Good morning everyone And welcome...,['highlight'],0.067120,0.031754,0.073851,0.044324,0.033697,0.044729,0.107323,0.056996,0.079961,0.041421,0.392546,0.026278,"And we have, of course, a lot of time for Q&A ...",cours lot time q,430,0.000232


In [97]:
# -----> give the weighted average of the probability based on the paragraph length
for i in range (1,13):
    merge_df['topic_'+str(i)] = merge_df['topic_'+str(i)]*merge_df['weighted']
# -----> sum WA sentiment of each files
topic_df = merge_df.copy()
topic_df = topic_df.groupby(['file_name','date','company_name']).sum().reset_index(drop=False)
# drop the column: 'len_docs', 'len_para', 'weighted'
topic_df = topic_df.drop(['len_docs','len_para','weighted'], axis = 1)
topic_df

Unnamed: 0,file_name,date,company_name,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,topic_11,topic_12
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,0.030681,0.000984,0.024119,0.001374,0.001044,0.001386,0.013716,0.001766,0.821822,0.028310,0.048007,0.026790
1,20110202_LEGAL_-_GEN_GRP-_Guidance_Call_2011-2...,2011-02-02,LEGAL -,0.136593,0.001946,0.070819,0.234195,0.002065,0.244219,0.080002,0.095155,0.090277,0.002539,0.002473,0.039718
2,20110203_Markel_Corp-_Earnings_Call_2011-2-3_S...,2011-02-03,Markel Corp-,0.388708,0.003873,0.193777,0.019615,0.025130,0.015196,0.216159,0.067780,0.005369,0.033364,0.004894,0.026135
3,20110208_Beazley_PLC-_Earnings_Call_2011-2-8_S...,2011-02-08,Beazley PLC-,0.246952,0.001653,0.051924,0.007585,0.024254,0.186751,0.199126,0.014182,0.062011,0.083863,0.070805,0.050893
4,20110209_INTACT_FINANCIAL-_Earnings_Call_2011-...,2011-02-09,INTACT FINANCIAL-,0.044631,0.236112,0.004499,0.002700,0.010587,0.012860,0.097757,0.028901,0.004871,0.532752,0.022729,0.001601
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1980,20220310_Sanlam_Ltd-_Earnings_Call_2022-3-10_D...,2022-03-10,Sanlam Ltd-,0.001598,0.255489,0.045652,0.010766,0.173939,0.010242,0.077044,0.011417,0.309331,0.010163,0.002015,0.092344
1981,20220323_Poste_Italiane_SpA-_Earnings_Call_202...,2022-03-23,Poste Italiane,0.002433,0.000276,0.237947,0.132091,0.426850,0.048128,0.059669,0.001028,0.059144,0.021297,0.009755,0.001383
1982,20220324_Helvetia_Holding_AG-_Earnings_Call_20...,2022-03-24,Helvetia Holding,0.026715,0.000836,0.291678,0.011096,0.065805,0.001177,0.051068,0.001500,0.547283,0.001090,0.001062,0.000691
1983,20220426_Tryg_A-S-_Earnings_Call_2022-4-26_DN0...,2022-04-26,Tryg A-S-,0.007968,0.637173,0.039055,0.011612,0.042694,0.076246,0.005742,0.007272,0.133920,0.001992,0.011421,0.024907


# Weighted Sentiment Score by files

In [98]:
import pickle
sentiment = pickle.load(open('sentiment_model.pkl', 'rb'))
df['sentiment'] = sentiment.predict(df['clean_text'])
df.head(3)

Unnamed: 0,file_name,date,company_name,paraghraph,paraghraph_clean,token,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,topic_11,topic_12,sentence,clean_text,sentiment
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.07949,"3910673 Good afternoon, ladies and gentlemen",3910673 good afternoon ladi gentlemen,1
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.07949,Welcome to MAPFRE's full-year 2010 results pre...,welcom mapfre' full-year 2010 result present,1
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.07949,"As usual, I'll give you an overview of the res...",usual i'll give overview result main busi deve...,1


In [99]:
# Weight of each sentimentc
# cal len paragraph
df['len_para'] = df['paraghraph'].apply(lambda r: len(r))
# cal len sentence 
df['len_sent'] = df['sentence'].apply(lambda r: len(r))
df.head(3)

Unnamed: 0,file_name,date,company_name,paraghraph,paraghraph_clean,token,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,topic_11,topic_12,sentence,clean_text,sentiment,len_para,len_sent
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.07949,"3910673 Good afternoon, ladies and gentlemen",3910673 good afternoon ladi gentlemen,1,6915,46
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.07949,Welcome to MAPFRE's full-year 2010 results pre...,welcom mapfre' full-year 2010 result present,1,6915,55
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,"['overview', 'financial', 'standpoint', 'backd...",0.002985,0.001412,0.034613,0.001971,0.001499,0.001989,0.036102,0.002535,0.645794,0.048835,0.142774,0.07949,"As usual, I'll give you an overview of the res...",usual i'll give overview result main busi deve...,1,6915,106


In [100]:
cal_len_docs = df[['file_name','len_para']]
# Use GroupBy() to compute the sum of the document
cal_len_docs = cal_len_docs.groupby('file_name').sum()
cal_len_docs = cal_len_docs.rename(columns={"len_para":"len_docs"})
cal_len_docs = cal_len_docs.reset_index(drop=False)
cal_len_docs.head(5)

Unnamed: 0,file_name,len_docs
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259
1,20110202_LEGAL_-_GEN_GRP-_Guidance_Call_2011-2...,324354
2,20110203_Markel_Corp-_Earnings_Call_2011-2-3_S...,803392
3,20110208_Beazley_PLC-_Earnings_Call_2011-2-8_S...,1286394
4,20110209_INTACT_FINANCIAL-_Earnings_Call_2011-...,490919


In [101]:
# calculate weighted average for sentiment
# -----> merge the cal_len_docs
merge_df = cal_len_docs.merge(df, how='inner', on='file_name')
# -----> caculate the weight of the sentiment
merge_df['weighted']=merge_df['len_sent']/merge_df['len_docs']
# -----> give the weighted average of the sentiment score based on the sentence
merge_df['WA_sentiment'] = merge_df['weighted']*merge_df['sentiment']
# -----> sum WA sentiment of each files
sentiment_df = merge_df[['file_name','date','company_name','WA_sentiment']]
sentiment_df = sentiment_df.groupby(['file_name','date','company_name']).sum().reset_index(drop=False)
sentiment_df

Unnamed: 0,file_name,date,company_name,WA_sentiment
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,0.001179
1,20110202_LEGAL_-_GEN_GRP-_Guidance_Call_2011-2...,2011-02-02,LEGAL -,0.012101
2,20110203_Markel_Corp-_Earnings_Call_2011-2-3_S...,2011-02-03,Markel Corp-,0.012195
3,20110208_Beazley_PLC-_Earnings_Call_2011-2-8_S...,2011-02-08,Beazley PLC-,0.006725
4,20110209_INTACT_FINANCIAL-_Earnings_Call_2011-...,2011-02-09,INTACT FINANCIAL-,0.008690
...,...,...,...,...
1980,20220310_Sanlam_Ltd-_Earnings_Call_2022-3-10_D...,2022-03-10,Sanlam Ltd-,0.002443
1981,20220323_Poste_Italiane_SpA-_Earnings_Call_202...,2022-03-23,Poste Italiane,0.002120
1982,20220324_Helvetia_Holding_AG-_Earnings_Call_20...,2022-03-24,Helvetia Holding,0.003111
1983,20220426_Tryg_A-S-_Earnings_Call_2022-4-26_DN0...,2022-04-26,Tryg A-S-,0.000772


# merge the sentiment_df and topic_df together

In [102]:
# merge the sentiment_df and topic_df together
MD_model_df = topic_df.merge(sentiment_df, how='inner', on=['file_name','date','company_name'])
MD_model_df

Unnamed: 0,file_name,date,company_name,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,topic_11,topic_12,WA_sentiment
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,0.030681,0.000984,0.024119,0.001374,0.001044,0.001386,0.013716,0.001766,0.821822,0.028310,0.048007,0.026790,0.001179
1,20110202_LEGAL_-_GEN_GRP-_Guidance_Call_2011-2...,2011-02-02,LEGAL -,0.136593,0.001946,0.070819,0.234195,0.002065,0.244219,0.080002,0.095155,0.090277,0.002539,0.002473,0.039718,0.012101
2,20110203_Markel_Corp-_Earnings_Call_2011-2-3_S...,2011-02-03,Markel Corp-,0.388708,0.003873,0.193777,0.019615,0.025130,0.015196,0.216159,0.067780,0.005369,0.033364,0.004894,0.026135,0.012195
3,20110208_Beazley_PLC-_Earnings_Call_2011-2-8_S...,2011-02-08,Beazley PLC-,0.246952,0.001653,0.051924,0.007585,0.024254,0.186751,0.199126,0.014182,0.062011,0.083863,0.070805,0.050893,0.006725
4,20110209_INTACT_FINANCIAL-_Earnings_Call_2011-...,2011-02-09,INTACT FINANCIAL-,0.044631,0.236112,0.004499,0.002700,0.010587,0.012860,0.097757,0.028901,0.004871,0.532752,0.022729,0.001601,0.008690
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1980,20220310_Sanlam_Ltd-_Earnings_Call_2022-3-10_D...,2022-03-10,Sanlam Ltd-,0.001598,0.255489,0.045652,0.010766,0.173939,0.010242,0.077044,0.011417,0.309331,0.010163,0.002015,0.092344,0.002443
1981,20220323_Poste_Italiane_SpA-_Earnings_Call_202...,2022-03-23,Poste Italiane,0.002433,0.000276,0.237947,0.132091,0.426850,0.048128,0.059669,0.001028,0.059144,0.021297,0.009755,0.001383,0.002120
1982,20220324_Helvetia_Holding_AG-_Earnings_Call_20...,2022-03-24,Helvetia Holding,0.026715,0.000836,0.291678,0.011096,0.065805,0.001177,0.051068,0.001500,0.547283,0.001090,0.001062,0.000691,0.003111
1983,20220426_Tryg_A-S-_Earnings_Call_2022-4-26_DN0...,2022-04-26,Tryg A-S-,0.007968,0.637173,0.039055,0.011612,0.042694,0.076246,0.005742,0.007272,0.133920,0.001992,0.011421,0.024907,0.000772


# Stock Price

In [103]:
# List all yahoo tickers
yahoo_ticker_list = [
    # Motor/Personal
    'ADM.L','DLG.L','SBRE.L','SAGA.L','AGS.BR',
    # Global Commercial
    'ALV.DE','CS.PA','ZURN.SW','G.MI',
    # London Market 
    'BEZ.L','HSX.L','LRE.L',
    # LN Equity 
    'LLOY.L',
    # US - Specialty/P&C/Reinsurance
    'AIG','AXS','TRV','ACGL','RNR','RE','MKL','HIG','ARGO','BRK-B','CB',
    # European (Re)Insurers
    'SCR.PA','MUV2.DE','SREN.SW','HNR1.DE',
    # Japanese & Pacific
    '8766.T','8630.T','8725.T','QBE.AX',
    # Run-off
    'ESGR','FFH.TO','RQIH.L',
    # Life Groups & Retail Life
    'PRU.L','MNG.L','LGEN.L','AV.L','PHNX.L','QLT.L','JUST.L','STJ.L','AGN.AS','DSY.JO','SLM.JO',
    # Other insurers
    'STB.OL','CNP.PA','GJF.OL','PST.MI','NN.AS','TOP.CO','BALN.SW',
    'SAMPO.HE','MAP.MC','TRYG.CO','0RHS.IL','HELN.SW','IFC.TO'
]

# Match it with company names from BoE
Insurer_Names_df = pd.read_excel('./input/Insurer_Names_for_possible_NLP_analysis.xlsx')  
Insurer_Names = Insurer_Names_df.drop(['Unnamed: 0', 'Unnamed: 4'], axis=1)
Insurer_Names = Insurer_Names.drop([0])
Insurer_Names.rename(columns={"Unnamed: 1": "Company", "Unnamed: 2": "bb ticker", "Unnamed: 3":"Group"}, inplace = True)
Insurer_Names['yahoo ticker'] = yahoo_ticker_list
Insurer_Names.head(5)
# Exported as CSV to manually matched with the extracted file names (column 'company_name' in model_df)
# Insurer_Names[['Company']].to_csv('./main_df_output/company_name_match.csv', index = False)

Unnamed: 0,Company,bb ticker,Group,yahoo ticker
1,ADMIRAL GROUP,ADM LN,Motor/Personal,ADM.L
2,DIRECT LINE INSU,DLG LN,Motor/Personal,DLG.L
3,SABRE INSUR,SBRE LN,Motor/Personal,SBRE.L
4,SAGA PLC,SAGA LN,Motor/Personal,SAGA.L
5,AGEAS,AGS BB,Motor/Personal,AGS.BR


In [104]:
company_name_match = pd.read_csv('./input/company_name_match.csv')
company_name_match.rename(columns={"Company": "company_name"}, inplace = True)
company_name_match

Groups = {
        "ADMIRAL GROUP" : "Motor/Personal", "DIRECT LINE INSU" : "Motor/Personal", 
        "SABRE INSUR" : "Motor/Personal", "SAGA PLC" : "Motor/Personal",
        "AGEAS" : "Motor/Personal",

        "ALLIANZ SE-REG" : "Global Commercial", "AXA" : "Global Commercial", 
        "ZURICH INSURANCE" : "Global Commercial", "GENERALI ASSIC" : "Global Commercial",

        "BEAZLEY PLC" : "London Market", "HISCOX LTD" : "London Market", 
        "LANCASHIRE HOLDI" : "London Market",
        
        "Society of Lloyd’s" : "LN Equity",

        "AMERICAN INTERNA" : "US - Specialty/P&C/Reinsurance", "AXIS CAPITAL" : "US - Specialty/P&C/Reinsurance",
        "TRAVELERS COS IN" : "US - Specialty/P&C/Reinsurance", "ARCH CAPITAL GRP" : "US - Specialty/P&C/Reinsurance",
        "RENAISSANCERE" : "US - Specialty/P&C/Reinsurance", "EVEREST RE GROUP" : "US - Specialty/P&C/Reinsurance",
        "MARKEL CORP" : "US - Specialty/P&C/Reinsurance", "HARTFORD FINL SV" : "US - Specialty/P&C/Reinsurance",
        "ARGO GROUP INTER" : "US - Specialty/P&C/Reinsurance", "BERKSHIRE HATH-B" : "US - Specialty/P&C/Reinsurance",
        "CHUBB LTD" : "US - Specialty/P&C/Reinsurance",

        "SCOR SE" : "European (Re)Insurers", "MUENCHENER RUE-R" : "European (Re)Insurers", 
        "SWISS RE AG" : "European (Re)Insurers", "HANNOVER RUECK S" : "European (Re)Insurers",

        "TOKIO MARINE HD" : "Japanese & Pacific", "SOMPO HOLDINGS I" : "Japanese & Pacific",
        "MS&AD INSURANCE" : "Japanese & Pacific", "QBE INSURANCE" : "Japanese & Pacific",

        "ENSTAR GROUP LTD" : "Run-off", "FAIRFAX FINL HLD" : "Run-off", "RANDALL & QUILTE" : "Run-off",

        "PRUDENTIAL PLC" : "Life Groups & Retail Life", "M&G PLC" : "Life Groups & Retail Life",
        "LEGAL & GEN GRP" : "Life Groups & Retail Life", "AVIVA PLC" : "Life Groups & Retail Life",
        "PHOENIX GROUP HO" : "Life Groups & Retail Life", "QUILTER PLC" : "Life Groups & Retail Life",
        "JUST GROUP" : "Life Groups & Retail Life", "ST JAMES'S PLACE" : "Life Groups & Retail Life",
        "AEGON NV" : "Life Groups & Retail Life", "DISCOVERY LTD" : "Life Groups & Retail Life",
        "SANLAM LTD" : "Life Groups & Retail Life",

        "STOREBRAND ASA" : "Other insurers", "CNP ASSURANCES" : "Other insurers",
        "GJENSIDIGE FORSI" : "Other insurers", "POSTE ITALIANE" : "Other insurers",
        "NN GROUP" : "Other insurers", "TOPDANMARK A/S" : "Other insurers",
        "BALOISE HOL-REG" : "Other insurers", "SAMPO OYJ-A SHS" : "Other insurers",
        "MAPFRE SA" : "Other insurers", "TRYG A/S" : "Other insurers",
        "ASR NEDERLAND NV" : "Other insurers", "HELVETIA HOL-REG" : "Other insurers",
        "INTACT FINANCIAL" : "Other insurers"
}

yahoo_ticker = {
        "ADMIRAL GROUP" : "ADM.L", "DIRECT LINE INSU" : "DLG.L", 
        "SABRE INSUR" : "SBRE.L", "SAGA PLC" : "SAGA.L",
        "AGEAS" : "AGS.BR", 
        
         "ALLIANZ SE-REG" : "ALV.DE", "AXA" : "CS.PA", 
        "ZURICH INSURANCE" : "ZURN.SW", "GENERALI ASSIC" : "G.MI",

        "BEAZLEY PLC" : "BEZ.L", "HISCOX LTD" : "HSX.L", 
        "LANCASHIRE HOLDI" : "LRE.L",

        "Society of Lloyd’s" : "LLOY.L",

        "AMERICAN INTERNA" : "AIG", "AXIS CAPITAL" : "AXS",
        "TRAVELERS COS IN" : "TRV", "ARCH CAPITAL GRP" : "ACGL",
        "RENAISSANCERE" : "RNR", "EVEREST RE GROUP" : "RE",
        "MARKEL CORP" : "MKL", "HARTFORD FINL SV" : "HIG",
        "ARGO GROUP INTER" : "ARGO", "BERKSHIRE HATH-B" : "BRK-B",
        "CHUBB LTD" : "CB",

        "SCOR SE" : "SCR.PA", "MUENCHENER RUE-R" : "MUV2.DE", 
        "SWISS RE AG" : "SREN.SW", "HANNOVER RUECK S" : "HNR1.DE",

        "TOKIO MARINE HD" : "8766.T", "SOMPO HOLDINGS I" : "8630.T",
        "MS&AD INSURANCE" : "8725.T", "QBE INSURANCE" : "QBE.AX",

        "ENSTAR GROUP LTD" : "ESGR", "FAIRFAX FINL HLD" : "FFH.TO", "RANDALL & QUILTE" : "RQIH.L",

        "PRUDENTIAL PLC" : "PRU.L", "M&G PLC" : "MNG.L",
        "LEGAL & GEN GRP" : "LGEN.L", "AVIVA PLC" : "AV.L",
        "PHOENIX GROUP HO" : "PHNX.L", "QUILTER PLC" : "QLT.L",
        "JUST GROUP" : "JUST.L", "ST JAMES'S PLACE" : "STJ.L",
        "AEGON NV" : "AGN.AS", "DISCOVERY LTD" : "DSY.JO",
        "SANLAM LTD" : "SLM.JO",

        "STOREBRAND ASA" : "STB.OL", "CNP ASSURANCES" : "CNP.PA",
        "GJENSIDIGE FORSI" : "GJF.OL", "POSTE ITALIANE" : "PST.MI",
        "NN GROUP" : "NN.AS", "TOPDANMARK A/S" : "TOP.CO",
        "BALOISE HOL-REG" : "BALN.SW", "SAMPO OYJ-A SHS" : "SAMPO.HE",
        "MAPFRE SA" : "MAP.MC", "TRYG A/S" : "TRYG.CO",
        "ASR NEDERLAND NV" : "0RHS.IL", "HELVETIA HOL-REG" : "HELN.SW",
        "INTACT FINANCIAL" : "IFC.TO"
}

company_name_match['Group'] =company_name_match['Real_Company_Name'].map(Groups)
company_name_match['yfiance_ticker'] =company_name_match['Real_Company_Name'].map(yahoo_ticker)
company_name_match

Unnamed: 0,Real_Company_Name,company_name,Group,yfiance_ticker
0,ADMIRAL GROUP,Admiral Group,Motor/Personal,ADM.L
1,DIRECT LINE INSU,DIRECT LINE,Motor/Personal,DLG.L
2,SABRE INSUR,SABRE INSUR-,Motor/Personal,SBRE.L
3,SAGA PLC,Saga PLC-,Motor/Personal,SAGA.L
4,AGEAS,Ageas SA-NV-,Motor/Personal,AGS.BR
...,...,...,...,...
62,MAPFRE SA,,Other insurers,MAP.MC
63,TRYG A/S,Tryg A-S-,Other insurers,TRYG.CO
64,ASR NEDERLAND NV,ASR Nederland,Other insurers,0RHS.IL
65,HELVETIA HOL-REG,Helvetia Holding,Other insurers,HELN.SW


In [105]:
df_company = company_name_match[['company_name','Group','yfiance_ticker']]
MD_model_df = df_company.merge(MD_model_df, how='inner', on='company_name')
MD_model_df = MD_model_df.rename(columns={'yfiance_ticker':'ticker'})
MD_model_df

Unnamed: 0,company_name,Group,ticker,file_name,date,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,topic_11,topic_12,WA_sentiment
0,Admiral Group,Motor/Personal,ADM.L,20110302_Admiral_Group_PLC-_Earnings_Call_2011...,2011-03-02,0.002680,0.572789,0.032527,0.000657,0.000499,0.044301,0.227743,0.008427,0.018461,0.037488,0.005803,0.048624,0.000392
1,Admiral Group,Motor/Personal,ADM.L,20110824_Admiral_Group_PLC-_Earnings_Call_2011...,2011-08-24,0.108180,0.505089,0.055883,0.030197,0.024078,0.056217,0.075847,0.025543,0.004550,0.042983,0.000752,0.070681,0.000765
2,Admiral Group,Motor/Personal,ADM.L,20111109_Admiral_Group_PLC-_Guidance_Call_2011...,2011-11-09,0.015945,0.415151,0.123024,0.001712,0.001302,0.028433,0.137670,0.042511,0.190022,0.001600,0.001559,0.041072,-0.000433
3,Admiral Group,Motor/Personal,ADM.L,20111109_Admiral_Group_PLC-_Guidance_Call_2011...,2011-11-09,0.015842,0.691033,0.000680,0.000972,0.009756,0.040082,0.118486,0.017031,0.002626,0.076872,0.007984,0.018637,0.000336
4,Admiral Group,Motor/Personal,ADM.L,20120307_Admiral_Group_PLC-_Earnings_Call_2012...,2012-03-07,0.009562,0.627906,0.088888,0.012241,0.011544,0.041281,0.123975,0.001258,0.029679,0.052195,0.000891,0.000580,0.001669
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1870,INTACT FINANCIAL-,Other insurers,IFC.TO,20210210_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-02-10,0.041957,0.001344,0.102214,0.077124,0.279696,0.001894,0.103217,0.015510,0.035981,0.331391,0.002041,0.007632,0.008256
1871,INTACT FINANCIAL-,Other insurers,IFC.TO,20210512_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-05-12,0.043230,0.169032,0.071369,0.001465,0.140507,0.011193,0.124987,0.009594,0.002642,0.418685,0.006427,0.000868,0.005915
1872,INTACT FINANCIAL-,Other insurers,IFC.TO,20210728_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-07-28,0.103656,0.001208,0.219402,0.058934,0.078123,0.048635,0.128884,0.006617,0.072151,0.279312,0.002077,0.001000,0.005511
1873,INTACT FINANCIAL-,Other insurers,IFC.TO,20211110_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-11-10,0.002421,0.157843,0.233466,0.071826,0.129143,0.001613,0.003871,0.008914,0.090688,0.292745,0.006521,0.000948,0.004812


In [106]:
start_date='2010-11-01'
end_date='2022-03-02'
stock_list = company_name_match['yfiance_ticker']
df_stock = stock_price_change(stock_list, start_date, end_date)

# limit the dat till 2021-12-31
df_stock = df_stock[df_stock['date']<='2021-12-31']

df_stock


1 Failed download:
- CNP.PA: No data found, symbol may be delisted


Unnamed: 0,date,Close,ticker,D-1,D+1,D-2,D+2,D-3,D+3,D-5,D+5,D-10,D+10,D-15,D+15
0,2010-11-01,1611.000000,ADM.L,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,2010-11-02,1627.000000,ADM.L,0.009932,0.026429,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,2010-11-03,1670.000000,ADM.L,0.026429,0.005988,0.036623,0.013772,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,2010-11-04,1680.000000,ADM.L,0.005988,0.007738,0.032575,0.000000,0.042831,-0.005357,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,2010-11-05,1693.000000,ADM.L,0.007738,-0.007679,0.013772,-0.012995,0.040565,-0.035440,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2798,2021-12-23,163.240005,IFC.TO,-0.001101,0.001470,0.001841,0.008025,0.012090,0.006493,0.005049,0.002940,0.012153,0.010475,0.004678,-0.010843
2799,2021-12-24,163.479996,IFC.TO,0.001470,0.006545,0.000367,0.005016,0.003314,0.005750,0.003684,0.000856,0.013138,0.004037,0.006774,-0.013335
2800,2021-12-29,164.550003,IFC.TO,0.006545,-0.001519,0.008025,-0.000790,0.006915,-0.005044,0.020212,0.011972,0.020022,-0.002492,0.013114,-0.005409
2801,2021-12-30,164.300003,IFC.TO,-0.001519,0.000730,0.005016,-0.003530,0.006493,-0.004139,0.008347,0.003956,0.033984,-0.022581,0.004586,0.000000


In [107]:
df_stock['date'] = df_stock['date'].astype(str)
# merged company tickers into big dataframe
MD_model_df = MD_model_df.join(df_stock.set_index(["date","ticker"]), 
                         on=["date","ticker"],
                         how='left'
                        )
MD_model_df = MD_model_df.dropna()
MD_model_df = MD_model_df.reset_index(drop=True)
MD_model_df

Unnamed: 0,company_name,Group,ticker,file_name,date,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,topic_11,topic_12,WA_sentiment,Close,D-1,D+1,D-2,D+2,D-3,D+3,D-5,D+5,D-10,D+10,D-15,D+15
0,Admiral Group,Motor/Personal,ADM.L,20110302_Admiral_Group_PLC-_Earnings_Call_2011...,2011-03-02,0.002680,0.572789,0.032527,0.000657,0.000499,0.044301,0.227743,0.008427,0.018461,0.037488,0.005803,0.048624,0.000392,1655.000000,-0.031030,0.013897,-0.020130,0.015106,-0.003612,-0.000604,-0.011350,-0.003021,-0.029326,-0.060423,-0.030463,-0.045317
1,Admiral Group,Motor/Personal,ADM.L,20110824_Admiral_Group_PLC-_Earnings_Call_2011...,2011-08-24,0.108180,0.505089,0.055883,0.030197,0.024078,0.056217,0.075847,0.025543,0.004550,0.042983,0.000752,0.070681,0.000765,1353.000000,-0.118567,-0.054693,-0.110454,-0.025868,-0.094983,-0.005913,-0.125969,0.011086,-0.031496,0.008130,-0.117417,-0.028825
2,Admiral Group,Motor/Personal,ADM.L,20111109_Admiral_Group_PLC-_Guidance_Call_2011...,2011-11-09,0.015945,0.415151,0.123024,0.001712,0.001302,0.028433,0.137670,0.042511,0.190022,0.001600,0.001559,0.041072,-0.000433,887.500000,-0.256077,-0.076056,-0.247881,-0.053521,-0.251686,-0.061972,-0.234914,-0.098028,-0.276691,-0.025352,-0.275510,0.039437
3,Admiral Group,Motor/Personal,ADM.L,20111109_Admiral_Group_PLC-_Guidance_Call_2011...,2011-11-09,0.015842,0.691033,0.000680,0.000972,0.009756,0.040082,0.118486,0.017031,0.002626,0.076872,0.007984,0.018637,0.000336,887.500000,-0.256077,-0.076056,-0.247881,-0.053521,-0.251686,-0.061972,-0.234914,-0.098028,-0.276691,-0.025352,-0.275510,0.039437
4,Admiral Group,Motor/Personal,ADM.L,20120307_Admiral_Group_PLC-_Earnings_Call_2012...,2012-03-07,0.009562,0.627906,0.088888,0.012241,0.011544,0.041281,0.123975,0.001258,0.029679,0.052195,0.000891,0.000580,0.001669,1144.000000,0.100000,0.015734,0.095785,0.012238,0.080264,0.022727,0.062210,0.036713,0.103182,0.020979,0.179990,0.014860
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1807,INTACT FINANCIAL-,Other insurers,IFC.TO,20201104_INTACT_FINANCIAL-_Earnings_Call_2020-...,2020-11-04,0.011608,0.018569,0.156463,0.002010,0.231731,0.007382,0.128001,0.013196,0.055015,0.372329,0.002504,0.001192,0.003004,147.750000,0.063332,-0.018274,0.069258,-0.019425,0.073609,-0.060778,0.063179,-0.080203,0.031990,0.000203,0.026255,-0.019628
1808,INTACT FINANCIAL-,Other insurers,IFC.TO,20210210_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-02-10,0.041957,0.001344,0.102214,0.077124,0.279696,0.001894,0.103217,0.015510,0.035981,0.331391,0.002041,0.007632,0.008256,151.820007,0.034267,-0.013371,0.041432,-0.016994,0.039294,-0.032275,0.050876,-0.041101,0.061678,-0.054802,0.054013,-0.033263
1809,INTACT FINANCIAL-,Other insurers,IFC.TO,20210512_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-05-12,0.043230,0.169032,0.071369,0.001465,0.140507,0.011193,0.124987,0.009594,0.002642,0.418685,0.006427,0.000868,0.005915,162.309998,-0.016541,-0.011460,-0.027909,-0.011521,-0.015945,-0.015218,-0.009157,-0.027663,-0.003744,0.002403,-0.002704,0.041341
1810,INTACT FINANCIAL-,Other insurers,IFC.TO,20210728_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-07-28,0.103656,0.001208,0.219402,0.058934,0.078123,0.048635,0.128884,0.006617,0.072151,0.279312,0.002077,0.001000,0.005511,169.880005,0.001828,-0.002943,0.008669,0.000706,0.007592,-0.002826,0.000294,0.000530,0.001238,0.026725,-0.010427,0.036320


In [108]:
MD_model_df.to_csv("./regression_df_input/MD_model_df.csv", index = False)