# MD files for regression prepration

In [28]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

from collections import Counter

import re
import string 

import nltk 
nltk.download('twitter_samples')
from nltk.corpus import twitter_samples
from nltk.corpus import stopwords          # module for stop words that come with NLTK
nltk.download('stopwords')
from nltk.stem import PorterStemmer        # module for stemming
from nltk.tokenize import TweetTokenizer   # module for tokenizing strings

# ignore warning
import warnings
from pandas.core.common import SettingWithCopyWarning
warnings.filterwarnings('ignore')

# for stock price
import yfinance as yf
from yahoofinancials import YahooFinancials
pd.set_option('display.max_columns', None)

[nltk_data] Downloading package twitter_samples to
[nltk_data]     /Users/timliu/nltk_data...
[nltk_data]   Package twitter_samples is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/timliu/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [29]:
# read the csv file 
topic_modelling_df_MD = pd.read_csv('./main_df_output/MD10_topic_modelling_df.csv')

# Define Functions

In [30]:
# clean text
def process_text(text):
    stemmer = PorterStemmer()
    stopwords_english = stopwords.words('english')
    #text = text.str
    text = str(text)
    text = re.sub(r'\$\w*', '', text)
    text = re.sub(r'^RT[\s]+', '', text)
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
    text = re.sub(r'#', '', text)
    tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True,reduce_len=True)
    text_tokens = tokenizer.tokenize(text)

    text_clean = []
    for word in text_tokens:
        if (word not in stopwords_english and  
                word not in string.punctuation): 
            stem_word = stemmer.stem(word)  # stemming word
            text_clean.append(stem_word)
            
    sentence = ' '.join(text_clean)
    
    return sentence

In [31]:
# calculate the stock price change
def stock_price_change(stock_list, start_date, end_date):
    # function for find price change
    def price_change(aapl_df, n_day):
        before = []
        after = []
        for i in range(len(aapl_df)):
            # price on that date
            prc_tdy = aapl_df['Close'][i]

            if ((i >= n_day) & 
                (i < (len(aapl_df)-n_day))):
                # price change before n_day
                prc_before = aapl_df['Close'][i-n_day]
                prc_change_before = (prc_tdy - prc_before)/prc_before
                # price change after n_day
                prc_after = aapl_df['Close'][i+n_day]
                prc_change_after = (prc_after - prc_tdy)/prc_tdy
            else:
                prc_change_before = 0
                prc_change_after = 0
            # append into list
            before.append(prc_change_before)
            after.append(prc_change_after)
        return before, after
        
    # tickers and its closing stock price
    stock_df = pd.DataFrame()
    for i in stock_list:
        aapl_df = yf.download(i, 
                        start= start_date,  # start='2010-12-01', # start 1 month before
                        end = end_date,   # end='2022-01-30', # end 1 month later
                        progress=False,)
        aapl_df = aapl_df.reset_index(drop=False)
        # add ticker name
        aapl_df['ticker']=i
        # add price change
        # aapl_df['D0'] = aapl_df['Close']
        aapl_df['D-1'],aapl_df['D+1'] = price_change(aapl_df, 1)
        aapl_df['D-2'],aapl_df['D+2'] = price_change(aapl_df, 2)
        aapl_df['D-3'],aapl_df['D+3'] = price_change(aapl_df, 3)
        aapl_df['D-5'],aapl_df['D+5'] = price_change(aapl_df, 5)
        aapl_df['D-10'],aapl_df['D+10'] = price_change(aapl_df, 10)
        aapl_df['D-15'],aapl_df['D+15'] = price_change(aapl_df, 15)
        aapl_df['D-16'],aapl_df['D+16'] = price_change(aapl_df, 16)
        aapl_df['D-17'],aapl_df['D+17'] = price_change(aapl_df, 17)
        aapl_df['D-18'],aapl_df['D+18'] = price_change(aapl_df, 18)
        aapl_df['D-19'],aapl_df['D+19'] = price_change(aapl_df, 19)
        aapl_df['D-20'],aapl_df['D+20'] = price_change(aapl_df, 20)
        aapl_df['D-21'],aapl_df['D+21'] = price_change(aapl_df, 21)
        aapl_df['D-22'],aapl_df['D+22'] = price_change(aapl_df, 22)
        aapl_df['D-23'],aapl_df['D+23'] = price_change(aapl_df, 23)
        aapl_df['D-24'],aapl_df['D+24'] = price_change(aapl_df, 24)
        aapl_df['D-25'],aapl_df['D+25'] = price_change(aapl_df, 25)
        aapl_df['D-26'],aapl_df['D+26'] = price_change(aapl_df, 26)
        aapl_df['D-27'],aapl_df['D+27'] = price_change(aapl_df, 27)
        aapl_df['D-28'],aapl_df['D+28'] = price_change(aapl_df, 28)
        aapl_df['D-29'],aapl_df['D+29'] = price_change(aapl_df, 29)
        aapl_df['D-30'],aapl_df['D+30'] = price_change(aapl_df, 30)
        aapl_df['D-40'],aapl_df['D+40'] = price_change(aapl_df, 40)
        aapl_df['D-50'],aapl_df['D+50'] = price_change(aapl_df, 50)
        aapl_df['D-60'],aapl_df['D+60'] = price_change(aapl_df, 60)
        # append into one dataframe 
        stock_df = stock_df.append(aapl_df)
        
    # drop redundancy columns
    stock_price_df = stock_df.drop(columns=['Open', 'High','Low','Adj Close','Volume'])
    stock_price_df = stock_price_df.rename(columns={'Date': 'date'})
    # drop duplicated rows
    stock_price_df = stock_price_df.drop_duplicates()
    return stock_price_df

# Clean text and splitting to sentence

In [32]:
df = topic_modelling_df_MD.drop(['participants','idx','company_paticipants_yes','other_paticipants_yes',
                'paraghrph_noun', 'word_count', 'char_count', 'sentence_count',
                'avg_word_length', 'avg_sentence_length'], axis = 1)
df = df.rename(columns = {'paraghrph':'paraghraph','paraghrph_clean':'paraghraph_clean'})
df = df.sort_values(by=['file_name'])
df = df.reset_index(drop=True)

# splotting into the sentences
df['paraghraph'] = df['paraghraph'].apply(lambda r: r.replace("Mr. ","Mr."))
df['sentence'] = df['paraghraph'].apply(lambda r: r.split(". "))
df = df.explode('sentence')

# Apply the function to clean the text
df['clean_text'] = df['sentence'].apply(lambda r: process_text(r))
df.head(5)

df.head(5)

Unnamed: 0,file_name,date,company_name,paraghraph,paraghraph_clean,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,sentence,clean_text
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.03291,0.001339,0.018703,0.002442,0.060343,0.00206,0.35508,0.048446,0.047775,"3910673 Good afternoon, ladies and gentlemen",3910673 good afternoon ladi gentlemen
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.03291,0.001339,0.018703,0.002442,0.060343,0.00206,0.35508,0.048446,0.047775,Welcome to MAPFRE's full-year 2010 results pre...,welcom mapfre' full-year 2010 result present
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.03291,0.001339,0.018703,0.002442,0.060343,0.00206,0.35508,0.048446,0.047775,"As usual, I'll give you an overview of the res...",usual i'll give overview result main busi deve...
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.03291,0.001339,0.018703,0.002442,0.060343,0.00206,0.35508,0.048446,0.047775,Later Mr.Lubelli will explain the financials i...,later mr.lubelli explain financi greater detail
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.03291,0.001339,0.018703,0.002442,0.060343,0.00206,0.35508,0.048446,0.047775,"Finally, we will take your questions",final take question


# Weighted topic Probability by files

In [33]:
topic_df = df.copy()
# cal len paragraph
topic_df['len_para'] = topic_df['paraghraph'].apply(lambda r: len(r))

In [34]:
cal_len_docs = topic_df[['file_name','len_para']]
# Use GroupBy() to compute the sum of the document
cal_len_docs = cal_len_docs.groupby('file_name').sum()
cal_len_docs = cal_len_docs.rename(columns={"len_para":"len_docs"})
cal_len_docs = cal_len_docs.reset_index(drop=False)
cal_len_docs.head(5)

Unnamed: 0,file_name,len_docs
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259
1,20110202_LEGAL_-_GEN_GRP-_Guidance_Call_2011-2...,324354
2,20110203_Markel_Corp-_Earnings_Call_2011-2-3_S...,803392
3,20110208_Beazley_PLC-_Earnings_Call_2011-2-8_S...,1286394
4,20110209_INTACT_FINANCIAL-_Earnings_Call_2011-...,490919


In [35]:
# calculate weighted average for topic probability
# -----> merge the cal_len_docs
merge_df = cal_len_docs.merge(topic_df, how='inner', on='file_name')
# -----> caculate the weight of the sentiment
merge_df['weighted']=merge_df['len_para']/merge_df['len_docs']
merge_df

Unnamed: 0,file_name,len_docs,date,company_name,paraghraph,paraghraph_clean,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,sentence,clean_text,len_para,weighted
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.032910,0.001339,0.018703,0.002442,0.060343,0.002060,0.355080,0.048446,0.047775,"3910673 Good afternoon, ladies and gentlemen",3910673 good afternoon ladi gentlemen,6915,0.005102
1,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.032910,0.001339,0.018703,0.002442,0.060343,0.002060,0.355080,0.048446,0.047775,Welcome to MAPFRE's full-year 2010 results pre...,welcom mapfre' full-year 2010 result present,6915,0.005102
2,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.032910,0.001339,0.018703,0.002442,0.060343,0.002060,0.355080,0.048446,0.047775,"As usual, I'll give you an overview of the res...",usual i'll give overview result main busi deve...,6915,0.005102
3,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.032910,0.001339,0.018703,0.002442,0.060343,0.002060,0.355080,0.048446,0.047775,Later Mr.Lubelli will explain the financials i...,later mr.lubelli explain financi greater detail,6915,0.005102
4,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.032910,0.001339,0.018703,0.002442,0.060343,0.002060,0.355080,0.048446,0.047775,"Finally, we will take your questions",final take question,6915,0.005102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395693,20220427_GJENSIDIGE_FORSI-_Earnings_Call_2022-...,1853590,2022-04-27,GJENSIDIGE FORSI-,"3974076 Thank you. Good morning, everyone. A...",Thank you Good morning everyone And welcome...,0.070179,0.083539,0.033002,0.097194,0.060204,0.032063,0.050775,0.384272,0.102663,0.086109,And welcome to the ﬁrst quarter presentation o...,welcom ﬁrst quarter present gjensidig,430,0.000232
395694,20220427_GJENSIDIGE_FORSI-_Earnings_Call_2022-...,1853590,2022-04-27,GJENSIDIGE FORSI-,"3974076 Thank you. Good morning, everyone. A...",Thank you Good morning everyone And welcome...,0.070179,0.083539,0.033002,0.097194,0.060204,0.032063,0.050775,0.384272,0.102663,0.086109,My name is Mitra Negard and I'm Head of Invest...,name mitra negard i'm head investor relat,430,0.000232
395695,20220427_GJENSIDIGE_FORSI-_Earnings_Call_2022-...,1853590,2022-04-27,GJENSIDIGE FORSI-,"3974076 Thank you. Good morning, everyone. A...",Thank you Good morning everyone And welcome...,0.070179,0.083539,0.033002,0.097194,0.060204,0.032063,0.050775,0.384272,0.102663,0.086109,"As always, we will start with our CEO, Helge L...",alway start ceo helg leiro baastad give highli...,430,0.000232
395696,20220427_GJENSIDIGE_FORSI-_Earnings_Call_2022-...,1853590,2022-04-27,GJENSIDIGE FORSI-,"3974076 Thank you. Good morning, everyone. A...",Thank you Good morning everyone And welcome...,0.070179,0.083539,0.033002,0.097194,0.060204,0.032063,0.050775,0.384272,0.102663,0.086109,"And we have, of course, a lot of time for Q&A ...",cours lot time q,430,0.000232


In [36]:
# -----> give the weighted average of the probability based on the paragraph length
for i in range (1,11):
    merge_df['topic_'+str(i)] = merge_df['topic_'+str(i)]*merge_df['weighted']
# -----> sum WA sentiment of each files
topic_df = merge_df.copy()
topic_df = topic_df.groupby(['file_name','date','company_name']).sum().reset_index(drop=False)
# drop the column: 'len_docs', 'len_para', 'weighted'
topic_df = topic_df.drop(['len_docs','len_para','weighted'], axis = 1)
topic_df

Unnamed: 0,file_name,date,company_name,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,0.656253,0.012079,0.000904,0.012631,0.057451,0.030606,0.001391,0.163704,0.047936,0.017045
1,20110202_LEGAL_-_GEN_GRP-_Guidance_Call_2011-2...,2011-02-02,LEGAL -,0.202508,0.093453,0.002578,0.120774,0.064953,0.018331,0.002840,0.003085,0.131828,0.359650
2,20110203_Markel_Corp-_Earnings_Call_2011-2-3_S...,2011-02-03,Markel Corp-,0.017086,0.072191,0.001909,0.132086,0.315447,0.036262,0.054925,0.034294,0.316347,0.019453
3,20110208_Beazley_PLC-_Earnings_Call_2011-2-8_S...,2011-02-08,Beazley PLC-,0.003291,0.036449,0.001548,0.069775,0.160342,0.001503,0.075499,0.006455,0.266193,0.378945
4,20110209_INTACT_FINANCIAL-_Earnings_Call_2011-...,2011-02-09,INTACT FINANCIAL-,0.003687,0.020716,0.180356,0.005106,0.120406,0.001684,0.538324,0.001071,0.051059,0.077589
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1980,20220310_Sanlam_Ltd-_Earnings_Call_2022-3-10_D...,2022-03-10,Sanlam Ltd-,0.212463,0.006642,0.242472,0.122330,0.070116,0.149912,0.029469,0.012601,0.045702,0.108293
1981,20220323_Poste_Italiane_SpA-_Earnings_Call_202...,2022-03-23,Poste Italiane,0.074169,0.003564,0.000486,0.428340,0.007244,0.021312,0.004922,0.324300,0.086397,0.049266
1982,20220324_Helvetia_Holding_AG-_Earnings_Call_20...,2022-03-24,Helvetia Holding,0.310859,0.003878,0.001142,0.411355,0.001352,0.000720,0.062917,0.043701,0.002306,0.161770
1983,20220426_Tryg_A-S-_Earnings_Call_2022-4-26_DN0...,2022-04-26,Tryg A-S-,0.133917,0.015293,0.525656,0.080875,0.017488,0.001255,0.005953,0.000798,0.004018,0.214747


# Weighted Sentiment Score by files

In [37]:
import pickle
sentiment = pickle.load(open('sentiment_model.pkl', 'rb'))
df['sentiment'] = sentiment.predict(df['clean_text'])
df.head(3)

Unnamed: 0,file_name,date,company_name,paraghraph,paraghraph_clean,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,sentence,clean_text,sentiment
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.03291,0.001339,0.018703,0.002442,0.060343,0.00206,0.35508,0.048446,0.047775,"3910673 Good afternoon, ladies and gentlemen",3910673 good afternoon ladi gentlemen,1
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.03291,0.001339,0.018703,0.002442,0.060343,0.00206,0.35508,0.048446,0.047775,Welcome to MAPFRE's full-year 2010 results pre...,welcom mapfre' full-year 2010 result present,1
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.03291,0.001339,0.018703,0.002442,0.060343,0.00206,0.35508,0.048446,0.047775,"As usual, I'll give you an overview of the res...",usual i'll give overview result main busi deve...,1


In [38]:
# Weight of each sentimentc
# cal len paragraph
df['len_para'] = df['paraghraph'].apply(lambda r: len(r))
# cal len sentence 
df['len_sent'] = df['sentence'].apply(lambda r: len(r))
df.head(3)

Unnamed: 0,file_name,date,company_name,paraghraph,paraghraph_clean,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,sentence,clean_text,sentiment,len_para,len_sent
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.03291,0.001339,0.018703,0.002442,0.060343,0.00206,0.35508,0.048446,0.047775,"3910673 Good afternoon, ladies and gentlemen",3910673 good afternoon ladi gentlemen,1,6915,46
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.03291,0.001339,0.018703,0.002442,0.060343,0.00206,0.35508,0.048446,0.047775,Welcome to MAPFRE's full-year 2010 results pre...,welcom mapfre' full-year 2010 result present,1,6915,55
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,"3910673 Good afternoon, ladies and gentlemen...",Good afternoon ladies and gentlemen Welcome...,0.430903,0.03291,0.001339,0.018703,0.002442,0.060343,0.00206,0.35508,0.048446,0.047775,"As usual, I'll give you an overview of the res...",usual i'll give overview result main busi deve...,1,6915,106


In [39]:
cal_len_docs = df[['file_name','len_para']]
# Use GroupBy() to compute the sum of the document
cal_len_docs = cal_len_docs.groupby('file_name').sum()
cal_len_docs = cal_len_docs.rename(columns={"len_para":"len_docs"})
cal_len_docs = cal_len_docs.reset_index(drop=False)
cal_len_docs.head(5)

Unnamed: 0,file_name,len_docs
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,1355259
1,20110202_LEGAL_-_GEN_GRP-_Guidance_Call_2011-2...,324354
2,20110203_Markel_Corp-_Earnings_Call_2011-2-3_S...,803392
3,20110208_Beazley_PLC-_Earnings_Call_2011-2-8_S...,1286394
4,20110209_INTACT_FINANCIAL-_Earnings_Call_2011-...,490919


In [40]:
# calculate weighted average for sentiment
# -----> merge the cal_len_docs
merge_df = cal_len_docs.merge(df, how='inner', on='file_name')
# -----> caculate the weight of the sentiment
merge_df['weighted']=merge_df['len_sent']/merge_df['len_docs']
# -----> give the weighted average of the sentiment score based on the sentence
merge_df['WA_sentiment'] = merge_df['weighted']*merge_df['sentiment']
# -----> sum WA sentiment of each files
sentiment_df = merge_df[['file_name','date','company_name','WA_sentiment']]
sentiment_df = sentiment_df.groupby(['file_name','date','company_name']).sum().reset_index(drop=False)
sentiment_df

Unnamed: 0,file_name,date,company_name,WA_sentiment
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,0.001179
1,20110202_LEGAL_-_GEN_GRP-_Guidance_Call_2011-2...,2011-02-02,LEGAL -,0.012101
2,20110203_Markel_Corp-_Earnings_Call_2011-2-3_S...,2011-02-03,Markel Corp-,0.012195
3,20110208_Beazley_PLC-_Earnings_Call_2011-2-8_S...,2011-02-08,Beazley PLC-,0.006725
4,20110209_INTACT_FINANCIAL-_Earnings_Call_2011-...,2011-02-09,INTACT FINANCIAL-,0.008690
...,...,...,...,...
1980,20220310_Sanlam_Ltd-_Earnings_Call_2022-3-10_D...,2022-03-10,Sanlam Ltd-,0.002443
1981,20220323_Poste_Italiane_SpA-_Earnings_Call_202...,2022-03-23,Poste Italiane,0.002120
1982,20220324_Helvetia_Holding_AG-_Earnings_Call_20...,2022-03-24,Helvetia Holding,0.003111
1983,20220426_Tryg_A-S-_Earnings_Call_2022-4-26_DN0...,2022-04-26,Tryg A-S-,0.000772


# merge the sentiment_df and topic_df together

In [41]:
# merge the sentiment_df and topic_df together
MD_model_df = topic_df.merge(sentiment_df, how='inner', on=['file_name','date','company_name'])
MD_model_df

Unnamed: 0,file_name,date,company_name,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,WA_sentiment
0,20110202 _Mapfre_SA_Earnings_Call_SD0000000027...,2011-02-02,Mapfre SA,0.656253,0.012079,0.000904,0.012631,0.057451,0.030606,0.001391,0.163704,0.047936,0.017045,0.001179
1,20110202_LEGAL_-_GEN_GRP-_Guidance_Call_2011-2...,2011-02-02,LEGAL -,0.202508,0.093453,0.002578,0.120774,0.064953,0.018331,0.002840,0.003085,0.131828,0.359650,0.012101
2,20110203_Markel_Corp-_Earnings_Call_2011-2-3_S...,2011-02-03,Markel Corp-,0.017086,0.072191,0.001909,0.132086,0.315447,0.036262,0.054925,0.034294,0.316347,0.019453,0.012195
3,20110208_Beazley_PLC-_Earnings_Call_2011-2-8_S...,2011-02-08,Beazley PLC-,0.003291,0.036449,0.001548,0.069775,0.160342,0.001503,0.075499,0.006455,0.266193,0.378945,0.006725
4,20110209_INTACT_FINANCIAL-_Earnings_Call_2011-...,2011-02-09,INTACT FINANCIAL-,0.003687,0.020716,0.180356,0.005106,0.120406,0.001684,0.538324,0.001071,0.051059,0.077589,0.008690
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1980,20220310_Sanlam_Ltd-_Earnings_Call_2022-3-10_D...,2022-03-10,Sanlam Ltd-,0.212463,0.006642,0.242472,0.122330,0.070116,0.149912,0.029469,0.012601,0.045702,0.108293,0.002443
1981,20220323_Poste_Italiane_SpA-_Earnings_Call_202...,2022-03-23,Poste Italiane,0.074169,0.003564,0.000486,0.428340,0.007244,0.021312,0.004922,0.324300,0.086397,0.049266,0.002120
1982,20220324_Helvetia_Holding_AG-_Earnings_Call_20...,2022-03-24,Helvetia Holding,0.310859,0.003878,0.001142,0.411355,0.001352,0.000720,0.062917,0.043701,0.002306,0.161770,0.003111
1983,20220426_Tryg_A-S-_Earnings_Call_2022-4-26_DN0...,2022-04-26,Tryg A-S-,0.133917,0.015293,0.525656,0.080875,0.017488,0.001255,0.005953,0.000798,0.004018,0.214747,0.000772


# Stock Price

In [42]:
# List all yahoo tickers
yahoo_ticker_list = [
    # Motor/Personal
    'ADM.L','DLG.L','SBRE.L','SAGA.L','AGS.BR',
    # Global Commercial
    'ALV.DE','CS.PA','ZURN.SW','G.MI',
    # London Market 
    'BEZ.L','HSX.L','LRE.L',
    # LN Equity 
    'LLOY.L',
    # US - Specialty/P&C/Reinsurance
    'AIG','AXS','TRV','ACGL','RNR','RE','MKL','HIG','ARGO','BRK-B','CB',
    # European (Re)Insurers
    'SCR.PA','MUV2.DE','SREN.SW','HNR1.DE',
    # Japanese & Pacific
    '8766.T','8630.T','8725.T','QBE.AX',
    # Run-off
    'ESGR','FFH.TO','RQIH.L',
    # Life Groups & Retail Life
    'PRU.L','MNG.L','LGEN.L','AV.L','PHNX.L','QLT.L','JUST.L','STJ.L','AGN.AS','DSY.JO','SLM.JO',
    # Other insurers
    'STB.OL','CNP.PA','GJF.OL','PST.MI','NN.AS','TOP.CO','BALN.SW',
    'SAMPO.HE','MAP.MC','TRYG.CO','0RHS.IL','HELN.SW','IFC.TO'
]

# Match it with company names from BoE
Insurer_Names_df = pd.read_excel('./input/Insurer_Names_for_possible_NLP_analysis.xlsx')  
Insurer_Names = Insurer_Names_df.drop(['Unnamed: 0', 'Unnamed: 4'], axis=1)
Insurer_Names = Insurer_Names.drop([0])
Insurer_Names.rename(columns={"Unnamed: 1": "Company", "Unnamed: 2": "bb ticker", "Unnamed: 3":"Group"}, inplace = True)
Insurer_Names['yahoo ticker'] = yahoo_ticker_list
Insurer_Names.head(5)
# Exported as CSV to manually matched with the extracted file names (column 'company_name' in model_df)
# Insurer_Names[['Company']].to_csv('./main_df_output/company_name_match.csv', index = False)

Unnamed: 0,Company,bb ticker,Group,yahoo ticker
1,ADMIRAL GROUP,ADM LN,Motor/Personal,ADM.L
2,DIRECT LINE INSU,DLG LN,Motor/Personal,DLG.L
3,SABRE INSUR,SBRE LN,Motor/Personal,SBRE.L
4,SAGA PLC,SAGA LN,Motor/Personal,SAGA.L
5,AGEAS,AGS BB,Motor/Personal,AGS.BR


In [43]:
company_name_match = pd.read_csv('./input/company_name_match.csv')
company_name_match.rename(columns={"Company": "company_name"}, inplace = True)
company_name_match

Groups = {
        "ADMIRAL GROUP" : "Motor/Personal", "DIRECT LINE INSU" : "Motor/Personal", 
        "SABRE INSUR" : "Motor/Personal", "SAGA PLC" : "Motor/Personal",
        "AGEAS" : "Motor/Personal",

        "ALLIANZ SE-REG" : "Global Commercial", "AXA" : "Global Commercial", 
        "ZURICH INSURANCE" : "Global Commercial", "GENERALI ASSIC" : "Global Commercial",

        "BEAZLEY PLC" : "London Market", "HISCOX LTD" : "London Market", 
        "LANCASHIRE HOLDI" : "London Market",
        
        "Society of Lloyd’s" : "LN Equity",

        "AMERICAN INTERNA" : "US - Specialty/P&C/Reinsurance", "AXIS CAPITAL" : "US - Specialty/P&C/Reinsurance",
        "TRAVELERS COS IN" : "US - Specialty/P&C/Reinsurance", "ARCH CAPITAL GRP" : "US - Specialty/P&C/Reinsurance",
        "RENAISSANCERE" : "US - Specialty/P&C/Reinsurance", "EVEREST RE GROUP" : "US - Specialty/P&C/Reinsurance",
        "MARKEL CORP" : "US - Specialty/P&C/Reinsurance", "HARTFORD FINL SV" : "US - Specialty/P&C/Reinsurance",
        "ARGO GROUP INTER" : "US - Specialty/P&C/Reinsurance", "BERKSHIRE HATH-B" : "US - Specialty/P&C/Reinsurance",
        "CHUBB LTD" : "US - Specialty/P&C/Reinsurance",

        "SCOR SE" : "European (Re)Insurers", "MUENCHENER RUE-R" : "European (Re)Insurers", 
        "SWISS RE AG" : "European (Re)Insurers", "HANNOVER RUECK S" : "European (Re)Insurers",

        "TOKIO MARINE HD" : "Japanese & Pacific", "SOMPO HOLDINGS I" : "Japanese & Pacific",
        "MS&AD INSURANCE" : "Japanese & Pacific", "QBE INSURANCE" : "Japanese & Pacific",

        "ENSTAR GROUP LTD" : "Run-off", "FAIRFAX FINL HLD" : "Run-off", "RANDALL & QUILTE" : "Run-off",

        "PRUDENTIAL PLC" : "Life Groups & Retail Life", "M&G PLC" : "Life Groups & Retail Life",
        "LEGAL & GEN GRP" : "Life Groups & Retail Life", "AVIVA PLC" : "Life Groups & Retail Life",
        "PHOENIX GROUP HO" : "Life Groups & Retail Life", "QUILTER PLC" : "Life Groups & Retail Life",
        "JUST GROUP" : "Life Groups & Retail Life", "ST JAMES'S PLACE" : "Life Groups & Retail Life",
        "AEGON NV" : "Life Groups & Retail Life", "DISCOVERY LTD" : "Life Groups & Retail Life",
        "SANLAM LTD" : "Life Groups & Retail Life",

        "STOREBRAND ASA" : "Other insurers", "CNP ASSURANCES" : "Other insurers",
        "GJENSIDIGE FORSI" : "Other insurers", "POSTE ITALIANE" : "Other insurers",
        "NN GROUP" : "Other insurers", "TOPDANMARK A/S" : "Other insurers",
        "BALOISE HOL-REG" : "Other insurers", "SAMPO OYJ-A SHS" : "Other insurers",
        "MAPFRE SA" : "Other insurers", "TRYG A/S" : "Other insurers",
        "ASR NEDERLAND NV" : "Other insurers", "HELVETIA HOL-REG" : "Other insurers",
        "INTACT FINANCIAL" : "Other insurers"
}

yahoo_ticker = {
        "ADMIRAL GROUP" : "ADM.L", "DIRECT LINE INSU" : "DLG.L", 
        "SABRE INSUR" : "SBRE.L", "SAGA PLC" : "SAGA.L",
        "AGEAS" : "AGS.BR", 
        
         "ALLIANZ SE-REG" : "ALV.DE", "AXA" : "CS.PA", 
        "ZURICH INSURANCE" : "ZURN.SW", "GENERALI ASSIC" : "G.MI",

        "BEAZLEY PLC" : "BEZ.L", "HISCOX LTD" : "HSX.L", 
        "LANCASHIRE HOLDI" : "LRE.L",

        "Society of Lloyd’s" : "LLOY.L",

        "AMERICAN INTERNA" : "AIG", "AXIS CAPITAL" : "AXS",
        "TRAVELERS COS IN" : "TRV", "ARCH CAPITAL GRP" : "ACGL",
        "RENAISSANCERE" : "RNR", "EVEREST RE GROUP" : "RE",
        "MARKEL CORP" : "MKL", "HARTFORD FINL SV" : "HIG",
        "ARGO GROUP INTER" : "ARGO", "BERKSHIRE HATH-B" : "BRK-B",
        "CHUBB LTD" : "CB",

        "SCOR SE" : "SCR.PA", "MUENCHENER RUE-R" : "MUV2.DE", 
        "SWISS RE AG" : "SREN.SW", "HANNOVER RUECK S" : "HNR1.DE",

        "TOKIO MARINE HD" : "8766.T", "SOMPO HOLDINGS I" : "8630.T",
        "MS&AD INSURANCE" : "8725.T", "QBE INSURANCE" : "QBE.AX",

        "ENSTAR GROUP LTD" : "ESGR", "FAIRFAX FINL HLD" : "FFH.TO", "RANDALL & QUILTE" : "RQIH.L",

        "PRUDENTIAL PLC" : "PRU.L", "M&G PLC" : "MNG.L",
        "LEGAL & GEN GRP" : "LGEN.L", "AVIVA PLC" : "AV.L",
        "PHOENIX GROUP HO" : "PHNX.L", "QUILTER PLC" : "QLT.L",
        "JUST GROUP" : "JUST.L", "ST JAMES'S PLACE" : "STJ.L",
        "AEGON NV" : "AGN.AS", "DISCOVERY LTD" : "DSY.JO",
        "SANLAM LTD" : "SLM.JO",

        "STOREBRAND ASA" : "STB.OL", "CNP ASSURANCES" : "CNP.PA",
        "GJENSIDIGE FORSI" : "GJF.OL", "POSTE ITALIANE" : "PST.MI",
        "NN GROUP" : "NN.AS", "TOPDANMARK A/S" : "TOP.CO",
        "BALOISE HOL-REG" : "BALN.SW", "SAMPO OYJ-A SHS" : "SAMPO.HE",
        "MAPFRE SA" : "MAP.MC", "TRYG A/S" : "TRYG.CO",
        "ASR NEDERLAND NV" : "0RHS.IL", "HELVETIA HOL-REG" : "HELN.SW",
        "INTACT FINANCIAL" : "IFC.TO"
}

company_name_match['Group'] =company_name_match['Real_Company_Name'].map(Groups)
company_name_match['yfiance_ticker'] =company_name_match['Real_Company_Name'].map(yahoo_ticker)
company_name_match

Unnamed: 0,Real_Company_Name,company_name,Group,yfiance_ticker
0,ADMIRAL GROUP,Admiral Group,Motor/Personal,ADM.L
1,DIRECT LINE INSU,DIRECT LINE,Motor/Personal,DLG.L
2,SABRE INSUR,SABRE INSUR-,Motor/Personal,SBRE.L
3,SAGA PLC,Saga PLC-,Motor/Personal,SAGA.L
4,AGEAS,Ageas SA-NV-,Motor/Personal,AGS.BR
...,...,...,...,...
62,MAPFRE SA,,Other insurers,MAP.MC
63,TRYG A/S,Tryg A-S-,Other insurers,TRYG.CO
64,ASR NEDERLAND NV,ASR Nederland,Other insurers,0RHS.IL
65,HELVETIA HOL-REG,Helvetia Holding,Other insurers,HELN.SW


In [44]:
df_company = company_name_match[['company_name','Group','yfiance_ticker']]
MD_model_df = df_company.merge(MD_model_df, how='inner', on='company_name')
MD_model_df = MD_model_df.rename(columns={'yfiance_ticker':'ticker'})
MD_model_df

Unnamed: 0,company_name,Group,ticker,file_name,date,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,WA_sentiment
0,Admiral Group,Motor/Personal,ADM.L,20110302_Admiral_Group_PLC-_Earnings_Call_2011...,2011-03-02,0.000940,0.012294,0.398791,0.008481,0.009419,0.056247,0.005848,0.000273,0.295048,0.212659,0.000392
1,Admiral Group,Motor/Personal,ADM.L,20110824_Admiral_Group_PLC-_Earnings_Call_2011...,2011-08-24,0.005148,0.002558,0.314133,0.008527,0.037732,0.054996,0.000857,0.000344,0.266681,0.309022,0.000765
2,Admiral Group,Motor/Personal,ADM.L,20111109_Admiral_Group_PLC-_Guidance_Call_2011...,2011-11-09,0.002408,0.003127,0.001132,0.088902,0.002066,0.037772,0.001742,0.000700,0.296897,0.565255,-0.000433
3,Admiral Group,Motor/Personal,ADM.L,20111109_Admiral_Group_PLC-_Guidance_Call_2011...,2011-11-09,0.004044,0.001966,0.398870,0.017938,0.009261,0.022735,0.025120,0.008021,0.252457,0.259588,0.000336
4,Admiral Group,Motor/Personal,ADM.L,20120307_Admiral_Group_PLC-_Earnings_Call_2012...,2012-03-07,0.019652,0.017997,0.460937,0.081705,0.024005,0.007314,0.001035,0.000415,0.162040,0.224901,0.001669
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1870,INTACT FINANCIAL-,Other insurers,IFC.TO,20210210_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-02-10,0.008426,0.016922,0.007051,0.368703,0.227906,0.046338,0.276480,0.000754,0.003799,0.043621,0.008256
1871,INTACT FINANCIAL-,Other insurers,IFC.TO,20210512_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-05-12,0.002075,0.010299,0.114369,0.181040,0.061734,0.027645,0.466451,0.000603,0.127851,0.007933,0.005915
1872,INTACT FINANCIAL-,Other insurers,IFC.TO,20210728_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-07-28,0.065150,0.013062,0.001080,0.302898,0.074113,0.012691,0.451731,0.000667,0.054448,0.024158,0.005511
1873,INTACT FINANCIAL-,Other insurers,IFC.TO,20211110_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-11-10,0.071902,0.013270,0.155491,0.312667,0.044191,0.049608,0.320103,0.000612,0.008379,0.023777,0.004812


In [45]:
start_date='2010-11-01'
end_date='2022-03-02'
stock_list = company_name_match['yfiance_ticker']
df_stock = stock_price_change(stock_list, start_date, end_date)

# limit the dat till 2021-12-31
df_stock = df_stock[df_stock['date']<='2021-12-31']

df_stock


1 Failed download:
- CNP.PA: No data found, symbol may be delisted


Unnamed: 0,date,Close,ticker,D-1,D+1,D-2,D+2,D-3,D+3,D-5,D+5,D-10,D+10,D-15,D+15,D-16,D+16,D-17,D+17,D-18,D+18,D-19,D+19,D-20,D+20,D-21,D+21,D-22,D+22,D-23,D+23,D-24,D+24,D-25,D+25,D-26,D+26,D-27,D+27,D-28,D+28,D-29,D+29,D-30,D+30,D-40,D+40,D-50,D+50,D-60,D+60
0,2010-11-01,1611.000000,ADM.L,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
1,2010-11-02,1627.000000,ADM.L,0.009932,0.026429,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
2,2010-11-03,1670.000000,ADM.L,0.026429,0.005988,0.036623,0.013772,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
3,2010-11-04,1680.000000,ADM.L,0.005988,0.007738,0.032575,0.000000,0.042831,-0.005357,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
4,2010-11-05,1693.000000,ADM.L,0.007738,-0.007679,0.013772,-0.012995,0.040565,-0.035440,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2798,2021-12-23,163.240005,IFC.TO,-0.001101,0.001470,0.001841,0.008025,0.012090,0.006493,0.005049,0.002940,0.012153,0.010475,0.004678,-0.010843,0.019358,-0.011884,0.029775,0.002573,0.011651,0.006493,0.006164,0.021196,-0.005483,0.022421,-0.007177,0.023401,-0.011745,0.034244,-0.011805,0.041289,-0.012880,0.055072,-0.021402,0.056849,-0.032308,0.062607,-0.031791,0.059177,-0.020227,0.072838,-0.032365,0.071735,-0.040273,0.090419,-0.031619,0.109777,0.0,0.0,0.0,0.0
2799,2021-12-24,163.479996,IFC.TO,0.001470,0.006545,0.000367,0.005016,0.003314,0.005750,0.003684,0.000856,0.013138,0.004037,0.006774,-0.013335,0.006155,0.001101,0.020857,0.005016,0.031289,0.019697,0.013138,0.020920,0.007643,0.021899,-0.004021,0.032726,-0.005717,0.039760,-0.010292,0.053523,-0.010352,0.055297,-0.011429,0.061047,-0.019963,0.057622,-0.030885,0.071263,-0.030368,0.070161,-0.018786,0.088818,-0.030943,0.132310,-0.014647,0.110533,0.0,0.0,0.0,0.0
2800,2021-12-29,164.550003,IFC.TO,0.006545,-0.001519,0.008025,-0.000790,0.006915,-0.005044,0.020212,0.011972,0.020022,-0.002492,0.013114,-0.005409,0.013364,-0.001519,0.012740,0.013066,0.027538,0.014281,0.038039,0.015254,0.019769,0.026010,0.014238,0.032999,0.002498,0.046673,0.000791,0.048435,-0.003814,0.054148,-0.003874,0.050744,-0.004959,0.064297,-0.013548,0.063203,-0.024542,0.081738,-0.024021,0.124947,-0.012364,0.122455,-0.001880,0.108781,0.0,0.0,0.0,0.0
2801,2021-12-30,164.300003,IFC.TO,-0.001519,0.000730,0.005016,-0.003530,0.006493,-0.004139,0.008347,0.003956,0.033984,-0.022581,0.004586,0.000000,0.011575,0.014607,0.011824,0.015825,0.011201,0.016799,0.025977,0.027572,0.036462,0.034571,0.018220,0.048265,0.012697,0.050030,0.000975,0.055752,-0.000730,0.052343,-0.005327,0.065916,-0.005388,0.064820,-0.006470,0.083384,-0.015047,0.126659,-0.026024,0.124163,-0.025504,0.118077,-0.004725,0.106452,0.0,0.0,0.0,0.0


In [46]:
df_stock['date'] = df_stock['date'].astype(str)
# merged company tickers into big dataframe
MD_model_df = MD_model_df.join(df_stock.set_index(["date","ticker"]), 
                         on=["date","ticker"],
                         how='left'
                        )
MD_model_df = MD_model_df.dropna()
MD_model_df = MD_model_df.reset_index(drop=True)
MD_model_df

Unnamed: 0,company_name,Group,ticker,file_name,date,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,WA_sentiment,Close,D-1,D+1,D-2,D+2,D-3,D+3,D-5,D+5,D-10,D+10,D-15,D+15,D-16,D+16,D-17,D+17,D-18,D+18,D-19,D+19,D-20,D+20,D-21,D+21,D-22,D+22,D-23,D+23,D-24,D+24,D-25,D+25,D-26,D+26,D-27,D+27,D-28,D+28,D-29,D+29,D-30,D+30,D-40,D+40,D-50,D+50,D-60,D+60
0,Admiral Group,Motor/Personal,ADM.L,20110302_Admiral_Group_PLC-_Earnings_Call_2011...,2011-03-02,0.000940,0.012294,0.398791,0.008481,0.009419,0.056247,0.005848,0.000273,0.295048,0.212659,0.000392,1655.000000,-0.031030,0.013897,-0.020130,0.015106,-0.003612,-0.000604,-0.011350,-0.003021,-0.029326,-0.060423,-0.030463,-0.045317,-0.034422,-0.046526,-0.037231,-0.047734,-0.028186,-0.044713,-0.024175,-0.060423,-0.045009,-0.057402,-0.032164,-0.061027,0.007304,-0.026586,0.010996,-0.026586,-0.003012,-0.028399,-0.001809,-0.021752,0.010996,-0.031420,0.003639,-0.025378,-0.000604,-0.025378,0.020345,-0.028399,0.022236,-0.015710,0.057508,0.022356,0.067054,0.059819,0.030511,0.041088
1,Admiral Group,Motor/Personal,ADM.L,20110824_Admiral_Group_PLC-_Earnings_Call_2011...,2011-08-24,0.005148,0.002558,0.314133,0.008527,0.037732,0.054996,0.000857,0.000344,0.266681,0.309022,0.000765,1353.000000,-0.118567,-0.054693,-0.110454,-0.025868,-0.094983,-0.005913,-0.125969,0.011086,-0.031496,0.008130,-0.117417,-0.028825,-0.099800,-0.041390,-0.119141,-0.056911,-0.126533,-0.048041,-0.127097,-0.070214,-0.111038,-0.090909,-0.131579,-0.060606,-0.133803,-0.014782,-0.151192,0.021434,-0.147985,-0.025129,-0.128783,-0.035477,-0.127097,-0.066519,-0.118567,-0.070214,-0.146910,-0.093126,-0.143129,-0.066519,-0.125404,-0.069475,-0.178007,-0.108647,-0.208309,-0.124169,-0.205520,-0.390244
2,Admiral Group,Motor/Personal,ADM.L,20111109_Admiral_Group_PLC-_Guidance_Call_2011...,2011-11-09,0.002408,0.003127,0.001132,0.088902,0.002066,0.037772,0.001742,0.000700,0.296897,0.565255,-0.000433,887.500000,-0.256077,-0.076056,-0.247881,-0.053521,-0.251686,-0.061972,-0.234914,-0.098028,-0.276691,-0.025352,-0.275510,0.039437,-0.285427,0.027606,-0.296751,0.027042,-0.297308,0.045070,-0.284274,0.003380,-0.283118,-0.036056,-0.279041,-0.055211,-0.287721,-0.037183,-0.283118,-0.070986,-0.295075,-0.067042,-0.297308,-0.113239,-0.276691,-0.098028,-0.294515,-0.096338,-0.297308,-0.098592,-0.319923,-0.061408,-0.327142,-0.077183,-0.303375,-0.069859,-0.349817,0.018028,-0.420314,0.125070
3,Admiral Group,Motor/Personal,ADM.L,20111109_Admiral_Group_PLC-_Guidance_Call_2011...,2011-11-09,0.004044,0.001966,0.398870,0.017938,0.009261,0.022735,0.025120,0.008021,0.252457,0.259588,0.000336,887.500000,-0.256077,-0.076056,-0.247881,-0.053521,-0.251686,-0.061972,-0.234914,-0.098028,-0.276691,-0.025352,-0.275510,0.039437,-0.285427,0.027606,-0.296751,0.027042,-0.297308,0.045070,-0.284274,0.003380,-0.283118,-0.036056,-0.279041,-0.055211,-0.287721,-0.037183,-0.283118,-0.070986,-0.295075,-0.067042,-0.297308,-0.113239,-0.276691,-0.098028,-0.294515,-0.096338,-0.297308,-0.098592,-0.319923,-0.061408,-0.327142,-0.077183,-0.303375,-0.069859,-0.349817,0.018028,-0.420314,0.125070
4,Admiral Group,Motor/Personal,ADM.L,20120307_Admiral_Group_PLC-_Earnings_Call_2012...,2012-03-07,0.019652,0.017997,0.460937,0.081705,0.024005,0.007314,0.001035,0.000415,0.162040,0.224901,0.001669,1144.000000,0.100000,0.015734,0.095785,0.012238,0.080264,0.022727,0.062210,0.036713,0.103182,0.020979,0.179990,0.014860,0.181818,0.009615,0.181818,0.037587,0.186722,0.039336,0.161421,0.055070,0.186107,0.039336,0.175745,0.055070,0.145719,0.044580,0.102119,0.051573,0.189189,0.063811,0.226152,0.034091,0.215728,0.026224,0.278927,0.045455,0.242803,0.056818,0.243478,0.056818,0.266888,0.070804,0.354648,0.013986,0.375827,-0.048951,0.338795,-0.055944
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1807,INTACT FINANCIAL-,Other insurers,IFC.TO,20201104_INTACT_FINANCIAL-_Earnings_Call_2020-...,2020-11-04,0.045370,0.021419,0.029721,0.222770,0.080326,0.084131,0.384665,0.001437,0.100654,0.029507,0.003004,147.750000,0.063332,-0.018274,0.069258,-0.019425,0.073609,-0.060778,0.063179,-0.080203,0.031990,0.000203,0.026255,-0.019628,0.021149,-0.021997,0.018123,-0.027411,0.025258,-0.016650,0.026541,-0.005753,0.035679,0.005008,0.024974,0.024095,0.030335,0.062403,0.031486,0.042166,0.036260,0.050829,0.042181,0.042301,0.034157,0.033841,0.044243,0.031743,0.055508,0.008460,0.049212,0.007648,0.024050,0.015499,0.082021,0.013536,0.041887,-0.026058,0.030119,-0.038376
1808,INTACT FINANCIAL-,Other insurers,IFC.TO,20210210_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-02-10,0.008426,0.016922,0.007051,0.368703,0.227906,0.046338,0.276480,0.000754,0.003799,0.043621,0.008256,151.820007,0.034267,-0.013371,0.041432,-0.016994,0.039294,-0.032275,0.050876,-0.041101,0.061678,-0.054802,0.054013,-0.033263,0.059086,-0.012976,0.055038,0.013437,0.055478,0.018179,0.062942,0.014688,0.062347,0.015676,0.060196,0.007377,0.061307,0.003030,0.050876,0.009748,0.048119,-0.006982,0.028312,-0.008563,0.007098,-0.004874,0.013823,-0.003425,0.007298,0.004018,0.015451,0.007575,0.006163,0.013173,-0.004067,0.047029,0.056507,0.076999,0.036385,0.086418
1809,INTACT FINANCIAL-,Other insurers,IFC.TO,20210512_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-05-12,0.002075,0.010299,0.114369,0.181040,0.061734,0.027645,0.466451,0.000603,0.127851,0.007933,0.005915,162.309998,-0.016541,-0.011460,-0.027909,-0.011521,-0.015945,-0.015218,-0.009157,-0.027663,-0.003744,0.002403,-0.002704,0.041341,-0.000616,0.054464,0.002099,0.040478,-0.006914,0.033208,0.000987,0.032469,0.012034,0.039554,0.012097,0.039123,0.012602,0.033023,0.021074,0.032530,0.030213,0.039800,0.034678,0.039246,0.039982,0.025876,0.047161,0.041957,0.035272,0.053601,0.053961,0.051075,0.053756,0.041094,0.058774,0.059885,0.110875,0.038753,0.104751,0.044544
1810,INTACT FINANCIAL-,Other insurers,IFC.TO,20210728_INTACT_FINANCIAL-_Earnings_Call_2021-...,2021-07-28,0.065150,0.013062,0.001080,0.302898,0.074113,0.012691,0.451731,0.000667,0.054448,0.024158,0.005511,169.880005,0.001828,-0.002943,0.008669,0.000706,0.007592,-0.002826,0.000294,0.000530,0.001238,0.026725,-0.010427,0.036320,-0.012785,0.042913,-0.002173,0.027843,0.000943,0.022781,0.008729,0.021839,-0.001821,0.025606,-0.006666,0.020132,0.003841,0.015364,0.005326,0.011479,-0.004220,0.016541,-0.006608,0.024782,0.004494,0.018601,0.020239,0.017954,0.007114,0.024488,0.006577,0.021545,0.013664,0.027372,0.046639,0.003826,0.062813,-0.022251,0.031764,-0.002001


In [47]:
MD_model_df.to_csv("./regression_df_input/MD_model_df.csv", index = False)