#### Import necessary libraries

In [None]:

# Library for opening url and creating 
# requests
import urllib.request

# pretty-print python data structures
from pprint import pprint

# for parsing all the tables present 
# on the website
from html_table_parser.parser import HTMLTableParser

from yahoofinancials import YahooFinancials

# for converting the parsed data in a
# pandas dataframe
import pandas as pd
import os

#### Defining a function to get contents of the website

In [None]:

def url_get_contents(url):
    """Open `url` and return the raw bytes of the HTTP response body.

    Parameters
    ----------
    url : str
        Address to request.

    Returns
    -------
    bytes
        The undecoded response body; callers decode it themselves.

    Raises
    ------
    urllib.error.URLError
        Propagated unchanged from ``urllib.request.urlopen`` when the request
        cannot be completed.
    """
    # making request to the website
    req = urllib.request.Request(url=url)

    # The context manager closes the response (and its underlying connection)
    # as soon as the body has been read, even if reading raises.
    with urllib.request.urlopen(req) as response:
        # reading contents of the website
        return response.read()

Get Historical Adjusted Close prices from Yahoo Finance for the 20 companies

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Set the inline backend's figure format to 'retina'.
%config InlineBackend.figure_format = 'retina'

In [None]:
# Import the libraries
import matplotlib.pyplot as plt
import warnings

# The bundled 'seaborn' style sheet was renamed to 'seaborn-v0_8' in
# matplotlib 3.6 and the old name is gone in later releases, so pick whichever
# of the two names this matplotlib installation actually provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

# Hide FutureWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)

Download the stock prices using Yahoo Financials

In [None]:
# Yahoo Finance symbols of the 20 companies analysed
assets = [
    'ASIANPAINT.NS', 'HINDUNILVR.NS', 'ITC.NS', 'NESTLEIND.NS',
    'DABUR.NS', 'BRITANNIA.NS', 'EMAMILTD.NS', 'MARICO.NS',
    'WIPRO.NS', 'GODREJCP.NS', 'GODFRYPHLP.NS', 'TATACONSUM.NS',
    'JUBLFOOD.NS', 'UBL.NS', 'AVANTIFEED.NS', 'VADILALIND.NS',
    'DAAWAT.NS', 'COLPAL.NS', 'PIDILITIND.NS', 'MCDOWELL-N.NS',
]
yahoo_financials = YahooFinancials(assets)

# Monthly price history for every symbol, fetched through one client
data = yahoo_financials.get_historical_price_data(
    start_date='2013-12-31',
    end_date='2020-04-01',
    time_interval='monthly',
)

# Collect, per symbol, a {formatted date: adjusted close} mapping
adjclose_by_ticker = {}
for ticker in assets:
    adjclose_by_ticker[ticker] = {
        bar['formatted_date']: bar['adjclose'] for bar in data[ticker]['prices']
    }

# Rows are dates, columns are symbols
prices_df = pd.DataFrame(adjclose_by_ticker)

# Transposed view: rows are symbols, columns are dates
prices_df_T = prices_df.transpose()
prices_df_T

In [None]:
# Convert to Quarterly data.

# Calendar quarter that each date column of `prices_df_T` falls into.
quarter_of_date = pd.PeriodIndex(prices_df_T.columns, freq='Q')

# Average the columns belonging to the same quarter. `groupby(..., axis=1)` is
# deprecated in recent pandas releases, so the frame is transposed, grouped
# along the rows and transposed back, which yields the same table.
prices_df_T_qtr = (
    prices_df_T.transpose()
    .groupby(quarter_of_date)
    .mean()
    .transpose()
    # quarter labels become lower-case strings
    .rename(columns=lambda quarter: str(quarter).lower())
)

# One row per quarter; the quarter labels move into a regular 'index' column so
# the rows are numbered from 0.
prices_df_qtr = prices_df_T_qtr.transpose().reset_index()
prices_df_qtr

In [None]:
def quarterly_gain_loss(quarterly_prices, ticker):
    """Build the quarter-over-quarter price change table for one ticker.

    Parameters
    ----------
    quarterly_prices : pandas.DataFrame
        Table with one row per quarter and one price column per ticker.
    ticker : str
        Name of the price column to analyse.

    Returns
    -------
    pandas.DataFrame
        A new frame with three columns: the ticker's price column,
        '1q_avg_diff' (the next row's price minus this row's price) and
        'gain/loss %' (that difference as a percentage of this row's price).
        The last row has no following row, so both derived values are NaN.
    """
    frame = pd.DataFrame(quarterly_prices[ticker])
    # diff(periods=-1) is "this row minus the next row"; negating it gives the
    # change from this row to the next one.
    frame['1q_avg_diff'] = -frame[ticker].diff(periods=-1)
    frame['gain/loss %'] = (frame['1q_avg_diff'] * 100) / frame[ticker]
    return frame


# Quarter difference in prices with gain/loss % for every company
prices_df_qtr_asian = quarterly_gain_loss(prices_df_qtr, 'ASIANPAINT.NS')    # Asian Paints
prices_df_qtr_hul = quarterly_gain_loss(prices_df_qtr, 'HINDUNILVR.NS')      # HUL
prices_df_qtr_itc = quarterly_gain_loss(prices_df_qtr, 'ITC.NS')             # ITC
prices_df_qtr_nestle = quarterly_gain_loss(prices_df_qtr, 'NESTLEIND.NS')    # Nestle
prices_df_qtr_dabur = quarterly_gain_loss(prices_df_qtr, 'DABUR.NS')         # Dabur
prices_df_qtr_brit = quarterly_gain_loss(prices_df_qtr, 'BRITANNIA.NS')      # Britannia
prices_df_qtr_emami = quarterly_gain_loss(prices_df_qtr, 'EMAMILTD.NS')      # Emami
prices_df_qtr_marico = quarterly_gain_loss(prices_df_qtr, 'MARICO.NS')       # Marico
prices_df_qtr_wipro = quarterly_gain_loss(prices_df_qtr, 'WIPRO.NS')         # Wipro
prices_df_qtr_god = quarterly_gain_loss(prices_df_qtr, 'GODREJCP.NS')        # Godrej CP
prices_df_qtr_gfp = quarterly_gain_loss(prices_df_qtr, 'GODFRYPHLP.NS')      # Godfrey Phillips
prices_df_qtr_tata = quarterly_gain_loss(prices_df_qtr, 'TATACONSUM.NS')     # TATA Consumer
prices_df_qtr_jub = quarterly_gain_loss(prices_df_qtr, 'JUBLFOOD.NS')        # Jubilant
prices_df_qtr_ubl = quarterly_gain_loss(prices_df_qtr, 'UBL.NS')             # UBL
prices_df_qtr_ava = quarterly_gain_loss(prices_df_qtr, 'AVANTIFEED.NS')      # Avanti Feed
prices_df_qtr_vad = quarterly_gain_loss(prices_df_qtr, 'VADILALIND.NS')      # Vadilal
prices_df_qtr_daa = quarterly_gain_loss(prices_df_qtr, 'DAAWAT.NS')          # Daawat
prices_df_qtr_col = quarterly_gain_loss(prices_df_qtr, 'COLPAL.NS')          # Colgate Palmolive
prices_df_qtr_pid = quarterly_gain_loss(prices_df_qtr, 'PIDILITIND.NS')      # Pidilite
prices_df_qtr_us = quarterly_gain_loss(prices_df_qtr, 'MCDOWELL-N.NS')       # United Spirits


Three external attributes (to be included when submitting changes to the GitHub repository)

# 1. Index data NSE FMCG

In [None]:

# Load the NSE FMCG index history.
nse_fmcg = pd.read_csv('Index_data (2).csv', encoding='utf-8')

# Transposing there and back keeps the same rows and columns; both frames are
# retained because the steps below start from `df_nse`.
nse_fmcg_T = nse_fmcg.transpose()
df_nse = nse_fmcg_T.transpose()

# Reverse the rows of the dataframe and renumber them from 0
df_nse_r = df_nse.iloc[::-1].reset_index(drop=True)

# Turn the table on its side, then promote its first row to the column header
# (that row is the first CSV column, presumably the dates - TODO confirm).
df_nse_r_T = df_nse_r.transpose()
df_nse_fmcg = df_nse_r_T.rename(columns=df_nse_r_T.iloc[0]).drop(df_nse_r_T.index[0])

# Average the columns that fall into the same calendar quarter.
# `groupby(..., axis=1)` is deprecated in recent pandas releases, so the frame
# is transposed, grouped along the rows and transposed back instead.
nse_quarter_of_column = pd.PeriodIndex(df_nse_fmcg.columns, freq='Q')
nse_fmcg_by_quarter = (
    df_nse_fmcg.transpose()
    .groupby(nse_quarter_of_column)
    .mean()
    .transpose()
    # quarter labels become lower-case strings
    .rename(columns=lambda quarter: str(quarter).lower())
)

# One row per quarter; keep only the closing value under a descriptive name.
df_nse_fmcg_qtr = (
    nse_fmcg_by_quarter.transpose()
    .reset_index()
    .drop(['Open', 'High', 'Low'], axis=1)
    .rename(columns={'Close': 'NSE FMCG Index'})
)
df_nse_fmcg_qtr.head(5)

# Persist the quarterly index table
df_nse_fmcg_qtr.to_csv("df_nse.csv", encoding="utf-8", index=False)

# 2. NASDAQ Index

In [None]:
# Read the NASDAQ index file and give its 'Adj Close' column a descriptive name
nasdaq_raw = pd.read_csv('nasdaq_index.csv', encoding='utf-8')
nasdaq_index = nasdaq_raw.rename(columns={'Adj Close': 'NASDAQ Index'})
nasdaq_index.head(5)


# 3. Crude Annual

In [None]:
# Read the crude oil file and give its 'Avg Closing price' column a descriptive name
crude_raw = pd.read_csv('crude_annual.csv', encoding='utf-8')
crude_close = crude_raw.rename(columns={'Avg Closing price': 'Crude Oil in USD'})
crude_close.head(5)

#### Parsing tables

- Consolidated Quarterly Results

#### 01. Asian Paints

In [None]:
# Scrape, select financial features and print to a Dataframe

# Integer row labels of the parsed results table that are not required for the
# analysis; the same rows are discarded on every results page.
QTR_RESULTS_DROP_ROWS = [2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35, 38, 39,
                         40, 41, 42, 43, 44, 45, 46, 47, 48, 49]


def load_quarterly_results_page(url, drop_cols, source_title, quarter_labels):
    """Scrape one quarterly-results page and return it with one row per quarter.

    Parameters
    ----------
    url : str
        Address of the results page to download.
    drop_cols : list of int
        Positions of the columns of the parsed table to discard.
    source_title : str
        Header of the first column as it appears on the page; it is renamed
        to 'Quarterly Results FMCG'.
    quarter_labels : dict or sequence of str
        A dict maps scraped column headers to their new labels (headers not
        present are left untouched). A sequence relabels columns by position,
        starting with the column right after the first one.

    Returns
    -------
    pandas.DataFrame
        The cleaned table turned on its side: the first column
        ('Quarterly Results FMCG' when `source_title` matched) holds the
        quarter labels and the remaining columns are named after the entries
        of the page's first column.
    """
    # Defining the HTMLTableParser object and feeding it the html contents
    parser = HTMLTableParser()
    parser.feed(url_get_contents(url).decode('utf-8'))

    # The second table found on the page is used as the results table
    table = pd.DataFrame(parser.tables[1])

    # Drop unwanted columns.
    table = table.drop(table.columns[drop_cols], axis=1)

    # Make Column header as the 1st row, then remove that row from the data
    table = table.rename(columns=table.iloc[0])
    table = table.drop(table.index[[0]])

    # Drop row data not required for analysis
    table = table.drop(index=QTR_RESULTS_DROP_ROWS)

    # Rename header to include company name
    if isinstance(quarter_labels, dict):
        label_map = quarter_labels
    else:
        label_map = {table.columns[position]: label
                     for position, label in enumerate(quarter_labels, start=1)}
    table = table.rename(columns=label_map)
    table = table.rename(columns={source_title: 'Quarterly Results FMCG'})

    # Transpose the dataframe and reset its index so the former column headers
    # become an ordinary column
    flipped = table.transpose().reset_index()

    # Make Column header as the 1st row, then remove that row from the data
    flipped = flipped.rename(columns=flipped.iloc[0])
    return flipped.drop(flipped.index[[0]])


ASIANPAINTS_URL = 'https://www.moneycontrol.com/financials/asianpaints/results/quarterly-results/AP31/{page}#AP31'
ASIANPAINTS_TITLE = 'Quarterly Results of Asian Paints (in Rs. Cr.)'

# (page number, column positions to drop, labels for the quarter columns).
# Page 2 keeps a single quarter column, relabelled by its scraped header; the
# other pages keep five quarter columns, relabelled by position.
asianpaints_pages = [
    (2, [1, 2, 3, 4, 6], {'Mar 20': 'Asian Paints Mar 20'}),
    (3, [6], ['Asian Paints Dec 19', 'Asian Paints Sep 19', 'Asian Paints Jun 19',
              'Asian Paints Mar 19', 'Asian Paints Dec 18']),
    (4, [6], ['Asian Paints Sep 18', 'Asian Paints Jun 18', 'Asian Paints Mar 18',
              'Asian Paints Dec 17', 'Asian Paints Sep 17']),
    (5, [6], ['Asian Paints Jun 17', 'Asian Paints Mar 17', 'Asian Paints Dec 16',
              'Asian Paints Sep 16', 'Asian Paints Jun 16']),
    (6, [6], ['Asian Paints Mar 16', 'Asian Paints Dec 15', 'Asian Paints Sep 15',
              'Asian Paints Jun 15', 'Asian Paints Mar 15']),
    (7, [6], ['Asian Paints Dec 14', 'Asian Paints Sep 14', 'Asian Paints Jun 14',
              'Asian Paints Mar 14', 'Asian Paints Dec 13']),
]

asianpaints_page_frames = [
    load_quarterly_results_page(ASIANPAINTS_URL.format(page=page), drop_cols,
                                ASIANPAINTS_TITLE, labels)
    for page, drop_cols, labels in asianpaints_pages
]

# Per-page results stay available under their established names
(asianpaints_df_qtr_1_drop_T, asianpaints_df_qtr_2_drop_T, asianpaints_df_qtr_3_drop_T,
 asianpaints_df_qtr_4_drop_T, asianpaints_df_qtr_5_drop_T,
 asianpaints_df_qtr_6_drop_T) = asianpaints_page_frames

# Merge dataframes for Asian Paints
asianpaints_df = pd.concat(asianpaints_page_frames, ignore_index=True)

# Reverse the rows of the dataframe and renumber them from 0
asianpaints_df_r = asianpaints_df.iloc[::-1].reset_index(drop=True)

# Merge the DataFrames by index using pandas.merge(); the order matters
# because it determines the '_x' / '_y' suffixes given to repeated columns.
asianpaints_df_r_price = asianpaints_df_r
for extra_features in (prices_df_qtr_asian, df_nse_fmcg_qtr, nasdaq_index, crude_close):
    asianpaints_df_r_price = pd.merge(asianpaints_df_r_price, extra_features,
                                      left_index=True, right_index=True)

# Remove the raw price, the absolute difference and the leftover index columns
asianpaints_df_final = asianpaints_df_r_price.drop(
    ['ASIANPAINT.NS', '1q_avg_diff', 'index_x', 'index_y', 'index'], axis=1)
asianpaints_df_final

# Define split point
split_point = 20

# Create train set
data_train_1 = asianpaints_df_final.iloc[:split_point]

# Create test data set
data_test_1 = asianpaints_df_final.iloc[split_point:]


#### 02. Hindustan Unilever Ltd

In [None]:
# Scrape, select financial features and print to a Dataframe

xhtml_hul_qtr_1 = url_get_contents('https://www.moneycontrol.com/financials/hindustanunilever/results/quarterly-results/HU/2#HU').decode('utf-8')

# Defining the HTMLTableParser object
p = HTMLTableParser()

# feeding the html contents in the
# HTMLTableParser object
p.feed(xhtml_hul_qtr_1)


# Pandas Dataframe
hul_df_qtr_1 = pd.DataFrame(p.tables[1])

# Drop unwanted columns.
cols1 = [1, 2, 3, 4, 6] 
hul_df_qtr_1.drop(hul_df_qtr_1.columns[cols1], axis=1, inplace=True)

# Make Column header as the 1st row
hul_df_qtr_1.rename(columns=hul_df_qtr_1.iloc[0], inplace = True)
hul_df_qtr_1_drop = hul_df_qtr_1.drop(hul_df_qtr_1.index[[0]])

# Drop row data not required for analysis
hul_df_qtr_1_drop = hul_df_qtr_1_drop.drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])

# Rename header to include company name
hul_df_qtr_1_drop = hul_df_qtr_1_drop.rename(columns = {'Mar 20':'HUL Mar 20'})
hul_df_qtr_1_drop = hul_df_qtr_1_drop.rename(columns = {'Quarterly Results of Hindustan Unilever (in Rs. Cr.)':'Quarterly Results FMCG'})

# Transpose the dataframe
hul_df_qtr_1_drop_T = hul_df_qtr_1_drop.transpose()

# Reset index for the dataframe
hul_df_qtr_1_drop_T.reset_index(inplace=True)

# Make Column header as the 1st row
hul_df_qtr_1_drop_T.rename(columns=hul_df_qtr_1_drop_T.iloc[0], inplace = True)
hul_df_qtr_1_drop_T = hul_df_qtr_1_drop_T.drop(hul_df_qtr_1_drop_T.index[[0]])


xhtml_hul_qtr_2 = url_get_contents('https://www.moneycontrol.com/financials/hindustanunilever/results/quarterly-results/HU/3#HU').decode('utf-8')

# Defining the HTMLTableParser object
p = HTMLTableParser()

# feeding the html contents in the
# HTMLTableParser object

p.feed(xhtml_hul_qtr_2)

# Pandas Dataframe
hul_df_qtr_2 = pd.DataFrame(p.tables[1])

# Drop unwanted columns.
cols2 = [6] 
hul_df_qtr_2.drop(hul_df_qtr_2.columns[cols2], axis=1, inplace=True)

# Make Column header as the 1st row
hul_df_qtr_2.rename(columns=hul_df_qtr_2.iloc[0], inplace = True)
hul_df_qtr_2_drop = hul_df_qtr_2.drop(hul_df_qtr_2.index[[0]])

# Drop row data not required for analysis
hul_df_qtr_2_drop = hul_df_qtr_2_drop.drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])

# Rename header to include company name
hul_df_qtr_2_drop = hul_df_qtr_2_drop.rename(columns={hul_df_qtr_2_drop.columns[1]: 'HUL Dec 19', hul_df_qtr_2_drop.columns[2]: 'HUL Sep 19',
                            hul_df_qtr_2_drop.columns[3]: 'HUL Jun 19', hul_df_qtr_2_drop.columns[4]: 'HUL Mar 19', hul_df_qtr_2_drop.columns[5]: 'HUL Dec 18'})


hul_df_qtr_2_drop = hul_df_qtr_2_drop.rename(columns = {'Quarterly Results of Hindustan Unilever (in Rs. Cr.)':'Quarterly Results FMCG'})

# Transpose the dataframe
hul_df_qtr_2_drop_T = hul_df_qtr_2_drop.transpose()

# Reset index for the dataframe
hul_df_qtr_2_drop_T.reset_index(inplace=True)

# Make Column header as the 1st row
hul_df_qtr_2_drop_T.rename(columns=hul_df_qtr_2_drop_T.iloc[0], inplace = True)
hul_df_qtr_2_drop_T = hul_df_qtr_2_drop_T.drop(hul_df_qtr_2_drop_T.index[[0]])

xhtml_hul_qtr_3 = url_get_contents('https://www.moneycontrol.com/financials/hindustanunilever/results/quarterly-results/HU/4#HU').decode('utf-8')

# Defining the HTMLTableParser object
p = HTMLTableParser()

# feeding the html contents in the
# HTMLTableParser object

p.feed(xhtml_hul_qtr_3)

# Pandas Dataframe
hul_df_qtr_3 = pd.DataFrame(p.tables[1])

# Drop unwanted columns.
cols2 = [6] 
hul_df_qtr_3.drop(hul_df_qtr_3.columns[cols2], axis=1, inplace=True)

# Make Column header as the 1st row
hul_df_qtr_3.rename(columns=hul_df_qtr_3.iloc[0], inplace = True)
hul_df_qtr_3_drop = hul_df_qtr_3.drop(hul_df_qtr_3.index[[0]])

# Drop row data not required for analysis
hul_df_qtr_3_drop = hul_df_qtr_3_drop.drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
hul_df_qtr_3_drop

# Rename header to include company name
hul_df_qtr_3_drop = hul_df_qtr_3_drop.rename(columns={hul_df_qtr_3_drop.columns[1]: 'HUL Sep 18', hul_df_qtr_3_drop.columns[2]: 'HUL Jun 18',
                            hul_df_qtr_3_drop.columns[3]: 'HUL Mar 18', hul_df_qtr_3_drop.columns[4]: 'HUL Dec 17', hul_df_qtr_3_drop.columns[5]: 'HUL Sep 17'})

hul_df_qtr_3_drop = hul_df_qtr_3_drop.rename(columns = {'Quarterly Results of Hindustan Unilever (in Rs. Cr.)':'Quarterly Results FMCG'})

# Transpose the dataframe
hul_df_qtr_3_drop_T = hul_df_qtr_3_drop.transpose()

# Reset index for the dataframe
hul_df_qtr_3_drop_T.reset_index(inplace=True)

# Make Column header as the 1st row
hul_df_qtr_3_drop_T.rename(columns=hul_df_qtr_3_drop_T.iloc[0], inplace = True)
hul_df_qtr_3_drop_T = hul_df_qtr_3_drop_T.drop(hul_df_qtr_3_drop_T.index[[0]])



xhtml_hul_qtr_4 = url_get_contents('https://www.moneycontrol.com/financials/hindustanunilever/results/quarterly-results/HU/5#HU').decode('utf-8')

# Defining the HTMLTableParser object
p = HTMLTableParser()

# feeding the html contents in the
# HTMLTableParser object

p.feed(xhtml_hul_qtr_4)

# Pandas Dataframe

hul_df_qtr_4 = pd.DataFrame(p.tables[1])

# Drop unwanted columns.
cols2 = [6] 
hul_df_qtr_4.drop(hul_df_qtr_4.columns[cols2], axis=1, inplace=True)

# Make Column header as the 1st row
hul_df_qtr_4.rename(columns=hul_df_qtr_4.iloc[0], inplace = True)
hul_df_qtr_4_drop = hul_df_qtr_4.drop(hul_df_qtr_4.index[[0]])

# Drop row data not required for analysis
hul_df_qtr_4_drop = hul_df_qtr_4_drop.drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
hul_df_qtr_4_drop

# Rename header to include company name
hul_df_qtr_4_drop = hul_df_qtr_4_drop.rename(columns={hul_df_qtr_4_drop.columns[1]: 'HUL Jun 17', hul_df_qtr_4_drop.columns[2]: 'HUL Mar 17',
                            hul_df_qtr_4_drop.columns[3]: 'HUL Dec 16', hul_df_qtr_4_drop.columns[4]: 'HUL Sep 16', hul_df_qtr_4_drop.columns[5]: 'HUL Jun 16'})

hul_df_qtr_4_drop = hul_df_qtr_4_drop.rename(columns = {'Quarterly Results of Hindustan Unilever (in Rs. Cr.)':'Quarterly Results FMCG'})

# Transpose the dataframe
hul_df_qtr_4_drop_T = hul_df_qtr_4_drop.transpose()

# Reset index for the dataframe
hul_df_qtr_4_drop_T.reset_index(inplace=True)

# Make Column header as the 1st row
hul_df_qtr_4_drop_T.rename(columns=hul_df_qtr_4_drop_T.iloc[0], inplace = True)
hul_df_qtr_4_drop_T = hul_df_qtr_4_drop_T.drop(hul_df_qtr_4_drop_T.index[[0]])

xhtml_hul_qtr_5 = url_get_contents('https://www.moneycontrol.com/financials/hindustanunilever/results/quarterly-results/HU/6#HU').decode('utf-8')

# Defining the HTMLTableParser object
p = HTMLTableParser()

# feeding the html contents in the
# HTMLTableParser object

p.feed(xhtml_hul_qtr_5)

# Pandas Dataframe

hul_df_qtr_5 = pd.DataFrame(p.tables[1])

# Drop unwanted columns.
cols2 = [6] 
hul_df_qtr_5.drop(hul_df_qtr_5.columns[cols2], axis=1, inplace=True)

# Make Column header as the 1st row
hul_df_qtr_5.rename(columns=hul_df_qtr_5.iloc[0], inplace = True)
hul_df_qtr_5_drop = hul_df_qtr_5.drop(hul_df_qtr_5.index[[0]])

# Drop row data not required for analysis
hul_df_qtr_5_drop = hul_df_qtr_5_drop.drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
hul_df_qtr_5_drop

# Rename header to include company name
hul_df_qtr_5_drop = hul_df_qtr_5_drop.rename(columns={hul_df_qtr_5_drop.columns[1]: 'HUL Mar 16', hul_df_qtr_5_drop.columns[2]: 'HUL Dec 15',
                            hul_df_qtr_5_drop.columns[3]: 'HUL Sep 15', hul_df_qtr_5_drop.columns[4]: 'HUL Jun 15', hul_df_qtr_5_drop.columns[5]: 'HUL Mar 15'})

hul_df_qtr_5_drop = hul_df_qtr_5_drop.rename(columns = {'Quarterly Results of Hindustan Unilever (in Rs. Cr.)':'Quarterly Results FMCG'})

# Transpose the dataframe
hul_df_qtr_5_drop_T = hul_df_qtr_5_drop.transpose()

# Reset index for the dataframe
hul_df_qtr_5_drop_T.reset_index(inplace=True)

# Make Column header as the 1st row
hul_df_qtr_5_drop_T.rename(columns=hul_df_qtr_5_drop_T.iloc[0], inplace = True)
hul_df_qtr_5_drop_T = hul_df_qtr_5_drop_T.drop(hul_df_qtr_5_drop_T.index[[0]])

# HUL results page 7: download the page and parse every table on it.
xhtml_hul_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/hindustanunilever/results/quarterly-results/HU/7#HU'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_hul_qtr_6)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
hul_df_qtr_6 = pd.DataFrame(p.tables[1])
hul_df_qtr_6 = hul_df_qtr_6.drop(columns=hul_df_qtr_6.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
hul_df_qtr_6 = hul_df_qtr_6.rename(columns=hul_df_qtr_6.iloc[0])
hul_df_qtr_6_drop = hul_df_qtr_6.drop(index=hul_df_qtr_6.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
hul_df_qtr_6_drop = hul_df_qtr_6_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
hul_df_qtr_6_drop = hul_df_qtr_6_drop.rename(
    columns=dict(zip(
        hul_df_qtr_6_drop.columns[1:6],
        ['HUL Dec 14', 'HUL Sep 14', 'HUL Jun 14', 'HUL Mar 14', 'HUL Dec 13'],
    ))
).rename(columns={'Quarterly Results of Hindustan Unilever (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
hul_df_qtr_6_drop_T = hul_df_qtr_6_drop.transpose().reset_index()
hul_df_qtr_6_drop_T = hul_df_qtr_6_drop_T.rename(columns=hul_df_qtr_6_drop_T.iloc[0])
hul_df_qtr_6_drop_T = hul_df_qtr_6_drop_T.drop(index=hul_df_qtr_6_drop_T.index[[0]])



# Stack the six per-page HUL frames, in page order, under a fresh 0..n-1 index.
hul_df = pd.concat(
    [
        hul_df_qtr_1_drop_T,
        hul_df_qtr_2_drop_T,
        hul_df_qtr_3_drop_T,
        hul_df_qtr_4_drop_T,
        hul_df_qtr_5_drop_T,
        hul_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Flip the row order and renumber the rows from 0.
hul_df_r = hul_df.iloc[::-1].reset_index(drop=True)

# Attach the price and index series one after another, matching rows on the
# positional index of both sides.
hul_df_r_price = (
    hul_df_r
    .merge(prices_df_qtr_hul, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Remove the ticker column, '1q_avg_diff' and the 'index*' columns
# (presumably left over from the merged inputs).
hul_df_final = hul_df_r_price.drop(
    columns=['HINDUNILVR.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# The first `split_point` rows form the training set; the remainder is the test set.
split_point = 20
data_train_2, data_test_2 = hul_df_final.iloc[:split_point], hul_df_final.iloc[split_point:]




#### 03. ITC

In [None]:
# Scrape, select financial features and print to a Dataframe

# ITC results page 2: download the page and parse every table on it.
xhtml_itc_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/itc/results/quarterly-results/ITC/2#ITC'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_itc_qtr_1)

# Build a frame from the second parsed table and remove the columns at
# positions 1-4 and 6.
cols1 = [1, 2, 3, 4, 6]
itc_df_qtr_1 = pd.DataFrame(p.tables[1])
itc_df_qtr_1 = itc_df_qtr_1.drop(columns=itc_df_qtr_1.columns[cols1])

# Use the values of row 0 as column labels, then remove that row.
itc_df_qtr_1 = itc_df_qtr_1.rename(columns=itc_df_qtr_1.iloc[0])
itc_df_qtr_1_drop = itc_df_qtr_1.drop(index=itc_df_qtr_1.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
itc_df_qtr_1_drop = itc_df_qtr_1_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Prefix the 'Mar 20' column with the company name and give the description
# column the label shared by all companies.
itc_df_qtr_1_drop = itc_df_qtr_1_drop.rename(columns={'Mar 20': 'ITC Mar 20'}).rename(
    columns={'Quarterly Results of ITC (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
itc_df_qtr_1_drop_T = itc_df_qtr_1_drop.transpose().reset_index()
itc_df_qtr_1_drop_T = itc_df_qtr_1_drop_T.rename(columns=itc_df_qtr_1_drop_T.iloc[0])
itc_df_qtr_1_drop_T = itc_df_qtr_1_drop_T.drop(index=itc_df_qtr_1_drop_T.index[[0]])


# ITC results page 3: download the page and parse every table on it.
xhtml_itc_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/itc/results/quarterly-results/ITC/3#ITC'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_itc_qtr_2)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
itc_df_qtr_2 = pd.DataFrame(p.tables[1])
itc_df_qtr_2 = itc_df_qtr_2.drop(columns=itc_df_qtr_2.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
itc_df_qtr_2 = itc_df_qtr_2.rename(columns=itc_df_qtr_2.iloc[0])
itc_df_qtr_2_drop = itc_df_qtr_2.drop(index=itc_df_qtr_2.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
itc_df_qtr_2_drop = itc_df_qtr_2_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
itc_df_qtr_2_drop = itc_df_qtr_2_drop.rename(
    columns=dict(zip(
        itc_df_qtr_2_drop.columns[1:6],
        ['ITC Dec 19', 'ITC Sep 19', 'ITC Jun 19', 'ITC Mar 19', 'ITC Dec 18'],
    ))
).rename(columns={'Quarterly Results of ITC (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
itc_df_qtr_2_drop_T = itc_df_qtr_2_drop.transpose().reset_index()
itc_df_qtr_2_drop_T = itc_df_qtr_2_drop_T.rename(columns=itc_df_qtr_2_drop_T.iloc[0])
itc_df_qtr_2_drop_T = itc_df_qtr_2_drop_T.drop(index=itc_df_qtr_2_drop_T.index[[0]])

# ITC results page 4: download the page and parse every table on it.
xhtml_itc_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/itc/results/quarterly-results/ITC/4#ITC'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_itc_qtr_3)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
itc_df_qtr_3 = pd.DataFrame(p.tables[1])
itc_df_qtr_3 = itc_df_qtr_3.drop(columns=itc_df_qtr_3.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
itc_df_qtr_3 = itc_df_qtr_3.rename(columns=itc_df_qtr_3.iloc[0])
itc_df_qtr_3_drop = itc_df_qtr_3.drop(index=itc_df_qtr_3.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
itc_df_qtr_3_drop = itc_df_qtr_3_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
itc_df_qtr_3_drop = itc_df_qtr_3_drop.rename(
    columns=dict(zip(
        itc_df_qtr_3_drop.columns[1:6],
        ['ITC Sep 18', 'ITC Jun 18', 'ITC Mar 18', 'ITC Dec 17', 'ITC Sep 17'],
    ))
).rename(columns={'Quarterly Results of ITC (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
itc_df_qtr_3_drop_T = itc_df_qtr_3_drop.transpose().reset_index()
itc_df_qtr_3_drop_T = itc_df_qtr_3_drop_T.rename(columns=itc_df_qtr_3_drop_T.iloc[0])
itc_df_qtr_3_drop_T = itc_df_qtr_3_drop_T.drop(index=itc_df_qtr_3_drop_T.index[[0]])



# ITC results page 5: download the page and parse every table on it.
xhtml_itc_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/itc/results/quarterly-results/ITC/5#ITC'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_itc_qtr_4)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
itc_df_qtr_4 = pd.DataFrame(p.tables[1])
itc_df_qtr_4 = itc_df_qtr_4.drop(columns=itc_df_qtr_4.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
itc_df_qtr_4 = itc_df_qtr_4.rename(columns=itc_df_qtr_4.iloc[0])
itc_df_qtr_4_drop = itc_df_qtr_4.drop(index=itc_df_qtr_4.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
itc_df_qtr_4_drop = itc_df_qtr_4_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
itc_df_qtr_4_drop = itc_df_qtr_4_drop.rename(
    columns=dict(zip(
        itc_df_qtr_4_drop.columns[1:6],
        ['ITC Jun 17', 'ITC Mar 17', 'ITC Dec 16', 'ITC Sep 16', 'ITC Jun 16'],
    ))
).rename(columns={'Quarterly Results of ITC (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
itc_df_qtr_4_drop_T = itc_df_qtr_4_drop.transpose().reset_index()
itc_df_qtr_4_drop_T = itc_df_qtr_4_drop_T.rename(columns=itc_df_qtr_4_drop_T.iloc[0])
itc_df_qtr_4_drop_T = itc_df_qtr_4_drop_T.drop(index=itc_df_qtr_4_drop_T.index[[0]])

# ITC results page 6: download the page and parse every table on it.
xhtml_itc_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/itc/results/quarterly-results/ITC/6#ITC'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_itc_qtr_5)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
itc_df_qtr_5 = pd.DataFrame(p.tables[1])
itc_df_qtr_5 = itc_df_qtr_5.drop(columns=itc_df_qtr_5.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
itc_df_qtr_5 = itc_df_qtr_5.rename(columns=itc_df_qtr_5.iloc[0])
itc_df_qtr_5_drop = itc_df_qtr_5.drop(index=itc_df_qtr_5.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
itc_df_qtr_5_drop = itc_df_qtr_5_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
itc_df_qtr_5_drop = itc_df_qtr_5_drop.rename(
    columns=dict(zip(
        itc_df_qtr_5_drop.columns[1:6],
        ['ITC Mar 16', 'ITC Dec 15', 'ITC Sep 15', 'ITC Jun 15', 'ITC Mar 15'],
    ))
).rename(columns={'Quarterly Results of ITC (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
itc_df_qtr_5_drop_T = itc_df_qtr_5_drop.transpose().reset_index()
itc_df_qtr_5_drop_T = itc_df_qtr_5_drop_T.rename(columns=itc_df_qtr_5_drop_T.iloc[0])
itc_df_qtr_5_drop_T = itc_df_qtr_5_drop_T.drop(index=itc_df_qtr_5_drop_T.index[[0]])

# ITC results page 7: download the page and parse every table on it.
xhtml_itc_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/itc/results/quarterly-results/ITC/7#ITC'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_itc_qtr_6)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
itc_df_qtr_6 = pd.DataFrame(p.tables[1])
itc_df_qtr_6 = itc_df_qtr_6.drop(columns=itc_df_qtr_6.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
itc_df_qtr_6 = itc_df_qtr_6.rename(columns=itc_df_qtr_6.iloc[0])
itc_df_qtr_6_drop = itc_df_qtr_6.drop(index=itc_df_qtr_6.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
itc_df_qtr_6_drop = itc_df_qtr_6_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
itc_df_qtr_6_drop = itc_df_qtr_6_drop.rename(
    columns=dict(zip(
        itc_df_qtr_6_drop.columns[1:6],
        ['ITC Dec 14', 'ITC Sep 14', 'ITC Jun 14', 'ITC Mar 14', 'ITC Dec 13'],
    ))
).rename(columns={'Quarterly Results of ITC (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
itc_df_qtr_6_drop_T = itc_df_qtr_6_drop.transpose().reset_index()
itc_df_qtr_6_drop_T = itc_df_qtr_6_drop_T.rename(columns=itc_df_qtr_6_drop_T.iloc[0])
itc_df_qtr_6_drop_T = itc_df_qtr_6_drop_T.drop(index=itc_df_qtr_6_drop_T.index[[0]])



# Stack the six per-page ITC frames, in page order, under a fresh 0..n-1 index.
itc_df = pd.concat(
    [
        itc_df_qtr_1_drop_T,
        itc_df_qtr_2_drop_T,
        itc_df_qtr_3_drop_T,
        itc_df_qtr_4_drop_T,
        itc_df_qtr_5_drop_T,
        itc_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Flip the row order and renumber the rows from 0.
itc_df_r = itc_df.iloc[::-1].reset_index(drop=True)

# Attach the price and index series one after another, matching rows on the
# positional index of both sides.
itc_df_r_price = (
    itc_df_r
    .merge(prices_df_qtr_itc, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Remove the ticker column, '1q_avg_diff' and the 'index*' columns
# (presumably left over from the merged inputs).
itc_df_final = itc_df_r_price.drop(
    columns=['ITC.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# Assign the split point here instead of relying on the value left behind by
# another company's cell; otherwise this cell raises NameError when it is run
# on its own or before that cell. 20 is the value the other cells use.
split_point = 20

# The first `split_point` rows form the training set; the remainder is the test set.
data_train_3 = itc_df_final.iloc[:split_point]
data_test_3 = itc_df_final.iloc[split_point:]



#### 04. Nestlé India

In [None]:
# Scrape, select financial features and print to a Dataframe

# Nestle India results page 2: download the page and parse every table on it.
xhtml_nst_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/nestleindia/results/quarterly-results/NI/2#NI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_nst_qtr_1)

# Build a frame from the second parsed table and remove the columns at
# positions 1-4 and 6.
cols1 = [1, 2, 3, 4, 6]
nst_df_qtr_1 = pd.DataFrame(p.tables[1])
nst_df_qtr_1 = nst_df_qtr_1.drop(columns=nst_df_qtr_1.columns[cols1])

# Use the values of row 0 as column labels, then remove that row.
nst_df_qtr_1 = nst_df_qtr_1.rename(columns=nst_df_qtr_1.iloc[0])
nst_df_qtr_1_drop = nst_df_qtr_1.drop(index=nst_df_qtr_1.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
nst_df_qtr_1_drop = nst_df_qtr_1_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Prefix the 'Mar 20' column with the company name and give the description
# column the label shared by all companies.
nst_df_qtr_1_drop = nst_df_qtr_1_drop.rename(columns={'Mar 20': 'Nestle Mar 20'}).rename(
    columns={'Quarterly Results of Nestle India (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
nst_df_qtr_1_drop_T = nst_df_qtr_1_drop.transpose().reset_index()
nst_df_qtr_1_drop_T = nst_df_qtr_1_drop_T.rename(columns=nst_df_qtr_1_drop_T.iloc[0])
nst_df_qtr_1_drop_T = nst_df_qtr_1_drop_T.drop(index=nst_df_qtr_1_drop_T.index[[0]])


# Nestle India results page 3: download the page and parse every table on it.
xhtml_nst_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/nestleindia/results/quarterly-results/NI/3#NI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_nst_qtr_2)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
nst_df_qtr_2 = pd.DataFrame(p.tables[1])
nst_df_qtr_2 = nst_df_qtr_2.drop(columns=nst_df_qtr_2.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
nst_df_qtr_2 = nst_df_qtr_2.rename(columns=nst_df_qtr_2.iloc[0])
nst_df_qtr_2_drop = nst_df_qtr_2.drop(index=nst_df_qtr_2.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
nst_df_qtr_2_drop = nst_df_qtr_2_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
nst_df_qtr_2_drop = nst_df_qtr_2_drop.rename(
    columns=dict(zip(
        nst_df_qtr_2_drop.columns[1:6],
        ['Nestle Dec 19', 'Nestle Sep 19', 'Nestle Jun 19', 'Nestle Mar 19', 'Nestle Dec 18'],
    ))
).rename(columns={'Quarterly Results of Nestle India (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
nst_df_qtr_2_drop_T = nst_df_qtr_2_drop.transpose().reset_index()
nst_df_qtr_2_drop_T = nst_df_qtr_2_drop_T.rename(columns=nst_df_qtr_2_drop_T.iloc[0])
nst_df_qtr_2_drop_T = nst_df_qtr_2_drop_T.drop(index=nst_df_qtr_2_drop_T.index[[0]])

# Nestle India results page 4: download the page and parse every table on it.
xhtml_nst_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/nestleindia/results/quarterly-results/NI/4#NI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_nst_qtr_3)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
nst_df_qtr_3 = pd.DataFrame(p.tables[1])
nst_df_qtr_3 = nst_df_qtr_3.drop(columns=nst_df_qtr_3.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
nst_df_qtr_3 = nst_df_qtr_3.rename(columns=nst_df_qtr_3.iloc[0])
nst_df_qtr_3_drop = nst_df_qtr_3.drop(index=nst_df_qtr_3.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
nst_df_qtr_3_drop = nst_df_qtr_3_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
nst_df_qtr_3_drop = nst_df_qtr_3_drop.rename(
    columns=dict(zip(
        nst_df_qtr_3_drop.columns[1:6],
        ['Nestle Sep 18', 'Nestle Jun 18', 'Nestle Mar 18', 'Nestle Dec 17', 'Nestle Sep 17'],
    ))
).rename(columns={'Quarterly Results of Nestle India (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
nst_df_qtr_3_drop_T = nst_df_qtr_3_drop.transpose().reset_index()
nst_df_qtr_3_drop_T = nst_df_qtr_3_drop_T.rename(columns=nst_df_qtr_3_drop_T.iloc[0])
nst_df_qtr_3_drop_T = nst_df_qtr_3_drop_T.drop(index=nst_df_qtr_3_drop_T.index[[0]])



# Nestle India results page 5: download the page and parse every table on it.
xhtml_nst_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/nestleindia/results/quarterly-results/NI/5#NI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_nst_qtr_4)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
nst_df_qtr_4 = pd.DataFrame(p.tables[1])
nst_df_qtr_4 = nst_df_qtr_4.drop(columns=nst_df_qtr_4.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
nst_df_qtr_4 = nst_df_qtr_4.rename(columns=nst_df_qtr_4.iloc[0])
nst_df_qtr_4_drop = nst_df_qtr_4.drop(index=nst_df_qtr_4.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
nst_df_qtr_4_drop = nst_df_qtr_4_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
nst_df_qtr_4_drop = nst_df_qtr_4_drop.rename(
    columns=dict(zip(
        nst_df_qtr_4_drop.columns[1:6],
        ['Nestle Jun 17', 'Nestle Mar 17', 'Nestle Dec 16', 'Nestle Sep 16', 'Nestle Jun 16'],
    ))
).rename(columns={'Quarterly Results of Nestle India (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
nst_df_qtr_4_drop_T = nst_df_qtr_4_drop.transpose().reset_index()
nst_df_qtr_4_drop_T = nst_df_qtr_4_drop_T.rename(columns=nst_df_qtr_4_drop_T.iloc[0])
nst_df_qtr_4_drop_T = nst_df_qtr_4_drop_T.drop(index=nst_df_qtr_4_drop_T.index[[0]])

# Nestle India results page 6: download the page and parse every table on it.
xhtml_nst_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/nestleindia/results/quarterly-results/NI/6#NI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_nst_qtr_5)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
nst_df_qtr_5 = pd.DataFrame(p.tables[1])
nst_df_qtr_5 = nst_df_qtr_5.drop(columns=nst_df_qtr_5.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
nst_df_qtr_5 = nst_df_qtr_5.rename(columns=nst_df_qtr_5.iloc[0])
nst_df_qtr_5_drop = nst_df_qtr_5.drop(index=nst_df_qtr_5.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
nst_df_qtr_5_drop = nst_df_qtr_5_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
nst_df_qtr_5_drop = nst_df_qtr_5_drop.rename(
    columns=dict(zip(
        nst_df_qtr_5_drop.columns[1:6],
        ['Nestle Mar 16', 'Nestle Dec 15', 'Nestle Sep 15', 'Nestle Jun 15', 'Nestle Mar 15'],
    ))
).rename(columns={'Quarterly Results of Nestle India (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
nst_df_qtr_5_drop_T = nst_df_qtr_5_drop.transpose().reset_index()
nst_df_qtr_5_drop_T = nst_df_qtr_5_drop_T.rename(columns=nst_df_qtr_5_drop_T.iloc[0])
nst_df_qtr_5_drop_T = nst_df_qtr_5_drop_T.drop(index=nst_df_qtr_5_drop_T.index[[0]])

# Nestle India results page 7: download the page and parse every table on it.
xhtml_nst_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/nestleindia/results/quarterly-results/NI/7#NI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_nst_qtr_6)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
nst_df_qtr_6 = pd.DataFrame(p.tables[1])
nst_df_qtr_6 = nst_df_qtr_6.drop(columns=nst_df_qtr_6.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
nst_df_qtr_6 = nst_df_qtr_6.rename(columns=nst_df_qtr_6.iloc[0])
nst_df_qtr_6_drop = nst_df_qtr_6.drop(index=nst_df_qtr_6.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
nst_df_qtr_6_drop = nst_df_qtr_6_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
nst_df_qtr_6_drop = nst_df_qtr_6_drop.rename(
    columns=dict(zip(
        nst_df_qtr_6_drop.columns[1:6],
        ['Nestle Dec 14', 'Nestle Sep 14', 'Nestle Jun 14', 'Nestle Mar 14', 'Nestle Dec 13'],
    ))
).rename(columns={'Quarterly Results of Nestle India (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
nst_df_qtr_6_drop_T = nst_df_qtr_6_drop.transpose().reset_index()
nst_df_qtr_6_drop_T = nst_df_qtr_6_drop_T.rename(columns=nst_df_qtr_6_drop_T.iloc[0])
nst_df_qtr_6_drop_T = nst_df_qtr_6_drop_T.drop(index=nst_df_qtr_6_drop_T.index[[0]])



# Stack the six per-page Nestle India frames, in page order, under a fresh
# 0..n-1 index.
nst_df = pd.concat(
    [
        nst_df_qtr_1_drop_T,
        nst_df_qtr_2_drop_T,
        nst_df_qtr_3_drop_T,
        nst_df_qtr_4_drop_T,
        nst_df_qtr_5_drop_T,
        nst_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Flip the row order and renumber the rows from 0.
nst_df_r = nst_df.iloc[::-1].reset_index(drop=True)

# Attach the price and index series one after another, matching rows on the
# positional index of both sides.
nst_df_r_price = (
    nst_df_r
    .merge(prices_df_qtr_nestle, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Remove the ticker column, '1q_avg_diff' and the 'index*' columns
# (presumably left over from the merged inputs).
nst_df_final = nst_df_r_price.drop(
    columns=['NESTLEIND.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# The first `split_point` rows form the training set; the remainder is the test set.
split_point = 20
data_train_4, data_test_4 = nst_df_final.iloc[:split_point], nst_df_final.iloc[split_point:]


#### 05. Dabur India Ltd

In [None]:
# Scrape, select financial features and print to a Dataframe

# Dabur India results page 2: download the page and parse every table on it.
xhtml_dbr_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/daburindia/results/quarterly-results/DI/2#DI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_dbr_qtr_1)

# Build a frame from the second parsed table and remove the columns at
# positions 1-4 and 6.
cols1 = [1, 2, 3, 4, 6]
dbr_df_qtr_1 = pd.DataFrame(p.tables[1])
dbr_df_qtr_1 = dbr_df_qtr_1.drop(columns=dbr_df_qtr_1.columns[cols1])

# Use the values of row 0 as column labels, then remove that row.
dbr_df_qtr_1 = dbr_df_qtr_1.rename(columns=dbr_df_qtr_1.iloc[0])
dbr_df_qtr_1_drop = dbr_df_qtr_1.drop(index=dbr_df_qtr_1.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
dbr_df_qtr_1_drop = dbr_df_qtr_1_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Prefix the 'Mar 20' column with the company name and give the description
# column the label shared by all companies.
dbr_df_qtr_1_drop = dbr_df_qtr_1_drop.rename(columns={'Mar 20': 'Dabur Mar 20'}).rename(
    columns={'Quarterly Results of Dabur India (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
dbr_df_qtr_1_drop_T = dbr_df_qtr_1_drop.transpose().reset_index()
dbr_df_qtr_1_drop_T = dbr_df_qtr_1_drop_T.rename(columns=dbr_df_qtr_1_drop_T.iloc[0])
dbr_df_qtr_1_drop_T = dbr_df_qtr_1_drop_T.drop(index=dbr_df_qtr_1_drop_T.index[[0]])


# Dabur India results page 3: download the page and parse every table on it.
xhtml_dbr_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/daburindia/results/quarterly-results/DI/3#DI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_dbr_qtr_2)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
dbr_df_qtr_2 = pd.DataFrame(p.tables[1])
dbr_df_qtr_2 = dbr_df_qtr_2.drop(columns=dbr_df_qtr_2.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
dbr_df_qtr_2 = dbr_df_qtr_2.rename(columns=dbr_df_qtr_2.iloc[0])
dbr_df_qtr_2_drop = dbr_df_qtr_2.drop(index=dbr_df_qtr_2.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
dbr_df_qtr_2_drop = dbr_df_qtr_2_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
dbr_df_qtr_2_drop = dbr_df_qtr_2_drop.rename(
    columns=dict(zip(
        dbr_df_qtr_2_drop.columns[1:6],
        ['Dabur Dec 19', 'Dabur Sep 19', 'Dabur Jun 19', 'Dabur Mar 19', 'Dabur Dec 18'],
    ))
).rename(columns={'Quarterly Results of Dabur India (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
dbr_df_qtr_2_drop_T = dbr_df_qtr_2_drop.transpose().reset_index()
dbr_df_qtr_2_drop_T = dbr_df_qtr_2_drop_T.rename(columns=dbr_df_qtr_2_drop_T.iloc[0])
dbr_df_qtr_2_drop_T = dbr_df_qtr_2_drop_T.drop(index=dbr_df_qtr_2_drop_T.index[[0]])

# Dabur India results page 4: download the page and parse every table on it.
xhtml_dbr_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/daburindia/results/quarterly-results/DI/4#DI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_dbr_qtr_3)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
dbr_df_qtr_3 = pd.DataFrame(p.tables[1])
dbr_df_qtr_3 = dbr_df_qtr_3.drop(columns=dbr_df_qtr_3.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
dbr_df_qtr_3 = dbr_df_qtr_3.rename(columns=dbr_df_qtr_3.iloc[0])
dbr_df_qtr_3_drop = dbr_df_qtr_3.drop(index=dbr_df_qtr_3.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
dbr_df_qtr_3_drop = dbr_df_qtr_3_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
dbr_df_qtr_3_drop = dbr_df_qtr_3_drop.rename(
    columns=dict(zip(
        dbr_df_qtr_3_drop.columns[1:6],
        ['Dabur Sep 18', 'Dabur Jun 18', 'Dabur Mar 18', 'Dabur Dec 17', 'Dabur Sep 17'],
    ))
).rename(columns={'Quarterly Results of Dabur India (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
dbr_df_qtr_3_drop_T = dbr_df_qtr_3_drop.transpose().reset_index()
dbr_df_qtr_3_drop_T = dbr_df_qtr_3_drop_T.rename(columns=dbr_df_qtr_3_drop_T.iloc[0])
dbr_df_qtr_3_drop_T = dbr_df_qtr_3_drop_T.drop(index=dbr_df_qtr_3_drop_T.index[[0]])



# Dabur India results page 5: download the page and parse every table on it.
xhtml_dbr_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/daburindia/results/quarterly-results/DI/5#DI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_dbr_qtr_4)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
dbr_df_qtr_4 = pd.DataFrame(p.tables[1])
dbr_df_qtr_4 = dbr_df_qtr_4.drop(columns=dbr_df_qtr_4.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
dbr_df_qtr_4 = dbr_df_qtr_4.rename(columns=dbr_df_qtr_4.iloc[0])
dbr_df_qtr_4_drop = dbr_df_qtr_4.drop(index=dbr_df_qtr_4.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
dbr_df_qtr_4_drop = dbr_df_qtr_4_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
dbr_df_qtr_4_drop = dbr_df_qtr_4_drop.rename(
    columns=dict(zip(
        dbr_df_qtr_4_drop.columns[1:6],
        ['Dabur Jun 17', 'Dabur Mar 17', 'Dabur Dec 16', 'Dabur Sep 16', 'Dabur Jun 16'],
    ))
).rename(columns={'Quarterly Results of Dabur India (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
dbr_df_qtr_4_drop_T = dbr_df_qtr_4_drop.transpose().reset_index()
dbr_df_qtr_4_drop_T = dbr_df_qtr_4_drop_T.rename(columns=dbr_df_qtr_4_drop_T.iloc[0])
dbr_df_qtr_4_drop_T = dbr_df_qtr_4_drop_T.drop(index=dbr_df_qtr_4_drop_T.index[[0]])

# Dabur India results page 6: download the page and parse every table on it.
xhtml_dbr_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/daburindia/results/quarterly-results/DI/6#DI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_dbr_qtr_5)

# Build a frame from the second parsed table and remove the column at position 6.
cols2 = [6]
dbr_df_qtr_5 = pd.DataFrame(p.tables[1])
dbr_df_qtr_5 = dbr_df_qtr_5.drop(columns=dbr_df_qtr_5.columns[cols2])

# Use the values of row 0 as column labels, then remove that row.
dbr_df_qtr_5 = dbr_df_qtr_5.rename(columns=dbr_df_qtr_5.iloc[0])
dbr_df_qtr_5_drop = dbr_df_qtr_5.drop(index=dbr_df_qtr_5.index[[0]])

# Remove the rows (by row label) that are not needed for the analysis.
dbr_df_qtr_5_drop = dbr_df_qtr_5_drop.drop(
    index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)]
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names and
# give the description column the label shared by all companies.
dbr_df_qtr_5_drop = dbr_df_qtr_5_drop.rename(
    columns=dict(zip(
        dbr_df_qtr_5_drop.columns[1:6],
        ['Dabur Mar 16', 'Dabur Dec 15', 'Dabur Sep 15', 'Dabur Jun 15', 'Dabur Mar 15'],
    ))
).rename(columns={'Quarterly Results of Dabur India (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose, move the old index into a regular column, then once more use the
# first row as the header and remove it.
dbr_df_qtr_5_drop_T = dbr_df_qtr_5_drop.transpose().reset_index()
dbr_df_qtr_5_drop_T = dbr_df_qtr_5_drop_T.rename(columns=dbr_df_qtr_5_drop_T.iloc[0])
dbr_df_qtr_5_drop_T = dbr_df_qtr_5_drop_T.drop(index=dbr_df_qtr_5_drop_T.index[[0]])

# Dabur, results page 7: download the page and parse every HTML table on it.
xhtml_dbr_qtr_6 = url_get_contents('https://www.moneycontrol.com/financials/daburindia/results/quarterly-results/DI/7#DI').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_dbr_qtr_6)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
dbr_df_qtr_6 = pd.DataFrame(p.tables[1])
dbr_df_qtr_6 = dbr_df_qtr_6.drop(columns=dbr_df_qtr_6.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
dbr_df_qtr_6 = dbr_df_qtr_6.rename(columns=dbr_df_qtr_6.iloc[0])
dbr_df_qtr_6_drop = (
    dbr_df_qtr_6
    .drop(dbr_df_qtr_6.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
dbr_df_qtr_6_drop = dbr_df_qtr_6_drop.rename(columns={
    dbr_df_qtr_6_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Dabur Dec 14', 'Dabur Sep 14', 'Dabur Jun 14', 'Dabur Mar 14', 'Dabur Dec 13'],
        start=1)
}).rename(columns={'Quarterly Results of Dabur India (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
dbr_df_qtr_6_drop_T = dbr_df_qtr_6_drop.T.reset_index()
dbr_df_qtr_6_drop_T = (
    dbr_df_qtr_6_drop_T
    .rename(columns=dbr_df_qtr_6_drop_T.iloc[0])
    .drop(dbr_df_qtr_6_drop_T.index[[0]])
)



# Merge dataframes for Dabur: stack the six per-page frames in one concat call,
# renumbering the rows from 0.
dbr_df = pd.concat(
    [
        dbr_df_qtr_1_drop_T, dbr_df_qtr_2_drop_T, dbr_df_qtr_3_drop_T,
        dbr_df_qtr_4_drop_T, dbr_df_qtr_5_drop_T, dbr_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the row order and renumber from 0, because the joins below match
# rows purely on the integer index.
dbr_df_r = dbr_df.iloc[::-1].reset_index(drop=True)

# Index-on-index joins with the price, sector-index, NASDAQ and crude series.
# The join order is kept as is: the 'index_x' / 'index_y' / 'index' columns
# dropped afterwards are presumably produced by merge's default suffixes for
# clashing 'index' columns -- TODO confirm against the frames built earlier.
dbr_df_r_price = (
    dbr_df_r
    .merge(prices_df_qtr_dabur, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Remove the helper columns that are not wanted in the final frame.
dbr_df_final = dbr_df_r_price.drop(columns=['DABUR.NS', '1q_avg_diff', 'index_x', 'index_y', 'index'])

# Positional train/test split: the first `split_point` rows train, the rest test.
split_point = 20
data_train_5 = dbr_df_final.iloc[:split_point]
data_test_5 = dbr_df_final.iloc[split_point:]



#### 06. Britannia Industries Ltd

In [None]:
# Scrape, select financial features and print to a Dataframe

# Britannia, results page 2: download the page and parse every HTML table on it.
xhtml_bri_qtr_1 = url_get_contents('https://www.moneycontrol.com/financials/britanniaindustries/results/quarterly-results/BI/2#BI').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_bri_qtr_1)

# Build a frame from the second parsed table, keeping only the columns at
# positions 0 and 5 (positions 1-4 and 6 are discarded).
cols1 = [1, 2, 3, 4, 6]
bri_df_qtr_1 = pd.DataFrame(p.tables[1])
bri_df_qtr_1 = bri_df_qtr_1.drop(columns=bri_df_qtr_1.columns[cols1])

# Relabel the columns with the values of the first row, remove that row and
# the rows not required for the analysis, then rename the two headers so they
# carry the company name / a company-independent name.
bri_df_qtr_1 = bri_df_qtr_1.rename(columns=bri_df_qtr_1.iloc[0])
bri_df_qtr_1_drop = (
    bri_df_qtr_1
    .drop(bri_df_qtr_1.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns={'Mar 20': 'Britannia Mar 20'})
    .rename(columns={'Quarterly Results of Britannia Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so the quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
bri_df_qtr_1_drop_T = bri_df_qtr_1_drop.T.reset_index()
bri_df_qtr_1_drop_T = (
    bri_df_qtr_1_drop_T
    .rename(columns=bri_df_qtr_1_drop_T.iloc[0])
    .drop(bri_df_qtr_1_drop_T.index[[0]])
)


# Britannia, results page 3: download the page and parse every HTML table on it.
xhtml_bri_qtr_2 = url_get_contents('https://www.moneycontrol.com/financials/britanniaindustries/results/quarterly-results/BI/3#BI').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_bri_qtr_2)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
bri_df_qtr_2 = pd.DataFrame(p.tables[1])
bri_df_qtr_2 = bri_df_qtr_2.drop(columns=bri_df_qtr_2.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
bri_df_qtr_2 = bri_df_qtr_2.rename(columns=bri_df_qtr_2.iloc[0])
bri_df_qtr_2_drop = (
    bri_df_qtr_2
    .drop(bri_df_qtr_2.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
bri_df_qtr_2_drop = bri_df_qtr_2_drop.rename(columns={
    bri_df_qtr_2_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Britannia Dec 19', 'Britannia Sep 19', 'Britannia Jun 19', 'Britannia Mar 19', 'Britannia Dec 18'],
        start=1)
}).rename(columns={'Quarterly Results of Britannia Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
bri_df_qtr_2_drop_T = bri_df_qtr_2_drop.T.reset_index()
bri_df_qtr_2_drop_T = (
    bri_df_qtr_2_drop_T
    .rename(columns=bri_df_qtr_2_drop_T.iloc[0])
    .drop(bri_df_qtr_2_drop_T.index[[0]])
)

# Britannia, results page 4: download the page and parse every HTML table on it.
xhtml_bri_qtr_3 = url_get_contents('https://www.moneycontrol.com/financials/britanniaindustries/results/quarterly-results/BI/4#BI').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_bri_qtr_3)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
bri_df_qtr_3 = pd.DataFrame(p.tables[1])
bri_df_qtr_3 = bri_df_qtr_3.drop(columns=bri_df_qtr_3.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
bri_df_qtr_3 = bri_df_qtr_3.rename(columns=bri_df_qtr_3.iloc[0])
bri_df_qtr_3_drop = (
    bri_df_qtr_3
    .drop(bri_df_qtr_3.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
bri_df_qtr_3_drop = bri_df_qtr_3_drop.rename(columns={
    bri_df_qtr_3_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Britannia Sep 18', 'Britannia Jun 18', 'Britannia Mar 18', 'Britannia Dec 17', 'Britannia Sep 17'],
        start=1)
}).rename(columns={'Quarterly Results of Britannia Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
bri_df_qtr_3_drop_T = bri_df_qtr_3_drop.T.reset_index()
bri_df_qtr_3_drop_T = (
    bri_df_qtr_3_drop_T
    .rename(columns=bri_df_qtr_3_drop_T.iloc[0])
    .drop(bri_df_qtr_3_drop_T.index[[0]])
)



# Britannia, results page 5: download the page and parse every HTML table on it.
xhtml_bri_qtr_4 = url_get_contents('https://www.moneycontrol.com/financials/britanniaindustries/results/quarterly-results/BI/5#BI').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_bri_qtr_4)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
bri_df_qtr_4 = pd.DataFrame(p.tables[1])
bri_df_qtr_4 = bri_df_qtr_4.drop(columns=bri_df_qtr_4.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
bri_df_qtr_4 = bri_df_qtr_4.rename(columns=bri_df_qtr_4.iloc[0])
bri_df_qtr_4_drop = (
    bri_df_qtr_4
    .drop(bri_df_qtr_4.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
bri_df_qtr_4_drop = bri_df_qtr_4_drop.rename(columns={
    bri_df_qtr_4_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Britannia Jun 17', 'Britannia Mar 17', 'Britannia Dec 16', 'Britannia Sep 16', 'Britannia Jun 16'],
        start=1)
}).rename(columns={'Quarterly Results of Britannia Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
bri_df_qtr_4_drop_T = bri_df_qtr_4_drop.T.reset_index()
bri_df_qtr_4_drop_T = (
    bri_df_qtr_4_drop_T
    .rename(columns=bri_df_qtr_4_drop_T.iloc[0])
    .drop(bri_df_qtr_4_drop_T.index[[0]])
)

# Britannia, results page 6: download the page and parse every HTML table on it.
xhtml_bri_qtr_5 = url_get_contents('https://www.moneycontrol.com/financials/britanniaindustries/results/quarterly-results/BI/6#BI').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_bri_qtr_5)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
bri_df_qtr_5 = pd.DataFrame(p.tables[1])
bri_df_qtr_5 = bri_df_qtr_5.drop(columns=bri_df_qtr_5.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
bri_df_qtr_5 = bri_df_qtr_5.rename(columns=bri_df_qtr_5.iloc[0])
bri_df_qtr_5_drop = (
    bri_df_qtr_5
    .drop(bri_df_qtr_5.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
bri_df_qtr_5_drop = bri_df_qtr_5_drop.rename(columns={
    bri_df_qtr_5_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Britannia Mar 16', 'Britannia Dec 15', 'Britannia Sep 15', 'Britannia Jun 15', 'Britannia Mar 15'],
        start=1)
}).rename(columns={'Quarterly Results of Britannia Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
bri_df_qtr_5_drop_T = bri_df_qtr_5_drop.T.reset_index()
bri_df_qtr_5_drop_T = (
    bri_df_qtr_5_drop_T
    .rename(columns=bri_df_qtr_5_drop_T.iloc[0])
    .drop(bri_df_qtr_5_drop_T.index[[0]])
)

# Britannia, results page 7: download the page and parse every HTML table on it.
xhtml_bri_qtr_6 = url_get_contents('https://www.moneycontrol.com/financials/britanniaindustries/results/quarterly-results/BI/7#BI').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_bri_qtr_6)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
bri_df_qtr_6 = pd.DataFrame(p.tables[1])
bri_df_qtr_6 = bri_df_qtr_6.drop(columns=bri_df_qtr_6.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
bri_df_qtr_6 = bri_df_qtr_6.rename(columns=bri_df_qtr_6.iloc[0])
bri_df_qtr_6_drop = (
    bri_df_qtr_6
    .drop(bri_df_qtr_6.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
bri_df_qtr_6_drop = bri_df_qtr_6_drop.rename(columns={
    bri_df_qtr_6_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Britannia Dec 14', 'Britannia Sep 14', 'Britannia Jun 14', 'Britannia Mar 14', 'Britannia Dec 13'],
        start=1)
}).rename(columns={'Quarterly Results of Britannia Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
bri_df_qtr_6_drop_T = bri_df_qtr_6_drop.T.reset_index()
bri_df_qtr_6_drop_T = (
    bri_df_qtr_6_drop_T
    .rename(columns=bri_df_qtr_6_drop_T.iloc[0])
    .drop(bri_df_qtr_6_drop_T.index[[0]])
)



# Merge dataframes for Britannia: stack the six per-page frames (labelled
# Mar 20 back to Dec 13) in one concat call, renumbering the rows from 0.
bri_df = pd.concat(
    [
        bri_df_qtr_1_drop_T, bri_df_qtr_2_drop_T, bri_df_qtr_3_drop_T,
        bri_df_qtr_4_drop_T, bri_df_qtr_5_drop_T, bri_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the row order (so the Dec 13 row comes first) and renumber from 0,
# because the joins below match rows purely on the integer index.
bri_df_r = bri_df.iloc[::-1].reset_index(drop=True)

# Index-on-index joins with the price, sector-index, NASDAQ and crude series.
# The join order is kept as is: the 'index_x' / 'index_y' / 'index' columns
# dropped afterwards are presumably produced by merge's default suffixes for
# clashing 'index' columns -- TODO confirm against the frames built earlier.
bri_df_r_price = (
    bri_df_r
    .merge(prices_df_qtr_brit, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Remove the helper columns that are not wanted in the final frame.
bri_df_final = bri_df_r_price.drop(columns=['BRITANNIA.NS', '1q_avg_diff', 'index_x', 'index_y', 'index'])

# Positional train/test split: the first `split_point` rows train, the rest test.
split_point = 20
data_train_6 = bri_df_final.iloc[:split_point]
data_test_6 = bri_df_final.iloc[split_point:]



#### 07. Emami Limited

In [None]:
# Scrape, select financial features and print to a Dataframe

# Emami, results page 2: download the page and parse every HTML table on it.
xhtml_ema_qtr_1 = url_get_contents('https://www.moneycontrol.com/financials/emami/results/quarterly-results/E06/2#E06').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ema_qtr_1)

# Build a frame from the second parsed table, keeping only the columns at
# positions 0 and 5 (positions 1-4 and 6 are discarded).
cols1 = [1, 2, 3, 4, 6]
ema_df_qtr_1 = pd.DataFrame(p.tables[1])
ema_df_qtr_1 = ema_df_qtr_1.drop(columns=ema_df_qtr_1.columns[cols1])

# Relabel the columns with the values of the first row, remove that row and
# the rows not required for the analysis, then rename the two headers so they
# carry the company name / a company-independent name.
ema_df_qtr_1 = ema_df_qtr_1.rename(columns=ema_df_qtr_1.iloc[0])
ema_df_qtr_1_drop = (
    ema_df_qtr_1
    .drop(ema_df_qtr_1.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns={'Mar 20': 'Emami Mar 20'})
    .rename(columns={'Quarterly Results of Emami (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so the quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
ema_df_qtr_1_drop_T = ema_df_qtr_1_drop.T.reset_index()
ema_df_qtr_1_drop_T = (
    ema_df_qtr_1_drop_T
    .rename(columns=ema_df_qtr_1_drop_T.iloc[0])
    .drop(ema_df_qtr_1_drop_T.index[[0]])
)


# Emami, results page 3: download the page and parse every HTML table on it.
xhtml_ema_qtr_2 = url_get_contents('https://www.moneycontrol.com/financials/emami/results/quarterly-results/E06/3#E06').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ema_qtr_2)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
ema_df_qtr_2 = pd.DataFrame(p.tables[1])
ema_df_qtr_2 = ema_df_qtr_2.drop(columns=ema_df_qtr_2.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
ema_df_qtr_2 = ema_df_qtr_2.rename(columns=ema_df_qtr_2.iloc[0])
ema_df_qtr_2_drop = (
    ema_df_qtr_2
    .drop(ema_df_qtr_2.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
ema_df_qtr_2_drop = ema_df_qtr_2_drop.rename(columns={
    ema_df_qtr_2_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Emami Dec 19', 'Emami Sep 19', 'Emami Jun 19', 'Emami Mar 19', 'Emami Dec 18'],
        start=1)
}).rename(columns={'Quarterly Results of Emami (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
ema_df_qtr_2_drop_T = ema_df_qtr_2_drop.T.reset_index()
ema_df_qtr_2_drop_T = (
    ema_df_qtr_2_drop_T
    .rename(columns=ema_df_qtr_2_drop_T.iloc[0])
    .drop(ema_df_qtr_2_drop_T.index[[0]])
)

# Emami, results page 4: download the page and parse every HTML table on it.
xhtml_ema_qtr_3 = url_get_contents('https://www.moneycontrol.com/financials/emami/results/quarterly-results/E06/4#E06').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ema_qtr_3)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
ema_df_qtr_3 = pd.DataFrame(p.tables[1])
ema_df_qtr_3 = ema_df_qtr_3.drop(columns=ema_df_qtr_3.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
ema_df_qtr_3 = ema_df_qtr_3.rename(columns=ema_df_qtr_3.iloc[0])
ema_df_qtr_3_drop = (
    ema_df_qtr_3
    .drop(ema_df_qtr_3.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
ema_df_qtr_3_drop = ema_df_qtr_3_drop.rename(columns={
    ema_df_qtr_3_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Emami Sep 18', 'Emami Jun 18', 'Emami Mar 18', 'Emami Dec 17', 'Emami Sep 17'],
        start=1)
}).rename(columns={'Quarterly Results of Emami (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
ema_df_qtr_3_drop_T = ema_df_qtr_3_drop.T.reset_index()
ema_df_qtr_3_drop_T = (
    ema_df_qtr_3_drop_T
    .rename(columns=ema_df_qtr_3_drop_T.iloc[0])
    .drop(ema_df_qtr_3_drop_T.index[[0]])
)



# Emami, results page 5: download the page and parse every HTML table on it.
xhtml_ema_qtr_4 = url_get_contents('https://www.moneycontrol.com/financials/emami/results/quarterly-results/E06/5#E06').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ema_qtr_4)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
ema_df_qtr_4 = pd.DataFrame(p.tables[1])
ema_df_qtr_4 = ema_df_qtr_4.drop(columns=ema_df_qtr_4.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
ema_df_qtr_4 = ema_df_qtr_4.rename(columns=ema_df_qtr_4.iloc[0])
ema_df_qtr_4_drop = (
    ema_df_qtr_4
    .drop(ema_df_qtr_4.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
ema_df_qtr_4_drop = ema_df_qtr_4_drop.rename(columns={
    ema_df_qtr_4_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Emami Jun 17', 'Emami Mar 17', 'Emami Dec 16', 'Emami Sep 16', 'Emami Jun 16'],
        start=1)
}).rename(columns={'Quarterly Results of Emami (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
ema_df_qtr_4_drop_T = ema_df_qtr_4_drop.T.reset_index()
ema_df_qtr_4_drop_T = (
    ema_df_qtr_4_drop_T
    .rename(columns=ema_df_qtr_4_drop_T.iloc[0])
    .drop(ema_df_qtr_4_drop_T.index[[0]])
)

# Emami, results page 6: download the page and parse every HTML table on it.
xhtml_ema_qtr_5 = url_get_contents('https://www.moneycontrol.com/financials/emami/results/quarterly-results/E06/6#E06').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ema_qtr_5)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
ema_df_qtr_5 = pd.DataFrame(p.tables[1])
ema_df_qtr_5 = ema_df_qtr_5.drop(columns=ema_df_qtr_5.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
ema_df_qtr_5 = ema_df_qtr_5.rename(columns=ema_df_qtr_5.iloc[0])
ema_df_qtr_5_drop = (
    ema_df_qtr_5
    .drop(ema_df_qtr_5.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
ema_df_qtr_5_drop = ema_df_qtr_5_drop.rename(columns={
    ema_df_qtr_5_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Emami Mar 16', 'Emami Dec 15', 'Emami Sep 15', 'Emami Jun 15', 'Emami Mar 15'],
        start=1)
}).rename(columns={'Quarterly Results of Emami (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
ema_df_qtr_5_drop_T = ema_df_qtr_5_drop.T.reset_index()
ema_df_qtr_5_drop_T = (
    ema_df_qtr_5_drop_T
    .rename(columns=ema_df_qtr_5_drop_T.iloc[0])
    .drop(ema_df_qtr_5_drop_T.index[[0]])
)

# Emami, results page 7: download the page and parse every HTML table on it.
xhtml_ema_qtr_6 = url_get_contents('https://www.moneycontrol.com/financials/emami/results/quarterly-results/E06/7#E06').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ema_qtr_6)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
ema_df_qtr_6 = pd.DataFrame(p.tables[1])
ema_df_qtr_6 = ema_df_qtr_6.drop(columns=ema_df_qtr_6.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
ema_df_qtr_6 = ema_df_qtr_6.rename(columns=ema_df_qtr_6.iloc[0])
ema_df_qtr_6_drop = (
    ema_df_qtr_6
    .drop(ema_df_qtr_6.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
ema_df_qtr_6_drop = ema_df_qtr_6_drop.rename(columns={
    ema_df_qtr_6_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Emami Dec 14', 'Emami Sep 14', 'Emami Jun 14', 'Emami Mar 14', 'Emami Dec 13'],
        start=1)
}).rename(columns={'Quarterly Results of Emami (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
ema_df_qtr_6_drop_T = ema_df_qtr_6_drop.T.reset_index()
ema_df_qtr_6_drop_T = (
    ema_df_qtr_6_drop_T
    .rename(columns=ema_df_qtr_6_drop_T.iloc[0])
    .drop(ema_df_qtr_6_drop_T.index[[0]])
)



# Merge dataframes for Emami: stack the six per-page frames (labelled
# Mar 20 back to Dec 13) in one concat call, renumbering the rows from 0.
ema_df = pd.concat(
    [
        ema_df_qtr_1_drop_T, ema_df_qtr_2_drop_T, ema_df_qtr_3_drop_T,
        ema_df_qtr_4_drop_T, ema_df_qtr_5_drop_T, ema_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the row order (so the Dec 13 row comes first) and renumber from 0,
# because the joins below match rows purely on the integer index.
ema_df_r = ema_df.iloc[::-1].reset_index(drop=True)

# Index-on-index joins with the price, sector-index, NASDAQ and crude series.
# The join order is kept as is: the 'index_x' / 'index_y' / 'index' columns
# dropped afterwards are presumably produced by merge's default suffixes for
# clashing 'index' columns -- TODO confirm against the frames built earlier.
ema_df_r_price = (
    ema_df_r
    .merge(prices_df_qtr_emami, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Remove the helper columns that are not wanted in the final frame.
ema_df_final = ema_df_r_price.drop(columns=['EMAMILTD.NS', '1q_avg_diff', 'index_x', 'index_y', 'index'])

# Positional train/test split: the first `split_point` rows train, the rest test.
split_point = 20
data_train_7 = ema_df_final.iloc[:split_point]
data_test_7 = ema_df_final.iloc[split_point:]



#### 08. Marico Industries

In [None]:
# Scrape, select financial features and print to a Dataframe

# Marico, results page 2: download the page and parse every HTML table on it.
xhtml_mar_qtr_1 = url_get_contents('https://www.moneycontrol.com/financials/marico/results/quarterly-results/M13/2#M13').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_mar_qtr_1)

# Build a frame from the second parsed table, keeping only the columns at
# positions 0 and 5 (positions 1-4 and 6 are discarded).
cols1 = [1, 2, 3, 4, 6]
mar_df_qtr_1 = pd.DataFrame(p.tables[1])
mar_df_qtr_1 = mar_df_qtr_1.drop(columns=mar_df_qtr_1.columns[cols1])

# Relabel the columns with the values of the first row, remove that row and
# the rows not required for the analysis, then rename the two headers so they
# carry the company name / a company-independent name.
mar_df_qtr_1 = mar_df_qtr_1.rename(columns=mar_df_qtr_1.iloc[0])
mar_df_qtr_1_drop = (
    mar_df_qtr_1
    .drop(mar_df_qtr_1.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns={'Mar 20': 'Marico Mar 20'})
    .rename(columns={'Quarterly Results of Marico (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so the quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
mar_df_qtr_1_drop_T = mar_df_qtr_1_drop.T.reset_index()
mar_df_qtr_1_drop_T = (
    mar_df_qtr_1_drop_T
    .rename(columns=mar_df_qtr_1_drop_T.iloc[0])
    .drop(mar_df_qtr_1_drop_T.index[[0]])
)


# Marico, results page 3: download the page and parse every HTML table on it.
xhtml_mar_qtr_2 = url_get_contents('https://www.moneycontrol.com/financials/marico/results/quarterly-results/M13/3#M13').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_mar_qtr_2)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
mar_df_qtr_2 = pd.DataFrame(p.tables[1])
mar_df_qtr_2 = mar_df_qtr_2.drop(columns=mar_df_qtr_2.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
mar_df_qtr_2 = mar_df_qtr_2.rename(columns=mar_df_qtr_2.iloc[0])
mar_df_qtr_2_drop = (
    mar_df_qtr_2
    .drop(mar_df_qtr_2.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
mar_df_qtr_2_drop = mar_df_qtr_2_drop.rename(columns={
    mar_df_qtr_2_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Marico Dec 19', 'Marico Sep 19', 'Marico Jun 19', 'Marico Mar 19', 'Marico Dec 18'],
        start=1)
}).rename(columns={'Quarterly Results of Marico (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
mar_df_qtr_2_drop_T = mar_df_qtr_2_drop.T.reset_index()
mar_df_qtr_2_drop_T = (
    mar_df_qtr_2_drop_T
    .rename(columns=mar_df_qtr_2_drop_T.iloc[0])
    .drop(mar_df_qtr_2_drop_T.index[[0]])
)

# Marico, results page 4: download the page and parse every HTML table on it.
xhtml_mar_qtr_3 = url_get_contents('https://www.moneycontrol.com/financials/marico/results/quarterly-results/M13/4#M13').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_mar_qtr_3)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
mar_df_qtr_3 = pd.DataFrame(p.tables[1])
mar_df_qtr_3 = mar_df_qtr_3.drop(columns=mar_df_qtr_3.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
mar_df_qtr_3 = mar_df_qtr_3.rename(columns=mar_df_qtr_3.iloc[0])
mar_df_qtr_3_drop = (
    mar_df_qtr_3
    .drop(mar_df_qtr_3.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
mar_df_qtr_3_drop = mar_df_qtr_3_drop.rename(columns={
    mar_df_qtr_3_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Marico Sep 18', 'Marico Jun 18', 'Marico Mar 18', 'Marico Dec 17', 'Marico Sep 17'],
        start=1)
}).rename(columns={'Quarterly Results of Marico (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
mar_df_qtr_3_drop_T = mar_df_qtr_3_drop.T.reset_index()
mar_df_qtr_3_drop_T = (
    mar_df_qtr_3_drop_T
    .rename(columns=mar_df_qtr_3_drop_T.iloc[0])
    .drop(mar_df_qtr_3_drop_T.index[[0]])
)



# Marico, results page 5: download the page and parse every HTML table on it.
xhtml_mar_qtr_4 = url_get_contents('https://www.moneycontrol.com/financials/marico/results/quarterly-results/M13/5#M13').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_mar_qtr_4)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
mar_df_qtr_4 = pd.DataFrame(p.tables[1])
mar_df_qtr_4 = mar_df_qtr_4.drop(columns=mar_df_qtr_4.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
mar_df_qtr_4 = mar_df_qtr_4.rename(columns=mar_df_qtr_4.iloc[0])
mar_df_qtr_4_drop = (
    mar_df_qtr_4
    .drop(mar_df_qtr_4.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
mar_df_qtr_4_drop = mar_df_qtr_4_drop.rename(columns={
    mar_df_qtr_4_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Marico Jun 17', 'Marico Mar 17', 'Marico Dec 16', 'Marico Sep 16', 'Marico Jun 16'],
        start=1)
}).rename(columns={'Quarterly Results of Marico (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
mar_df_qtr_4_drop_T = mar_df_qtr_4_drop.T.reset_index()
mar_df_qtr_4_drop_T = (
    mar_df_qtr_4_drop_T
    .rename(columns=mar_df_qtr_4_drop_T.iloc[0])
    .drop(mar_df_qtr_4_drop_T.index[[0]])
)

# Marico, results page 6: download the page and parse every HTML table on it.
xhtml_mar_qtr_5 = url_get_contents('https://www.moneycontrol.com/financials/marico/results/quarterly-results/M13/6#M13').decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_mar_qtr_5)

# Build a frame from the second parsed table and discard the column at position 6.
cols2 = [6]
mar_df_qtr_5 = pd.DataFrame(p.tables[1])
mar_df_qtr_5 = mar_df_qtr_5.drop(columns=mar_df_qtr_5.columns[cols2])

# Relabel the columns with the values of the first row, then remove that row
# together with the rows that are not required for the analysis.
mar_df_qtr_5 = mar_df_qtr_5.rename(columns=mar_df_qtr_5.iloc[0])
mar_df_qtr_5_drop = (
    mar_df_qtr_5
    .drop(mar_df_qtr_5.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Prefix the quarter columns (positions 1-5) with the company name and give
# the first column a company-independent name.
# NOTE(review): the quarter labels are assigned by position, not read from
# the page -- confirm the page still lists these quarters in this order.
mar_df_qtr_5_drop = mar_df_qtr_5_drop.rename(columns={
    mar_df_qtr_5_drop.columns[pos]: label
    for pos, label in enumerate(
        ['Marico Mar 16', 'Marico Dec 15', 'Marico Sep 15', 'Marico Jun 15', 'Marico Mar 15'],
        start=1)
}).rename(columns={'Quarterly Results of Marico (in Rs. Cr.)': 'Quarterly Results FMCG'})

# Transpose so each quarter becomes a row; reset_index moves the former column
# labels into a regular column. The first row then supplies the header and is dropped.
mar_df_qtr_5_drop_T = mar_df_qtr_5_drop.T.reset_index()
mar_df_qtr_5_drop_T = (
    mar_df_qtr_5_drop_T
    .rename(columns=mar_df_qtr_5_drop_T.iloc[0])
    .drop(mar_df_qtr_5_drop_T.index[[0]])
)

# Fetch the Marico quarterly-results page (URL suffix /7) and parse its HTML tables.
xhtml_mar_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/marico/results/quarterly-results/M13/7#M13'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_mar_qtr_6)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
mar_df_qtr_6 = pd.DataFrame(p.tables[1])
mar_df_qtr_6 = mar_df_qtr_6.drop(columns=mar_df_qtr_6.columns[cols2])

# Label the columns with the values found in the first scraped row.
mar_df_qtr_6 = mar_df_qtr_6.rename(columns=mar_df_qtr_6.iloc[0])

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
mar_df_qtr_6_drop = (
    mar_df_qtr_6
    .drop(index=mar_df_qtr_6.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        mar_df_qtr_6.columns[1:6],
        ['Marico Dec 14', 'Marico Sep 14', 'Marico Jun 14', 'Marico Mar 14', 'Marico Dec 13'],
    )))
    .rename(columns={'Quarterly Results of Marico (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
mar_df_qtr_6_drop_T = (
    mar_df_qtr_6_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)



# Stack the six per-page Marico frames, in scrape order, into a single frame.
mar_df = pd.concat(
    [
        mar_df_qtr_1_drop_T,
        mar_df_qtr_2_drop_T,
        mar_df_qtr_3_drop_T,
        mar_df_qtr_4_drop_T,
        mar_df_qtr_5_drop_T,
        mar_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Flip the row order and renumber the index from 0.
mar_df_r = mar_df.iloc[::-1].reset_index(drop=True)

# Attach the price, NSE FMCG, NASDAQ and crude frames by row index, one after
# another (the merge order decides which overlapping columns get _x/_y suffixes).
# NOTE(review): the join is purely index-based -- row i of every frame is
# assumed to describe the same quarter; confirm where those frames are built.
mar_df_r_price = (
    mar_df_r
    .merge(prices_df_qtr_marico, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Discard 'MARICO.NS', '1q_avg_diff' and the leftover 'index*' columns
# (presumably carried in by the merged frames -- verify).
mar_df_final = mar_df_r_price.drop(
    columns=['MARICO.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# The first `split_point` rows form the training set; the remainder is the test set.
split_point = 20
data_train_8 = mar_df_final.iloc[:split_point]
data_test_8 = mar_df_final.iloc[split_point:]



#### 09. Wipro

In [None]:
# Scrape, select financial features and print to a Dataframe

# Fetch the Wipro quarterly-results page (URL suffix /2) and parse its HTML tables.
xhtml_wip_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/wipro/results/quarterly-results/W/2#W'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_wip_qtr_1)

# Build the frame from the second parsed table and discard positional
# columns 1-4 and 6.
cols1 = [1, 2, 3, 4, 6]
wip_df_qtr_1 = pd.DataFrame(p.tables[1])
wip_df_qtr_1 = wip_df_qtr_1.drop(columns=wip_df_qtr_1.columns[cols1])

# Label the columns with the values found in the first scraped row.
wip_df_qtr_1 = wip_df_qtr_1.rename(columns=wip_df_qtr_1.iloc[0])

# Remove the header row and the rows not required for analysis, then prefix
# the period column with the company name and rename the line-item column to
# the shared 'Quarterly Results FMCG' label.
# NOTE(review): the period rename only applies when the surviving column is
# headed 'Mar 20'; any other header is left as scraped.
wip_df_qtr_1_drop = (
    wip_df_qtr_1
    .drop(index=wip_df_qtr_1.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns={
        'Mar 20': 'Wipro Mar 20',
        'Quarterly Results of Wipro (in Rs. Cr.)': 'Quarterly Results FMCG',
    })
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
wip_df_qtr_1_drop_T = (
    wip_df_qtr_1_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)


# Fetch the Wipro quarterly-results page (URL suffix /3) and parse its HTML tables.
xhtml_wip_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/wipro/results/quarterly-results/W/3#W'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_wip_qtr_2)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
wip_df_qtr_2 = pd.DataFrame(p.tables[1])
wip_df_qtr_2 = wip_df_qtr_2.drop(columns=wip_df_qtr_2.columns[cols2])

# Label the columns with the values found in the first scraped row.
wip_df_qtr_2 = wip_df_qtr_2.rename(columns=wip_df_qtr_2.iloc[0])

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
wip_df_qtr_2_drop = (
    wip_df_qtr_2
    .drop(index=wip_df_qtr_2.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        wip_df_qtr_2.columns[1:6],
        ['Wipro Dec 19', 'Wipro Sep 19', 'Wipro Jun 19', 'Wipro Mar 19', 'Wipro Dec 18'],
    )))
    .rename(columns={'Quarterly Results of Wipro (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
wip_df_qtr_2_drop_T = (
    wip_df_qtr_2_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)

# Fetch the Wipro quarterly-results page (URL suffix /4) and parse its HTML tables.
xhtml_wip_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/wipro/results/quarterly-results/W/4#W'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_wip_qtr_3)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
wip_df_qtr_3 = pd.DataFrame(p.tables[1])
wip_df_qtr_3 = wip_df_qtr_3.drop(columns=wip_df_qtr_3.columns[cols2])

# Label the columns with the values found in the first scraped row.
wip_df_qtr_3 = wip_df_qtr_3.rename(columns=wip_df_qtr_3.iloc[0])

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
wip_df_qtr_3_drop = (
    wip_df_qtr_3
    .drop(index=wip_df_qtr_3.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        wip_df_qtr_3.columns[1:6],
        ['Wipro Sep 18', 'Wipro Jun 18', 'Wipro Mar 18', 'Wipro Dec 17', 'Wipro Sep 17'],
    )))
    .rename(columns={'Quarterly Results of Wipro (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
wip_df_qtr_3_drop_T = (
    wip_df_qtr_3_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)



# Fetch the Wipro quarterly-results page (URL suffix /5) and parse its HTML tables.
xhtml_wip_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/wipro/results/quarterly-results/W/5#W'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_wip_qtr_4)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
wip_df_qtr_4 = pd.DataFrame(p.tables[1])
wip_df_qtr_4 = wip_df_qtr_4.drop(columns=wip_df_qtr_4.columns[cols2])

# Label the columns with the values found in the first scraped row.
wip_df_qtr_4 = wip_df_qtr_4.rename(columns=wip_df_qtr_4.iloc[0])

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
wip_df_qtr_4_drop = (
    wip_df_qtr_4
    .drop(index=wip_df_qtr_4.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        wip_df_qtr_4.columns[1:6],
        ['Wipro Jun 17', 'Wipro Mar 17', 'Wipro Dec 16', 'Wipro Sep 16', 'Wipro Jun 16'],
    )))
    .rename(columns={'Quarterly Results of Wipro (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
wip_df_qtr_4_drop_T = (
    wip_df_qtr_4_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)

# Fetch the Wipro quarterly-results page (URL suffix /6) and parse its HTML tables.
xhtml_wip_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/wipro/results/quarterly-results/W/6#W'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_wip_qtr_5)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
wip_df_qtr_5 = pd.DataFrame(p.tables[1])
wip_df_qtr_5 = wip_df_qtr_5.drop(columns=wip_df_qtr_5.columns[cols2])

# Label the columns with the values found in the first scraped row.
wip_df_qtr_5 = wip_df_qtr_5.rename(columns=wip_df_qtr_5.iloc[0])

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
wip_df_qtr_5_drop = (
    wip_df_qtr_5
    .drop(index=wip_df_qtr_5.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        wip_df_qtr_5.columns[1:6],
        ['Wipro Mar 16', 'Wipro Dec 15', 'Wipro Sep 15', 'Wipro Jun 15', 'Wipro Mar 15'],
    )))
    .rename(columns={'Quarterly Results of Wipro (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
wip_df_qtr_5_drop_T = (
    wip_df_qtr_5_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)

# Fetch the Wipro quarterly-results page (URL suffix /7) and parse its HTML tables.
xhtml_wip_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/wipro/results/quarterly-results/W/7#W'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_wip_qtr_6)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
wip_df_qtr_6 = pd.DataFrame(p.tables[1])
wip_df_qtr_6 = wip_df_qtr_6.drop(columns=wip_df_qtr_6.columns[cols2])

# Label the columns with the values found in the first scraped row.
wip_df_qtr_6 = wip_df_qtr_6.rename(columns=wip_df_qtr_6.iloc[0])

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
wip_df_qtr_6_drop = (
    wip_df_qtr_6
    .drop(index=wip_df_qtr_6.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        wip_df_qtr_6.columns[1:6],
        ['Wipro Dec 14', 'Wipro Sep 14', 'Wipro Jun 14', 'Wipro Mar 14', 'Wipro Dec 13'],
    )))
    .rename(columns={'Quarterly Results of Wipro (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
wip_df_qtr_6_drop_T = (
    wip_df_qtr_6_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)



# Stack the six per-page Wipro frames, in scrape order, into a single frame.
wip_df = pd.concat(
    [
        wip_df_qtr_1_drop_T,
        wip_df_qtr_2_drop_T,
        wip_df_qtr_3_drop_T,
        wip_df_qtr_4_drop_T,
        wip_df_qtr_5_drop_T,
        wip_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Flip the row order and renumber the index from 0.
wip_df_r = wip_df.iloc[::-1].reset_index(drop=True)

# Attach the price, NSE FMCG, NASDAQ and crude frames by row index, one after
# another (the merge order decides which overlapping columns get _x/_y suffixes).
# NOTE(review): the join is purely index-based -- row i of every frame is
# assumed to describe the same quarter; confirm where those frames are built.
wip_df_r_price = (
    wip_df_r
    .merge(prices_df_qtr_wipro, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Discard 'WIPRO.NS', '1q_avg_diff' and the leftover 'index*' columns
# (presumably carried in by the merged frames -- verify).
wip_df_final = wip_df_r_price.drop(
    columns=['WIPRO.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# The first `split_point` rows form the training set; the remainder is the test set.
split_point = 20
data_train_9 = wip_df_final.iloc[:split_point]
data_test_9 = wip_df_final.iloc[split_point:]

#### 10. Godrej Consumer Products Ltd

In [None]:
# Scrape, select financial features and print to a Dataframe

# Fetch the Godrej Consumer Products quarterly-results page (URL suffix /2)
# and parse its HTML tables.
xhtml_god_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/godrejconsumerproducts/results/quarterly-results/GCP/2#GCP'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_god_qtr_1)

# Build the frame from the second parsed table and discard positional
# columns 1-4 and 6.
cols1 = [1, 2, 3, 4, 6]
god_df_qtr_1 = pd.DataFrame(p.tables[1])
god_df_qtr_1 = god_df_qtr_1.drop(columns=god_df_qtr_1.columns[cols1])

# Label the columns with the values found in the first scraped row.
god_df_qtr_1 = god_df_qtr_1.rename(columns=god_df_qtr_1.iloc[0])

# Remove the header row and the rows not required for analysis, then prefix
# the period column with the company name and rename the line-item column to
# the shared 'Quarterly Results FMCG' label.
# NOTE(review): the period rename only applies when the surviving column is
# headed 'Mar 20'; any other header is left as scraped.
god_df_qtr_1_drop = (
    god_df_qtr_1
    .drop(index=god_df_qtr_1.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns={
        'Mar 20': 'Godrej Mar 20',
        'Quarterly Results of Godrej Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG',
    })
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
god_df_qtr_1_drop_T = (
    god_df_qtr_1_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)


# Fetch the Godrej Consumer Products quarterly-results page (URL suffix /3)
# and parse its HTML tables.
xhtml_god_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/godrejconsumerproducts/results/quarterly-results/GCP/3#GCP'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_god_qtr_2)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
god_df_qtr_2 = pd.DataFrame(p.tables[1])
god_df_qtr_2 = god_df_qtr_2.drop(columns=god_df_qtr_2.columns[cols2])

# Label the columns with the values found in the first scraped row.
god_df_qtr_2 = god_df_qtr_2.rename(columns=god_df_qtr_2.iloc[0])

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
god_df_qtr_2_drop = (
    god_df_qtr_2
    .drop(index=god_df_qtr_2.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        god_df_qtr_2.columns[1:6],
        ['Godrej Dec 19', 'Godrej Sep 19', 'Godrej Jun 19', 'Godrej Mar 19', 'Godrej Dec 18'],
    )))
    .rename(columns={'Quarterly Results of Godrej Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
god_df_qtr_2_drop_T = (
    god_df_qtr_2_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)

# Fetch the Godrej Consumer Products quarterly-results page (URL suffix /4)
# and parse its HTML tables.
xhtml_god_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/godrejconsumerproducts/results/quarterly-results/GCP/4#GCP'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_god_qtr_3)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
god_df_qtr_3 = pd.DataFrame(p.tables[1])
god_df_qtr_3 = god_df_qtr_3.drop(columns=god_df_qtr_3.columns[cols2])

# Label the columns with the values found in the first scraped row.
god_df_qtr_3 = god_df_qtr_3.rename(columns=god_df_qtr_3.iloc[0])

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
god_df_qtr_3_drop = (
    god_df_qtr_3
    .drop(index=god_df_qtr_3.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        god_df_qtr_3.columns[1:6],
        ['Godrej Sep 18', 'Godrej Jun 18', 'Godrej Mar 18', 'Godrej Dec 17', 'Godrej Sep 17'],
    )))
    .rename(columns={'Quarterly Results of Godrej Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
god_df_qtr_3_drop_T = (
    god_df_qtr_3_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)



# Fetch the Godrej Consumer Products quarterly-results page (URL suffix /5)
# and parse its HTML tables.
xhtml_god_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/godrejconsumerproducts/results/quarterly-results/GCP/5#GCP'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_god_qtr_4)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
god_df_qtr_4 = pd.DataFrame(p.tables[1])
god_df_qtr_4 = god_df_qtr_4.drop(columns=god_df_qtr_4.columns[cols2])

# Label the columns with the values found in the first scraped row.
god_df_qtr_4 = god_df_qtr_4.rename(columns=god_df_qtr_4.iloc[0])

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
god_df_qtr_4_drop = (
    god_df_qtr_4
    .drop(index=god_df_qtr_4.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        god_df_qtr_4.columns[1:6],
        ['Godrej Jun 17', 'Godrej Mar 17', 'Godrej Dec 16', 'Godrej Sep 16', 'Godrej Jun 16'],
    )))
    .rename(columns={'Quarterly Results of Godrej Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
god_df_qtr_4_drop_T = (
    god_df_qtr_4_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)

# Fetch the Godrej Consumer Products quarterly-results page (URL suffix /6)
# and parse its HTML tables.
xhtml_god_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/godrejconsumerproducts/results/quarterly-results/GCP/6#GCP'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_god_qtr_5)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
god_df_qtr_5 = pd.DataFrame(p.tables[1])
god_df_qtr_5 = god_df_qtr_5.drop(columns=god_df_qtr_5.columns[cols2])

# Label the columns with the values found in the first scraped row.
god_df_qtr_5 = god_df_qtr_5.rename(columns=god_df_qtr_5.iloc[0])

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
god_df_qtr_5_drop = (
    god_df_qtr_5
    .drop(index=god_df_qtr_5.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        god_df_qtr_5.columns[1:6],
        ['Godrej Mar 16', 'Godrej Dec 15', 'Godrej Sep 15', 'Godrej Jun 15', 'Godrej Mar 15'],
    )))
    .rename(columns={'Quarterly Results of Godrej Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
god_df_qtr_5_drop_T = (
    god_df_qtr_5_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)

# Fetch the Godrej Consumer Products quarterly-results page (URL suffix /7)
# and parse its HTML tables.
xhtml_god_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/godrejconsumerproducts/results/quarterly-results/GCP/7#GCP'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_god_qtr_6)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
god_df_qtr_6 = pd.DataFrame(p.tables[1])
god_df_qtr_6 = god_df_qtr_6.drop(columns=god_df_qtr_6.columns[cols2])

# Label the columns with the values found in the first scraped row.
god_df_qtr_6 = god_df_qtr_6.rename(columns=god_df_qtr_6.iloc[0])

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
god_df_qtr_6_drop = (
    god_df_qtr_6
    .drop(index=god_df_qtr_6.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        god_df_qtr_6.columns[1:6],
        ['Godrej Dec 14', 'Godrej Sep 14', 'Godrej Jun 14', 'Godrej Mar 14', 'Godrej Dec 13'],
    )))
    .rename(columns={'Quarterly Results of Godrej Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
god_df_qtr_6_drop_T = (
    god_df_qtr_6_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)



# Stack the six per-page Godrej Consumer Products frames, in scrape order,
# into a single frame.
god_df = pd.concat(
    [
        god_df_qtr_1_drop_T,
        god_df_qtr_2_drop_T,
        god_df_qtr_3_drop_T,
        god_df_qtr_4_drop_T,
        god_df_qtr_5_drop_T,
        god_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Flip the row order and renumber the index from 0.
god_df_r = god_df.iloc[::-1].reset_index(drop=True)

# Attach the price, NSE FMCG, NASDAQ and crude frames by row index, one after
# another (the merge order decides which overlapping columns get _x/_y suffixes).
# NOTE(review): the join is purely index-based -- row i of every frame is
# assumed to describe the same quarter; confirm where those frames are built.
god_df_r_price = (
    god_df_r
    .merge(prices_df_qtr_god, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Discard 'GODREJCP.NS', '1q_avg_diff' and the leftover 'index*' columns
# (presumably carried in by the merged frames -- verify).
god_df_final = god_df_r_price.drop(
    columns=['GODREJCP.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# The first `split_point` rows form the training set; the remainder is the test set.
split_point = 20
data_train_10 = god_df_final.iloc[:split_point]
data_test_10 = god_df_final.iloc[split_point:]

#### 11. Godfrey Phillips

In [None]:
# Scrape, select financial features and print to a Dataframe

# Fetch the Godfrey Phillips India quarterly-results page (URL suffix /2)
# and parse its HTML tables.
xhtml_godfreyphillips_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/godfreyphillipsindia/results/quarterly-results/GPI/2#GPI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_godfreyphillips_qtr_1)

# Build the frame from the second parsed table and discard positional
# columns 1-4 and 6.
cols1 = [1, 2, 3, 4, 6]
godfreyphillips_df_qtr_1 = pd.DataFrame(p.tables[1])
godfreyphillips_df_qtr_1 = godfreyphillips_df_qtr_1.drop(
    columns=godfreyphillips_df_qtr_1.columns[cols1]
)

# Label the columns with the values found in the first scraped row.
godfreyphillips_df_qtr_1 = godfreyphillips_df_qtr_1.rename(
    columns=godfreyphillips_df_qtr_1.iloc[0]
)

# Remove the header row and the rows not required for analysis, then prefix
# the period column with the company name and rename the line-item column to
# the shared 'Quarterly Results FMCG' label.
# NOTE(review): the period rename only applies when the surviving column is
# headed 'Mar 20'; any other header is left as scraped.
godfreyphillips_df_qtr_1_drop = (
    godfreyphillips_df_qtr_1
    .drop(index=godfreyphillips_df_qtr_1.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns={
        'Mar 20': 'Godfrey Phillips Mar 20',
        'Quarterly Results of Godfrey Phillips India (in Rs. Cr.)': 'Quarterly Results FMCG',
    })
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
godfreyphillips_df_qtr_1_drop_T = (
    godfreyphillips_df_qtr_1_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)

# Fetch the Godfrey Phillips India quarterly-results page (URL suffix /3)
# and parse its HTML tables.
xhtml_godfreyphillips_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/godfreyphillipsindia/results/quarterly-results/GPI/3#GPI'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_godfreyphillips_qtr_2)

# Build the frame from the second parsed table and discard positional column 6.
cols2 = [6]
godfreyphillips_df_qtr_2 = pd.DataFrame(p.tables[1])
godfreyphillips_df_qtr_2 = godfreyphillips_df_qtr_2.drop(
    columns=godfreyphillips_df_qtr_2.columns[cols2]
)

# Label the columns with the values found in the first scraped row.
godfreyphillips_df_qtr_2 = godfreyphillips_df_qtr_2.rename(
    columns=godfreyphillips_df_qtr_2.iloc[0]
)

# Remove the header row and the rows not required for analysis, then relabel
# the five period columns by position and rename the line-item column to the
# shared 'Quarterly Results FMCG' label.
# NOTE(review): the period labels are hard-coded by position, not read from
# the page header -- confirm they still match what the site serves.
godfreyphillips_df_qtr_2_drop = (
    godfreyphillips_df_qtr_2
    .drop(index=godfreyphillips_df_qtr_2.index[[0]])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns=dict(zip(
        godfreyphillips_df_qtr_2.columns[1:6],
        [
            'Godfrey Phillips Dec 19',
            'Godfrey Phillips Sep 19',
            'Godfrey Phillips Jun 19',
            'Godfrey Phillips Mar 19',
            'Godfrey Phillips Dec 18',
        ],
    )))
    .rename(columns={'Quarterly Results of Godfrey Phillips India (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so every period becomes a row, turn the old column labels into a
# regular column, then promote the first row to column labels and drop it.
godfreyphillips_df_qtr_2_drop_T = (
    godfreyphillips_df_qtr_2_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[[0]]))
)

# Godfrey Phillips, results page 4: download the markup as UTF-8 text.
xhtml_godfreyphillips_qtr_3 = url_get_contents('https://www.moneycontrol.com/financials/godfreyphillipsindia/results/quarterly-results/GPI/4#GPI').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_godfreyphillips_qtr_3)

# Raw frame: remove the column at position 6 and promote the first scraped
# row to the column header. The name is rebound rather than mutated in place
# so each step has a single, visible result.
godfreyphillips_df_qtr_3 = pd.DataFrame(p.tables[1])
cols2 = [6]
godfreyphillips_df_qtr_3 = godfreyphillips_df_qtr_3.drop(columns=godfreyphillips_df_qtr_3.columns[cols2])
godfreyphillips_df_qtr_3 = godfreyphillips_df_qtr_3.rename(columns=godfreyphillips_df_qtr_3.iloc[0])

# Remove the promoted header row, then the rows not required for analysis.
godfreyphillips_df_qtr_3_drop = (
    godfreyphillips_df_qtr_3
    .drop(index=godfreyphillips_df_qtr_3.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Label the five period columns by position with the company name, then
# rename the company-specific title column to the generic label.
# NOTE(review): the period labels are assigned positionally and are not
# checked against the scraped header text.
godfreyphillips_df_qtr_3_drop = (
    godfreyphillips_df_qtr_3_drop
    .rename(columns=dict(zip(
        godfreyphillips_df_qtr_3_drop.columns[1:6],
        ['Godfrey Phillips Sep 18', 'Godfrey Phillips Jun 18', 'Godfrey Phillips Mar 18',
         'Godfrey Phillips Dec 17', 'Godfrey Phillips Sep 17'],
    )))
    .rename(columns={'Quarterly Results of Godfrey Phillips India (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
godfreyphillips_df_qtr_3_drop_T = godfreyphillips_df_qtr_3_drop.T.reset_index()
godfreyphillips_df_qtr_3_drop_T = godfreyphillips_df_qtr_3_drop_T.rename(
    columns=godfreyphillips_df_qtr_3_drop_T.iloc[0]
)
godfreyphillips_df_qtr_3_drop_T = godfreyphillips_df_qtr_3_drop_T.drop(
    index=godfreyphillips_df_qtr_3_drop_T.index[0]
)

# Godfrey Phillips, results page 5: download the markup as UTF-8 text.
xhtml_godfreyphillips_qtr_4 = url_get_contents('https://www.moneycontrol.com/financials/godfreyphillipsindia/results/quarterly-results/GPI/5#GPI').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_godfreyphillips_qtr_4)

# Raw frame: remove the column at position 6 and promote the first scraped
# row to the column header. The name is rebound rather than mutated in place
# so each step has a single, visible result.
godfreyphillips_df_qtr_4 = pd.DataFrame(p.tables[1])
cols2 = [6]
godfreyphillips_df_qtr_4 = godfreyphillips_df_qtr_4.drop(columns=godfreyphillips_df_qtr_4.columns[cols2])
godfreyphillips_df_qtr_4 = godfreyphillips_df_qtr_4.rename(columns=godfreyphillips_df_qtr_4.iloc[0])

# Remove the promoted header row, then the rows not required for analysis.
godfreyphillips_df_qtr_4_drop = (
    godfreyphillips_df_qtr_4
    .drop(index=godfreyphillips_df_qtr_4.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Label the five period columns by position with the company name, then
# rename the company-specific title column to the generic label.
# NOTE(review): the period labels are assigned positionally and are not
# checked against the scraped header text.
godfreyphillips_df_qtr_4_drop = (
    godfreyphillips_df_qtr_4_drop
    .rename(columns=dict(zip(
        godfreyphillips_df_qtr_4_drop.columns[1:6],
        ['Godfrey Phillips Jun 17', 'Godfrey Phillips Mar 17', 'Godfrey Phillips Dec 16',
         'Godfrey Phillips Sep 16', 'Godfrey Phillips Jun 16'],
    )))
    .rename(columns={'Quarterly Results of Godfrey Phillips India (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
godfreyphillips_df_qtr_4_drop_T = godfreyphillips_df_qtr_4_drop.T.reset_index()
godfreyphillips_df_qtr_4_drop_T = godfreyphillips_df_qtr_4_drop_T.rename(
    columns=godfreyphillips_df_qtr_4_drop_T.iloc[0]
)
godfreyphillips_df_qtr_4_drop_T = godfreyphillips_df_qtr_4_drop_T.drop(
    index=godfreyphillips_df_qtr_4_drop_T.index[0]
)

# Godfrey Phillips, results page 6: download the markup as UTF-8 text.
xhtml_godfreyphillips_qtr_5 = url_get_contents('https://www.moneycontrol.com/financials/godfreyphillipsindia/results/quarterly-results/GPI/6#GPI').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_godfreyphillips_qtr_5)

# Raw frame: remove the column at position 6 and promote the first scraped
# row to the column header. The name is rebound rather than mutated in place
# so each step has a single, visible result.
godfreyphillips_df_qtr_5 = pd.DataFrame(p.tables[1])
cols2 = [6]
godfreyphillips_df_qtr_5 = godfreyphillips_df_qtr_5.drop(columns=godfreyphillips_df_qtr_5.columns[cols2])
godfreyphillips_df_qtr_5 = godfreyphillips_df_qtr_5.rename(columns=godfreyphillips_df_qtr_5.iloc[0])

# Remove the promoted header row, then the rows not required for analysis.
godfreyphillips_df_qtr_5_drop = (
    godfreyphillips_df_qtr_5
    .drop(index=godfreyphillips_df_qtr_5.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Label the five period columns by position with the company name, then
# rename the company-specific title column to the generic label.
# NOTE(review): the period labels are assigned positionally and are not
# checked against the scraped header text.
godfreyphillips_df_qtr_5_drop = (
    godfreyphillips_df_qtr_5_drop
    .rename(columns=dict(zip(
        godfreyphillips_df_qtr_5_drop.columns[1:6],
        ['Godfrey Phillips Mar 16', 'Godfrey Phillips Dec 15', 'Godfrey Phillips Sep 15',
         'Godfrey Phillips Jun 15', 'Godfrey Phillips Mar 15'],
    )))
    .rename(columns={'Quarterly Results of Godfrey Phillips India (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
godfreyphillips_df_qtr_5_drop_T = godfreyphillips_df_qtr_5_drop.T.reset_index()
godfreyphillips_df_qtr_5_drop_T = godfreyphillips_df_qtr_5_drop_T.rename(
    columns=godfreyphillips_df_qtr_5_drop_T.iloc[0]
)
godfreyphillips_df_qtr_5_drop_T = godfreyphillips_df_qtr_5_drop_T.drop(
    index=godfreyphillips_df_qtr_5_drop_T.index[0]
)

# Godfrey Phillips, results page 7: download the markup as UTF-8 text.
xhtml_godfreyphillips_qtr_6 = url_get_contents('https://www.moneycontrol.com/financials/godfreyphillipsindia/results/quarterly-results/GPI/7#GPI').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_godfreyphillips_qtr_6)

# Raw frame: remove the column at position 6 and promote the first scraped
# row to the column header. The name is rebound rather than mutated in place
# so each step has a single, visible result.
godfreyphillips_df_qtr_6 = pd.DataFrame(p.tables[1])
cols2 = [6]
godfreyphillips_df_qtr_6 = godfreyphillips_df_qtr_6.drop(columns=godfreyphillips_df_qtr_6.columns[cols2])
godfreyphillips_df_qtr_6 = godfreyphillips_df_qtr_6.rename(columns=godfreyphillips_df_qtr_6.iloc[0])

# Remove the promoted header row, then the rows not required for analysis.
godfreyphillips_df_qtr_6_drop = (
    godfreyphillips_df_qtr_6
    .drop(index=godfreyphillips_df_qtr_6.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Label the five period columns by position with the company name, then
# rename the company-specific title column to the generic label.
# NOTE(review): the period labels are assigned positionally and are not
# checked against the scraped header text.
godfreyphillips_df_qtr_6_drop = (
    godfreyphillips_df_qtr_6_drop
    .rename(columns=dict(zip(
        godfreyphillips_df_qtr_6_drop.columns[1:6],
        ['Godfrey Phillips Dec 14', 'Godfrey Phillips Sep 14', 'Godfrey Phillips Jun 14',
         'Godfrey Phillips Mar 14', 'Godfrey Phillips Dec 13'],
    )))
    .rename(columns={'Quarterly Results of Godfrey Phillips India (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
godfreyphillips_df_qtr_6_drop_T = godfreyphillips_df_qtr_6_drop.T.reset_index()
godfreyphillips_df_qtr_6_drop_T = godfreyphillips_df_qtr_6_drop_T.rename(
    columns=godfreyphillips_df_qtr_6_drop_T.iloc[0]
)
godfreyphillips_df_qtr_6_drop_T = godfreyphillips_df_qtr_6_drop_T.drop(
    index=godfreyphillips_df_qtr_6_drop_T.index[0]
)



# Merge dataframes for Godfrey Phillips: stack the six per-page frames in
# page order with one concat (instead of re-copying a growing frame five
# times) and renumber the rows 0..n-1.
godfreyphillips_df = pd.concat(
    [
        godfreyphillips_df_qtr_1_drop_T,
        godfreyphillips_df_qtr_2_drop_T,
        godfreyphillips_df_qtr_3_drop_T,
        godfreyphillips_df_qtr_4_drop_T,
        godfreyphillips_df_qtr_5_drop_T,
        godfreyphillips_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the row order and renumber from 0; the joins below match rows
# purely by this positional index.
godfreyphillips_df_r = godfreyphillips_df.iloc[::-1].reset_index(drop=True)

# Join the stock-price, NSE FMCG, NASDAQ and crude frames on the row index
# (pandas' default inner join, so only index values present on both sides
# survive each step).
# NOTE(review): assumes the four right-hand frames are row-aligned with the
# reversed results frame - confirm where they are built.
godfreyphillips_df_r_price = (
    godfreyphillips_df_r
    .merge(prices_df_qtr_gfp, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Discard the listed helper columns that came in with the joined frames.
godfreyphillips_df_final = godfreyphillips_df_r_price.drop(
    columns=['GODFRYPHLP.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# Positional train/test split: the first `split_point` rows train, the
# remaining rows test.
split_point = 20
data_train_11 = godfreyphillips_df_final.iloc[:split_point]
data_test_11 = godfreyphillips_df_final.iloc[split_point:]



#### 12. TATA Consumer Products

In [None]:
# Scrape the quarterly results, keep the financial line items of interest and
# shape them into a DataFrame.

# Tata Consumer Products, results page 2: download the markup as UTF-8 text.
xhtml_tata_qtr_1 = url_get_contents('https://www.moneycontrol.com/financials/tataconsumerproducts/results/quarterly-results/TT/2#TT').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_tata_qtr_1)

# Raw frame: remove the columns at positions 1-4 and 6 (leaving positions 0
# and 5) and promote the first scraped row to the column header.
tata_df_qtr_1 = pd.DataFrame(p.tables[1])
cols1 = [1, 2, 3, 4, 6]
tata_df_qtr_1 = tata_df_qtr_1.drop(columns=tata_df_qtr_1.columns[cols1])
tata_df_qtr_1 = tata_df_qtr_1.rename(columns=tata_df_qtr_1.iloc[0])

# Remove the promoted header row and the rows not required for analysis, tag
# the 'Mar 20' column with the company name, and rename the company-specific
# title column to the generic label.
tata_df_qtr_1_drop = (
    tata_df_qtr_1
    .drop(index=tata_df_qtr_1.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns={'Mar 20': 'Tata Consumer Products Mar 20'})
    .rename(columns={'Quarterly Results of TATA Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
tata_df_qtr_1_drop_T = tata_df_qtr_1_drop.T.reset_index()
tata_df_qtr_1_drop_T = tata_df_qtr_1_drop_T.rename(columns=tata_df_qtr_1_drop_T.iloc[0])
tata_df_qtr_1_drop_T = tata_df_qtr_1_drop_T.drop(index=tata_df_qtr_1_drop_T.index[0])


# Tata Consumer Products, results page 3: download the markup as UTF-8 text.
xhtml_tata_qtr_2 = url_get_contents('https://www.moneycontrol.com/financials/tataconsumerproducts/results/quarterly-results/TT/3#TT').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_tata_qtr_2)

# Raw frame: remove the column at position 6 and promote the first scraped
# row to the column header.
tata_df_qtr_2 = pd.DataFrame(p.tables[1])
cols2 = [6]
tata_df_qtr_2 = tata_df_qtr_2.drop(columns=tata_df_qtr_2.columns[cols2])
tata_df_qtr_2 = tata_df_qtr_2.rename(columns=tata_df_qtr_2.iloc[0])

# Remove the promoted header row, then the rows not required for analysis.
tata_df_qtr_2_drop = (
    tata_df_qtr_2
    .drop(index=tata_df_qtr_2.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Label the five period columns by position with the company name, then
# rename the company-specific title column to the generic label.
tata_df_qtr_2_drop = (
    tata_df_qtr_2_drop
    .rename(columns=dict(zip(
        tata_df_qtr_2_drop.columns[1:6],
        ['Tata Consumer Products Dec 19', 'Tata Consumer Products Sep 19', 'Tata Consumer Products Jun 19',
         'Tata Consumer Products Mar 19', 'Tata Consumer Products Dec 18'],
    )))
    .rename(columns={'Quarterly Results of TATA Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
tata_df_qtr_2_drop_T = tata_df_qtr_2_drop.T.reset_index()
tata_df_qtr_2_drop_T = tata_df_qtr_2_drop_T.rename(columns=tata_df_qtr_2_drop_T.iloc[0])
tata_df_qtr_2_drop_T = tata_df_qtr_2_drop_T.drop(index=tata_df_qtr_2_drop_T.index[0])

# Tata Consumer Products, results page 4: download the markup as UTF-8 text.
xhtml_tata_qtr_3 = url_get_contents('https://www.moneycontrol.com/financials/tataconsumerproducts/results/quarterly-results/TT/4#TT').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_tata_qtr_3)

# Raw frame: remove the column at position 6 and promote the first scraped
# row to the column header. The name is rebound rather than mutated in place
# so each step has a single, visible result.
tata_df_qtr_3 = pd.DataFrame(p.tables[1])
cols2 = [6]
tata_df_qtr_3 = tata_df_qtr_3.drop(columns=tata_df_qtr_3.columns[cols2])
tata_df_qtr_3 = tata_df_qtr_3.rename(columns=tata_df_qtr_3.iloc[0])

# Remove the promoted header row, then the rows not required for analysis.
tata_df_qtr_3_drop = (
    tata_df_qtr_3
    .drop(index=tata_df_qtr_3.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Label the five period columns by position with the company name, then
# rename the company-specific title column to the generic label.
# NOTE(review): the period labels are assigned positionally and are not
# checked against the scraped header text.
tata_df_qtr_3_drop = (
    tata_df_qtr_3_drop
    .rename(columns=dict(zip(
        tata_df_qtr_3_drop.columns[1:6],
        ['Tata Consumer Products Sep 18', 'Tata Consumer Products Jun 18', 'Tata Consumer Products Mar 18',
         'Tata Consumer Products Dec 17', 'Tata Consumer Products Sep 17'],
    )))
    .rename(columns={'Quarterly Results of TATA Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
tata_df_qtr_3_drop_T = tata_df_qtr_3_drop.T.reset_index()
tata_df_qtr_3_drop_T = tata_df_qtr_3_drop_T.rename(columns=tata_df_qtr_3_drop_T.iloc[0])
tata_df_qtr_3_drop_T = tata_df_qtr_3_drop_T.drop(index=tata_df_qtr_3_drop_T.index[0])



# Tata Consumer Products, results page 5: download the markup as UTF-8 text.
xhtml_tata_qtr_4 = url_get_contents('https://www.moneycontrol.com/financials/tataconsumerproducts/results/quarterly-results/TT/5#TT').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_tata_qtr_4)

# Raw frame: remove the column at position 6 and promote the first scraped
# row to the column header. The name is rebound rather than mutated in place
# so each step has a single, visible result.
tata_df_qtr_4 = pd.DataFrame(p.tables[1])
cols2 = [6]
tata_df_qtr_4 = tata_df_qtr_4.drop(columns=tata_df_qtr_4.columns[cols2])
tata_df_qtr_4 = tata_df_qtr_4.rename(columns=tata_df_qtr_4.iloc[0])

# Remove the promoted header row, then the rows not required for analysis.
tata_df_qtr_4_drop = (
    tata_df_qtr_4
    .drop(index=tata_df_qtr_4.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Label the five period columns by position with the company name, then
# rename the company-specific title column to the generic label.
# NOTE(review): the period labels are assigned positionally and are not
# checked against the scraped header text.
tata_df_qtr_4_drop = (
    tata_df_qtr_4_drop
    .rename(columns=dict(zip(
        tata_df_qtr_4_drop.columns[1:6],
        ['Tata Consumer Products Jun 17', 'Tata Consumer Products Mar 17', 'Tata Consumer Products Dec 16',
         'Tata Consumer Products Sep 16', 'Tata Consumer Products Jun 16'],
    )))
    .rename(columns={'Quarterly Results of TATA Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
tata_df_qtr_4_drop_T = tata_df_qtr_4_drop.T.reset_index()
tata_df_qtr_4_drop_T = tata_df_qtr_4_drop_T.rename(columns=tata_df_qtr_4_drop_T.iloc[0])
tata_df_qtr_4_drop_T = tata_df_qtr_4_drop_T.drop(index=tata_df_qtr_4_drop_T.index[0])

# Tata Consumer Products, results page 6: download the markup as UTF-8 text.
xhtml_tata_qtr_5 = url_get_contents('https://www.moneycontrol.com/financials/tataconsumerproducts/results/quarterly-results/TT/6#TT').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_tata_qtr_5)

# Raw frame: remove the column at position 6 and promote the first scraped
# row to the column header. The name is rebound rather than mutated in place
# so each step has a single, visible result.
tata_df_qtr_5 = pd.DataFrame(p.tables[1])
cols2 = [6]
tata_df_qtr_5 = tata_df_qtr_5.drop(columns=tata_df_qtr_5.columns[cols2])
tata_df_qtr_5 = tata_df_qtr_5.rename(columns=tata_df_qtr_5.iloc[0])

# Remove the promoted header row, then the rows not required for analysis.
tata_df_qtr_5_drop = (
    tata_df_qtr_5
    .drop(index=tata_df_qtr_5.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Label the five period columns by position with the company name, then
# rename the company-specific title column to the generic label.
# NOTE(review): the period labels are assigned positionally and are not
# checked against the scraped header text.
tata_df_qtr_5_drop = (
    tata_df_qtr_5_drop
    .rename(columns=dict(zip(
        tata_df_qtr_5_drop.columns[1:6],
        ['Tata Consumer Products Mar 16', 'Tata Consumer Products Dec 15', 'Tata Consumer Products Sep 15',
         'Tata Consumer Products Jun 15', 'Tata Consumer Products Mar 15'],
    )))
    .rename(columns={'Quarterly Results of TATA Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
tata_df_qtr_5_drop_T = tata_df_qtr_5_drop.T.reset_index()
tata_df_qtr_5_drop_T = tata_df_qtr_5_drop_T.rename(columns=tata_df_qtr_5_drop_T.iloc[0])
tata_df_qtr_5_drop_T = tata_df_qtr_5_drop_T.drop(index=tata_df_qtr_5_drop_T.index[0])

# Tata Consumer Products, results page 7: download the markup as UTF-8 text.
xhtml_tata_qtr_6 = url_get_contents('https://www.moneycontrol.com/financials/tataconsumerproducts/results/quarterly-results/TT/7#TT').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_tata_qtr_6)

# Raw frame: remove the column at position 6 and promote the first scraped
# row to the column header. The name is rebound rather than mutated in place
# so each step has a single, visible result.
tata_df_qtr_6 = pd.DataFrame(p.tables[1])
cols2 = [6]
tata_df_qtr_6 = tata_df_qtr_6.drop(columns=tata_df_qtr_6.columns[cols2])
tata_df_qtr_6 = tata_df_qtr_6.rename(columns=tata_df_qtr_6.iloc[0])

# Remove the promoted header row, then the rows not required for analysis.
tata_df_qtr_6_drop = (
    tata_df_qtr_6
    .drop(index=tata_df_qtr_6.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Label the five period columns by position with the company name, then
# rename the company-specific title column to the generic label.
# NOTE(review): the period labels are assigned positionally and are not
# checked against the scraped header text.
tata_df_qtr_6_drop = (
    tata_df_qtr_6_drop
    .rename(columns=dict(zip(
        tata_df_qtr_6_drop.columns[1:6],
        ['Tata Consumer Products Dec 14', 'Tata Consumer Products Sep 14', 'Tata Consumer Products Jun 14',
         'Tata Consumer Products Mar 14', 'Tata Consumer Products Dec 13'],
    )))
    .rename(columns={'Quarterly Results of TATA Consumer Products (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
tata_df_qtr_6_drop_T = tata_df_qtr_6_drop.T.reset_index()
tata_df_qtr_6_drop_T = tata_df_qtr_6_drop_T.rename(columns=tata_df_qtr_6_drop_T.iloc[0])
tata_df_qtr_6_drop_T = tata_df_qtr_6_drop_T.drop(index=tata_df_qtr_6_drop_T.index[0])



# Merge dataframes for Tata Consumer Products: stack the six per-page frames
# in page order with one concat (instead of re-copying a growing frame five
# times) and renumber the rows 0..n-1.
tata_df = pd.concat(
    [
        tata_df_qtr_1_drop_T,
        tata_df_qtr_2_drop_T,
        tata_df_qtr_3_drop_T,
        tata_df_qtr_4_drop_T,
        tata_df_qtr_5_drop_T,
        tata_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the row order and renumber from 0; the joins below match rows
# purely by this positional index.
tata_df_r = tata_df.iloc[::-1].reset_index(drop=True)

# Join the stock-price, NSE FMCG, NASDAQ and crude frames on the row index
# (pandas' default inner join, so only index values present on both sides
# survive each step).
# NOTE(review): assumes the four right-hand frames are row-aligned with the
# reversed results frame - confirm where they are built.
tata_df_r_price = (
    tata_df_r
    .merge(prices_df_qtr_tata, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Discard the listed helper columns that came in with the joined frames.
tata_df_final = tata_df_r_price.drop(
    columns=['TATACONSUM.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# Positional train/test split: the first `split_point` rows train, the
# remaining rows test.
split_point = 20
data_train_12 = tata_df_final.iloc[:split_point]
data_test_12 = tata_df_final.iloc[split_point:]


#### 13. Jubilant FoodWorks

In [None]:
# Scrape the quarterly results, keep the financial line items of interest and
# shape them into a DataFrame.

# Jubilant Foodworks, results page 2: download the markup as UTF-8 text.
xhtml_jubilantfoodworks_qtr_1 = url_get_contents('https://www.moneycontrol.com/financials/jubilantfoodworks/results/quarterly-results/JF04/2#JF04').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_jubilantfoodworks_qtr_1)

# Raw frame: remove the columns at positions 1-4 and 6 (leaving positions 0
# and 5) and promote the first scraped row to the column header.
jubilantfoodworks_df_qtr_1 = pd.DataFrame(p.tables[1])
cols1 = [1, 2, 3, 4, 6]
jubilantfoodworks_df_qtr_1 = jubilantfoodworks_df_qtr_1.drop(columns=jubilantfoodworks_df_qtr_1.columns[cols1])
jubilantfoodworks_df_qtr_1 = jubilantfoodworks_df_qtr_1.rename(columns=jubilantfoodworks_df_qtr_1.iloc[0])

# Remove the promoted header row and the rows not required for analysis, tag
# the 'Mar 20' column with the company name, and rename the company-specific
# title column to the generic label.
jubilantfoodworks_df_qtr_1_drop = (
    jubilantfoodworks_df_qtr_1
    .drop(index=jubilantfoodworks_df_qtr_1.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
    .rename(columns={'Mar 20': 'Jubilant Food Mar 20'})
    .rename(columns={'Quarterly Results of Jubilant Foodworks (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
jubilantfoodworks_df_qtr_1_drop_T = jubilantfoodworks_df_qtr_1_drop.T.reset_index()
jubilantfoodworks_df_qtr_1_drop_T = jubilantfoodworks_df_qtr_1_drop_T.rename(
    columns=jubilantfoodworks_df_qtr_1_drop_T.iloc[0]
)
jubilantfoodworks_df_qtr_1_drop_T = jubilantfoodworks_df_qtr_1_drop_T.drop(
    index=jubilantfoodworks_df_qtr_1_drop_T.index[0]
)

# Jubilant Foodworks, results page 3: download the markup as UTF-8 text.
xhtml_jubilantfoodworks_qtr_2 = url_get_contents('https://www.moneycontrol.com/financials/jubilantfoodworks/results/quarterly-results/JF04/3#JF04').decode('utf-8')

# Parse every table in the markup; the table at index 1 is the one used.
p = HTMLTableParser()
p.feed(xhtml_jubilantfoodworks_qtr_2)

# Raw frame: remove the column at position 6 and promote the first scraped
# row to the column header.
jubilantfoodworks_df_qtr_2 = pd.DataFrame(p.tables[1])
cols2 = [6]
jubilantfoodworks_df_qtr_2 = jubilantfoodworks_df_qtr_2.drop(columns=jubilantfoodworks_df_qtr_2.columns[cols2])
jubilantfoodworks_df_qtr_2 = jubilantfoodworks_df_qtr_2.rename(columns=jubilantfoodworks_df_qtr_2.iloc[0])

# Remove the promoted header row, then the rows not required for analysis.
jubilantfoodworks_df_qtr_2_drop = (
    jubilantfoodworks_df_qtr_2
    .drop(index=jubilantfoodworks_df_qtr_2.index[0])
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, *range(33, 36), *range(38, 50)])
)

# Label the five period columns by position with the company name, then
# rename the company-specific title column to the generic label.
jubilantfoodworks_df_qtr_2_drop = (
    jubilantfoodworks_df_qtr_2_drop
    .rename(columns=dict(zip(
        jubilantfoodworks_df_qtr_2_drop.columns[1:6],
        ['Jubilant Food Dec 19', 'Jubilant Food Sep 19', 'Jubilant Food Jun 19',
         'Jubilant Food Mar 19', 'Jubilant Food Dec 18'],
    )))
    .rename(columns={'Quarterly Results of Jubilant Foodworks (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose to one row per period column, expose the old labels as a column,
# use the first row as the header and drop it from the data.
jubilantfoodworks_df_qtr_2_drop_T = jubilantfoodworks_df_qtr_2_drop.T.reset_index()
jubilantfoodworks_df_qtr_2_drop_T = jubilantfoodworks_df_qtr_2_drop_T.rename(
    columns=jubilantfoodworks_df_qtr_2_drop_T.iloc[0]
)
jubilantfoodworks_df_qtr_2_drop_T = jubilantfoodworks_df_qtr_2_drop_T.drop(
    index=jubilantfoodworks_df_qtr_2_drop_T.index[0]
)

# Moneycontrol quarterly-results page 4 for Jubilant FoodWorks: download and decode the HTML.
xhtml_jubilantfoodworks_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/jubilantfoodworks/results/quarterly-results/JF04/4#JF04'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_jubilantfoodworks_qtr_3)

# Build the frame from the second parsed table (p.tables[1]).
jubilantfoodworks_df_qtr_3 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
jubilantfoodworks_df_qtr_3 = jubilantfoodworks_df_qtr_3.drop(
    columns=jubilantfoodworks_df_qtr_3.columns[cols2]
)

# Promote the first row to column headers ...
jubilantfoodworks_df_qtr_3 = jubilantfoodworks_df_qtr_3.rename(
    columns=jubilantfoodworks_df_qtr_3.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
jubilantfoodworks_df_qtr_3_drop = (
    jubilantfoodworks_df_qtr_3
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Sep 18 down to Sep 17 in that order.
jubilantfoodworks_df_qtr_3_drop = jubilantfoodworks_df_qtr_3_drop.rename(columns={
    jubilantfoodworks_df_qtr_3_drop.columns[position]: label
    for position, label in enumerate(
        ['Jubilant Food Sep 18', 'Jubilant Food Jun 18', 'Jubilant Food Mar 18',
         'Jubilant Food Dec 17', 'Jubilant Food Sep 17'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
jubilantfoodworks_df_qtr_3_drop = jubilantfoodworks_df_qtr_3_drop.rename(
    columns={'Quarterly Results of Jubilant Foodworks (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
jubilantfoodworks_df_qtr_3_drop_T = jubilantfoodworks_df_qtr_3_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
jubilantfoodworks_df_qtr_3_drop_T = jubilantfoodworks_df_qtr_3_drop_T.rename(
    columns=jubilantfoodworks_df_qtr_3_drop_T.iloc[0]
)
jubilantfoodworks_df_qtr_3_drop_T = jubilantfoodworks_df_qtr_3_drop_T.drop(index=0)

# Moneycontrol quarterly-results page 5 for Jubilant FoodWorks: download and decode the HTML.
xhtml_jubilantfoodworks_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/jubilantfoodworks/results/quarterly-results/JF04/5#JF04'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_jubilantfoodworks_qtr_4)

# Build the frame from the second parsed table (p.tables[1]).
jubilantfoodworks_df_qtr_4 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
jubilantfoodworks_df_qtr_4 = jubilantfoodworks_df_qtr_4.drop(
    columns=jubilantfoodworks_df_qtr_4.columns[cols2]
)

# Promote the first row to column headers ...
jubilantfoodworks_df_qtr_4 = jubilantfoodworks_df_qtr_4.rename(
    columns=jubilantfoodworks_df_qtr_4.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
jubilantfoodworks_df_qtr_4_drop = (
    jubilantfoodworks_df_qtr_4
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Jun 17 down to Jun 16 in that order.
jubilantfoodworks_df_qtr_4_drop = jubilantfoodworks_df_qtr_4_drop.rename(columns={
    jubilantfoodworks_df_qtr_4_drop.columns[position]: label
    for position, label in enumerate(
        ['Jubilant Food Jun 17', 'Jubilant Food Mar 17', 'Jubilant Food Dec 16',
         'Jubilant Food Sep 16', 'Jubilant Food Jun 16'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
jubilantfoodworks_df_qtr_4_drop = jubilantfoodworks_df_qtr_4_drop.rename(
    columns={'Quarterly Results of Jubilant Foodworks (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
jubilantfoodworks_df_qtr_4_drop_T = jubilantfoodworks_df_qtr_4_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
jubilantfoodworks_df_qtr_4_drop_T = jubilantfoodworks_df_qtr_4_drop_T.rename(
    columns=jubilantfoodworks_df_qtr_4_drop_T.iloc[0]
)
jubilantfoodworks_df_qtr_4_drop_T = jubilantfoodworks_df_qtr_4_drop_T.drop(index=0)

# Moneycontrol quarterly-results page 6 for Jubilant FoodWorks: download and decode the HTML.
xhtml_jubilantfoodworks_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/jubilantfoodworks/results/quarterly-results/JF04/6#JF04'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_jubilantfoodworks_qtr_5)

# Build the frame from the second parsed table (p.tables[1]).
jubilantfoodworks_df_qtr_5 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
jubilantfoodworks_df_qtr_5 = jubilantfoodworks_df_qtr_5.drop(
    columns=jubilantfoodworks_df_qtr_5.columns[cols2]
)

# Promote the first row to column headers ...
jubilantfoodworks_df_qtr_5 = jubilantfoodworks_df_qtr_5.rename(
    columns=jubilantfoodworks_df_qtr_5.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
jubilantfoodworks_df_qtr_5_drop = (
    jubilantfoodworks_df_qtr_5
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Mar 16 down to Mar 15 in that order.
jubilantfoodworks_df_qtr_5_drop = jubilantfoodworks_df_qtr_5_drop.rename(columns={
    jubilantfoodworks_df_qtr_5_drop.columns[position]: label
    for position, label in enumerate(
        ['Jubilant Food Mar 16', 'Jubilant Food Dec 15', 'Jubilant Food Sep 15',
         'Jubilant Food Jun 15', 'Jubilant Food Mar 15'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
jubilantfoodworks_df_qtr_5_drop = jubilantfoodworks_df_qtr_5_drop.rename(
    columns={'Quarterly Results of Jubilant Foodworks (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
jubilantfoodworks_df_qtr_5_drop_T = jubilantfoodworks_df_qtr_5_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
jubilantfoodworks_df_qtr_5_drop_T = jubilantfoodworks_df_qtr_5_drop_T.rename(
    columns=jubilantfoodworks_df_qtr_5_drop_T.iloc[0]
)
jubilantfoodworks_df_qtr_5_drop_T = jubilantfoodworks_df_qtr_5_drop_T.drop(index=0)

# Moneycontrol quarterly-results page 7 for Jubilant FoodWorks: download and decode the HTML.
xhtml_jubilantfoodworks_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/jubilantfoodworks/results/quarterly-results/JF04/7#JF04'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_jubilantfoodworks_qtr_6)

# Build the frame from the second parsed table (p.tables[1]).
jubilantfoodworks_df_qtr_6 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
jubilantfoodworks_df_qtr_6 = jubilantfoodworks_df_qtr_6.drop(
    columns=jubilantfoodworks_df_qtr_6.columns[cols2]
)

# Promote the first row to column headers ...
jubilantfoodworks_df_qtr_6 = jubilantfoodworks_df_qtr_6.rename(
    columns=jubilantfoodworks_df_qtr_6.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
jubilantfoodworks_df_qtr_6_drop = (
    jubilantfoodworks_df_qtr_6
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Dec 14 down to Dec 13 in that order.
jubilantfoodworks_df_qtr_6_drop = jubilantfoodworks_df_qtr_6_drop.rename(columns={
    jubilantfoodworks_df_qtr_6_drop.columns[position]: label
    for position, label in enumerate(
        ['Jubilant Food Dec 14', 'Jubilant Food Sep 14', 'Jubilant Food Jun 14',
         'Jubilant Food Mar 14', 'Jubilant Food Dec 13'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
jubilantfoodworks_df_qtr_6_drop = jubilantfoodworks_df_qtr_6_drop.rename(
    columns={'Quarterly Results of Jubilant Foodworks (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
jubilantfoodworks_df_qtr_6_drop_T = jubilantfoodworks_df_qtr_6_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
jubilantfoodworks_df_qtr_6_drop_T = jubilantfoodworks_df_qtr_6_drop_T.rename(
    columns=jubilantfoodworks_df_qtr_6_drop_T.iloc[0]
)
jubilantfoodworks_df_qtr_6_drop_T = jubilantfoodworks_df_qtr_6_drop_T.drop(index=0)



# Stack the six per-page Jubilant Food frames into one table with a single concat
# (one copy instead of five successive re-copies). Row order follows the list order
# and the index is renumbered from 0.
jubilantfoodworks_df = pd.concat(
    [
        jubilantfoodworks_df_qtr_1_drop_T,
        jubilantfoodworks_df_qtr_2_drop_T,
        jubilantfoodworks_df_qtr_3_drop_T,
        jubilantfoodworks_df_qtr_4_drop_T,
        jubilantfoodworks_df_qtr_5_drop_T,
        jubilantfoodworks_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the row order and renumber the index from 0 so that the positional
# index can be used as the join key below.
jubilantfoodworks_df_r = jubilantfoodworks_df.iloc[::-1].reset_index(drop=True)

# Join, index against index, with four frames built earlier in the notebook.
# NOTE(review): pd.merge defaults to an inner join, so any frame with fewer rows
# silently truncates the result - confirm all inputs have the same length.
jubilantfoodworks_df_r_price = pd.merge(jubilantfoodworks_df_r, prices_df_qtr_jub,
                                        left_index=True, right_index=True)
jubilantfoodworks_df_r_price = pd.merge(jubilantfoodworks_df_r_price, df_nse_fmcg_qtr,
                                        left_index=True, right_index=True)
jubilantfoodworks_df_r_price = pd.merge(jubilantfoodworks_df_r_price, nasdaq_index,
                                        left_index=True, right_index=True)
jubilantfoodworks_df_r_price = pd.merge(jubilantfoodworks_df_r_price, crude_close,
                                        left_index=True, right_index=True)

# Drop columns that are not wanted in the final table (presumably helper columns
# carried in by the merged frames - verify against the cells that build them).
jubilantfoodworks_df_final = jubilantfoodworks_df_r_price.drop(
    columns=['JUBLFOOD.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# Positional train/test split: the first `split_point` rows train, the remainder test.
split_point = 20
data_train_13 = jubilantfoodworks_df_final.iloc[:split_point]
data_test_13 = jubilantfoodworks_df_final.iloc[split_point:]



#### 14. United Breweries

In [None]:
# Scrape United Breweries' quarterly results and shape them into a DataFrame.

# Moneycontrol quarterly-results page 2: download and decode the HTML.
xhtml_unitedbreweries_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/unitedbreweries/results/quarterly-results/UB02/2#UB02'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_unitedbreweries_qtr_1)

# Build the frame from the second parsed table (p.tables[1]).
unitedbreweries_df_qtr_1 = pd.DataFrame(p.tables[1])

# Discard column positions 1-4 and 6; for a seven-column table this leaves the
# line-item column (0) and a single quarter column (5).
cols1 = [1, 2, 3, 4, 6]
unitedbreweries_df_qtr_1 = unitedbreweries_df_qtr_1.drop(
    columns=unitedbreweries_df_qtr_1.columns[cols1]
)

# Promote the first row to column headers ...
unitedbreweries_df_qtr_1 = unitedbreweries_df_qtr_1.rename(
    columns=unitedbreweries_df_qtr_1.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
unitedbreweries_df_qtr_1_drop = (
    unitedbreweries_df_qtr_1
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Prefix the quarter header with the company name, then give the line-item column
# the company-independent name shared by every company frame.
# NOTE(review): both renames match on the literal header text; if the page's header
# is not exactly 'Mar 20' the first rename is a silent no-op.
unitedbreweries_df_qtr_1_drop = (
    unitedbreweries_df_qtr_1_drop
    .rename(columns={'Mar 20': 'United Brewerie Mar 20'})
    .rename(columns={'Quarterly Results of United Breweries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
unitedbreweries_df_qtr_1_drop_T = unitedbreweries_df_qtr_1_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
unitedbreweries_df_qtr_1_drop_T = unitedbreweries_df_qtr_1_drop_T.rename(
    columns=unitedbreweries_df_qtr_1_drop_T.iloc[0]
)
unitedbreweries_df_qtr_1_drop_T = unitedbreweries_df_qtr_1_drop_T.drop(index=0)


# Moneycontrol quarterly-results page 3 for United Breweries: download and decode the HTML.
xhtml_unitedbreweries_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/unitedbreweries/results/quarterly-results/UB02/3#UB02'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_unitedbreweries_qtr_2)

# Build the frame from the second parsed table (p.tables[1]).
unitedbreweries_df_qtr_2 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
unitedbreweries_df_qtr_2 = unitedbreweries_df_qtr_2.drop(
    columns=unitedbreweries_df_qtr_2.columns[cols2]
)

# Promote the first row to column headers ...
unitedbreweries_df_qtr_2 = unitedbreweries_df_qtr_2.rename(
    columns=unitedbreweries_df_qtr_2.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
unitedbreweries_df_qtr_2_drop = (
    unitedbreweries_df_qtr_2
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Dec 19 down to Dec 18 in that order.
unitedbreweries_df_qtr_2_drop = unitedbreweries_df_qtr_2_drop.rename(columns={
    unitedbreweries_df_qtr_2_drop.columns[position]: label
    for position, label in enumerate(
        ['United Brewerie Dec 19', 'United Brewerie Sep 19', 'United Brewerie Jun 19',
         'United Brewerie Mar 19', 'United Brewerie Dec 18'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
unitedbreweries_df_qtr_2_drop = unitedbreweries_df_qtr_2_drop.rename(
    columns={'Quarterly Results of United Breweries (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
unitedbreweries_df_qtr_2_drop_T = unitedbreweries_df_qtr_2_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
unitedbreweries_df_qtr_2_drop_T = unitedbreweries_df_qtr_2_drop_T.rename(
    columns=unitedbreweries_df_qtr_2_drop_T.iloc[0]
)
unitedbreweries_df_qtr_2_drop_T = unitedbreweries_df_qtr_2_drop_T.drop(index=0)

# Moneycontrol quarterly-results page 4 for United Breweries: download and decode the HTML.
xhtml_unitedbreweries_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/unitedbreweries/results/quarterly-results/UB02/4#UB02'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_unitedbreweries_qtr_3)

# Build the frame from the second parsed table (p.tables[1]).
unitedbreweries_df_qtr_3 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
unitedbreweries_df_qtr_3 = unitedbreweries_df_qtr_3.drop(
    columns=unitedbreweries_df_qtr_3.columns[cols2]
)

# Promote the first row to column headers ...
unitedbreweries_df_qtr_3 = unitedbreweries_df_qtr_3.rename(
    columns=unitedbreweries_df_qtr_3.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
unitedbreweries_df_qtr_3_drop = (
    unitedbreweries_df_qtr_3
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Sep 18 down to Sep 17 in that order.
unitedbreweries_df_qtr_3_drop = unitedbreweries_df_qtr_3_drop.rename(columns={
    unitedbreweries_df_qtr_3_drop.columns[position]: label
    for position, label in enumerate(
        ['United Brewerie Sep 18', 'United Brewerie Jun 18', 'United Brewerie Mar 18',
         'United Brewerie Dec 17', 'United Brewerie Sep 17'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
unitedbreweries_df_qtr_3_drop = unitedbreweries_df_qtr_3_drop.rename(
    columns={'Quarterly Results of United Breweries (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
unitedbreweries_df_qtr_3_drop_T = unitedbreweries_df_qtr_3_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
unitedbreweries_df_qtr_3_drop_T = unitedbreweries_df_qtr_3_drop_T.rename(
    columns=unitedbreweries_df_qtr_3_drop_T.iloc[0]
)
unitedbreweries_df_qtr_3_drop_T = unitedbreweries_df_qtr_3_drop_T.drop(index=0)



# Moneycontrol quarterly-results page 5 for United Breweries: download and decode the HTML.
xhtml_unitedbreweries_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/unitedbreweries/results/quarterly-results/UB02/5#UB02'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_unitedbreweries_qtr_4)

# Build the frame from the second parsed table (p.tables[1]).
unitedbreweries_df_qtr_4 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
unitedbreweries_df_qtr_4 = unitedbreweries_df_qtr_4.drop(
    columns=unitedbreweries_df_qtr_4.columns[cols2]
)

# Promote the first row to column headers ...
unitedbreweries_df_qtr_4 = unitedbreweries_df_qtr_4.rename(
    columns=unitedbreweries_df_qtr_4.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
unitedbreweries_df_qtr_4_drop = (
    unitedbreweries_df_qtr_4
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Jun 17 down to Jun 16 in that order.
unitedbreweries_df_qtr_4_drop = unitedbreweries_df_qtr_4_drop.rename(columns={
    unitedbreweries_df_qtr_4_drop.columns[position]: label
    for position, label in enumerate(
        ['United Brewerie Jun 17', 'United Brewerie Mar 17', 'United Brewerie Dec 16',
         'United Brewerie Sep 16', 'United Brewerie Jun 16'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
unitedbreweries_df_qtr_4_drop = unitedbreweries_df_qtr_4_drop.rename(
    columns={'Quarterly Results of United Breweries (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
unitedbreweries_df_qtr_4_drop_T = unitedbreweries_df_qtr_4_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
unitedbreweries_df_qtr_4_drop_T = unitedbreweries_df_qtr_4_drop_T.rename(
    columns=unitedbreweries_df_qtr_4_drop_T.iloc[0]
)
unitedbreweries_df_qtr_4_drop_T = unitedbreweries_df_qtr_4_drop_T.drop(index=0)

# Moneycontrol quarterly-results page 6 for United Breweries: download and decode the HTML.
xhtml_unitedbreweries_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/unitedbreweries/results/quarterly-results/UB02/6#UB02'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_unitedbreweries_qtr_5)

# Build the frame from the second parsed table (p.tables[1]).
unitedbreweries_df_qtr_5 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
unitedbreweries_df_qtr_5 = unitedbreweries_df_qtr_5.drop(
    columns=unitedbreweries_df_qtr_5.columns[cols2]
)

# Promote the first row to column headers ...
unitedbreweries_df_qtr_5 = unitedbreweries_df_qtr_5.rename(
    columns=unitedbreweries_df_qtr_5.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
unitedbreweries_df_qtr_5_drop = (
    unitedbreweries_df_qtr_5
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Mar 16 down to Mar 15 in that order.
unitedbreweries_df_qtr_5_drop = unitedbreweries_df_qtr_5_drop.rename(columns={
    unitedbreweries_df_qtr_5_drop.columns[position]: label
    for position, label in enumerate(
        ['United Brewerie Mar 16', 'United Brewerie Dec 15', 'United Brewerie Sep 15',
         'United Brewerie Jun 15', 'United Brewerie Mar 15'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
unitedbreweries_df_qtr_5_drop = unitedbreweries_df_qtr_5_drop.rename(
    columns={'Quarterly Results of United Breweries (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
unitedbreweries_df_qtr_5_drop_T = unitedbreweries_df_qtr_5_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
unitedbreweries_df_qtr_5_drop_T = unitedbreweries_df_qtr_5_drop_T.rename(
    columns=unitedbreweries_df_qtr_5_drop_T.iloc[0]
)
unitedbreweries_df_qtr_5_drop_T = unitedbreweries_df_qtr_5_drop_T.drop(index=0)

# Moneycontrol quarterly-results page 7 for United Breweries: download and decode the HTML.
xhtml_unitedbreweries_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/unitedbreweries/results/quarterly-results/UB02/7#UB02'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_unitedbreweries_qtr_6)

# Build the frame from the second parsed table (p.tables[1]).
unitedbreweries_df_qtr_6 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
unitedbreweries_df_qtr_6 = unitedbreweries_df_qtr_6.drop(
    columns=unitedbreweries_df_qtr_6.columns[cols2]
)

# Promote the first row to column headers ...
unitedbreweries_df_qtr_6 = unitedbreweries_df_qtr_6.rename(
    columns=unitedbreweries_df_qtr_6.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
unitedbreweries_df_qtr_6_drop = (
    unitedbreweries_df_qtr_6
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Dec 14 down to Dec 13 in that order.
unitedbreweries_df_qtr_6_drop = unitedbreweries_df_qtr_6_drop.rename(columns={
    unitedbreweries_df_qtr_6_drop.columns[position]: label
    for position, label in enumerate(
        ['United Brewerie Dec 14', 'United Brewerie Sep 14', 'United Brewerie Jun 14',
         'United Brewerie Mar 14', 'United Brewerie Dec 13'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
unitedbreweries_df_qtr_6_drop = unitedbreweries_df_qtr_6_drop.rename(
    columns={'Quarterly Results of United Breweries (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
unitedbreweries_df_qtr_6_drop_T = unitedbreweries_df_qtr_6_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
unitedbreweries_df_qtr_6_drop_T = unitedbreweries_df_qtr_6_drop_T.rename(
    columns=unitedbreweries_df_qtr_6_drop_T.iloc[0]
)
unitedbreweries_df_qtr_6_drop_T = unitedbreweries_df_qtr_6_drop_T.drop(index=0)



# Stack the six per-page United Breweries frames into one table with a single concat
# (one copy instead of five successive re-copies). Row order follows the list order
# and the index is renumbered from 0.
unitedbreweries_df = pd.concat(
    [
        unitedbreweries_df_qtr_1_drop_T,
        unitedbreweries_df_qtr_2_drop_T,
        unitedbreweries_df_qtr_3_drop_T,
        unitedbreweries_df_qtr_4_drop_T,
        unitedbreweries_df_qtr_5_drop_T,
        unitedbreweries_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the row order (the hard-coded labels run newest to oldest, so this puts
# the oldest quarter first) and renumber the index from 0 so that the positional
# index can be used as the join key below.
unitedbreweries_df_r = unitedbreweries_df.iloc[::-1].reset_index(drop=True)

# Join, index against index, with four frames built earlier in the notebook.
# NOTE(review): pd.merge defaults to an inner join, so any frame with fewer rows
# silently truncates the result - confirm all inputs have the same length.
unitedbreweries_df_r_price = pd.merge(unitedbreweries_df_r, prices_df_qtr_ubl,
                                      left_index=True, right_index=True)
unitedbreweries_df_r_price = pd.merge(unitedbreweries_df_r_price, df_nse_fmcg_qtr,
                                      left_index=True, right_index=True)
unitedbreweries_df_r_price = pd.merge(unitedbreweries_df_r_price, nasdaq_index,
                                      left_index=True, right_index=True)
unitedbreweries_df_r_price = pd.merge(unitedbreweries_df_r_price, crude_close,
                                      left_index=True, right_index=True)

# Drop columns that are not wanted in the final table (presumably helper columns
# carried in by the merged frames - verify against the cells that build them).
unitedbreweries_df_final = unitedbreweries_df_r_price.drop(
    columns=['UBL.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# Positional train/test split: the first `split_point` rows train, the remainder test.
split_point = 20
data_train_14 = unitedbreweries_df_final.iloc[:split_point]
data_test_14 = unitedbreweries_df_final.iloc[split_point:]

#### 15. Avanti Feeds

In [None]:
# Scrape Avanti Feeds' quarterly results and shape them into a DataFrame.

# Moneycontrol quarterly-results page 2: download and decode the HTML.
xhtml_avantifeeds_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/avantifeeds/results/quarterly-results/AF21/2#AF21'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_avantifeeds_qtr_1)

# Build the frame from the second parsed table (p.tables[1]).
avantifeeds_df_qtr_1 = pd.DataFrame(p.tables[1])

# Discard column positions 1-4 and 6; for a seven-column table this leaves the
# line-item column (0) and a single quarter column (5).
cols1 = [1, 2, 3, 4, 6]
avantifeeds_df_qtr_1 = avantifeeds_df_qtr_1.drop(
    columns=avantifeeds_df_qtr_1.columns[cols1]
)

# Promote the first row to column headers ...
avantifeeds_df_qtr_1 = avantifeeds_df_qtr_1.rename(
    columns=avantifeeds_df_qtr_1.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
avantifeeds_df_qtr_1_drop = (
    avantifeeds_df_qtr_1
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Prefix the quarter header with the company name, then give the line-item column
# the company-independent name shared by every company frame.
# NOTE(review): both renames match on the literal header text; if the page's header
# is not exactly 'Mar 20' the first rename is a silent no-op.
avantifeeds_df_qtr_1_drop = (
    avantifeeds_df_qtr_1_drop
    .rename(columns={'Mar 20': 'Avanti Feeds Mar 20'})
    .rename(columns={'Quarterly Results of Avanti Feeds (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
avantifeeds_df_qtr_1_drop_T = avantifeeds_df_qtr_1_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
avantifeeds_df_qtr_1_drop_T = avantifeeds_df_qtr_1_drop_T.rename(
    columns=avantifeeds_df_qtr_1_drop_T.iloc[0]
)
avantifeeds_df_qtr_1_drop_T = avantifeeds_df_qtr_1_drop_T.drop(index=0)


# Moneycontrol quarterly-results page 3 for Avanti Feeds: download and decode the HTML.
xhtml_avantifeeds_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/avantifeeds/results/quarterly-results/AF21/3#AF21'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_avantifeeds_qtr_2)

# Build the frame from the second parsed table (p.tables[1]).
avantifeeds_df_qtr_2 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
avantifeeds_df_qtr_2 = avantifeeds_df_qtr_2.drop(
    columns=avantifeeds_df_qtr_2.columns[cols2]
)

# Promote the first row to column headers ...
avantifeeds_df_qtr_2 = avantifeeds_df_qtr_2.rename(
    columns=avantifeeds_df_qtr_2.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
avantifeeds_df_qtr_2_drop = (
    avantifeeds_df_qtr_2
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Dec 19 down to Dec 18 in that order.
avantifeeds_df_qtr_2_drop = avantifeeds_df_qtr_2_drop.rename(columns={
    avantifeeds_df_qtr_2_drop.columns[position]: label
    for position, label in enumerate(
        ['Avanti Feeds Dec 19', 'Avanti Feeds Sep 19', 'Avanti Feeds Jun 19',
         'Avanti Feeds Mar 19', 'Avanti Feeds Dec 18'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
avantifeeds_df_qtr_2_drop = avantifeeds_df_qtr_2_drop.rename(
    columns={'Quarterly Results of Avanti Feeds (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
avantifeeds_df_qtr_2_drop_T = avantifeeds_df_qtr_2_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
avantifeeds_df_qtr_2_drop_T = avantifeeds_df_qtr_2_drop_T.rename(
    columns=avantifeeds_df_qtr_2_drop_T.iloc[0]
)
avantifeeds_df_qtr_2_drop_T = avantifeeds_df_qtr_2_drop_T.drop(index=0)

# Moneycontrol quarterly-results page 4 for Avanti Feeds: download and decode the HTML.
xhtml_avantifeeds_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/avantifeeds/results/quarterly-results/AF21/4#AF21'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_avantifeeds_qtr_3)

# Build the frame from the second parsed table (p.tables[1]).
avantifeeds_df_qtr_3 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
avantifeeds_df_qtr_3 = avantifeeds_df_qtr_3.drop(
    columns=avantifeeds_df_qtr_3.columns[cols2]
)

# Promote the first row to column headers ...
avantifeeds_df_qtr_3 = avantifeeds_df_qtr_3.rename(
    columns=avantifeeds_df_qtr_3.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
avantifeeds_df_qtr_3_drop = (
    avantifeeds_df_qtr_3
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Sep 18 down to Sep 17 in that order.
avantifeeds_df_qtr_3_drop = avantifeeds_df_qtr_3_drop.rename(columns={
    avantifeeds_df_qtr_3_drop.columns[position]: label
    for position, label in enumerate(
        ['Avanti Feeds Sep 18', 'Avanti Feeds Jun 18', 'Avanti Feeds Mar 18',
         'Avanti Feeds Dec 17', 'Avanti Feeds Sep 17'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
avantifeeds_df_qtr_3_drop = avantifeeds_df_qtr_3_drop.rename(
    columns={'Quarterly Results of Avanti Feeds (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
avantifeeds_df_qtr_3_drop_T = avantifeeds_df_qtr_3_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
avantifeeds_df_qtr_3_drop_T = avantifeeds_df_qtr_3_drop_T.rename(
    columns=avantifeeds_df_qtr_3_drop_T.iloc[0]
)
avantifeeds_df_qtr_3_drop_T = avantifeeds_df_qtr_3_drop_T.drop(index=0)



# Moneycontrol quarterly-results page 5 for Avanti Feeds: download and decode the HTML.
xhtml_avantifeeds_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/avantifeeds/results/quarterly-results/AF21/5#AF21'
).decode('utf-8')

# Run the page through a fresh HTMLTableParser to collect its tables.
p = HTMLTableParser()
p.feed(xhtml_avantifeeds_qtr_4)

# Build the frame from the second parsed table (p.tables[1]).
avantifeeds_df_qtr_4 = pd.DataFrame(p.tables[1])

# Discard the column at position 6 (presumably an empty trailing column - TODO confirm).
cols2 = [6]
avantifeeds_df_qtr_4 = avantifeeds_df_qtr_4.drop(
    columns=avantifeeds_df_qtr_4.columns[cols2]
)

# Promote the first row to column headers ...
avantifeeds_df_qtr_4 = avantifeeds_df_qtr_4.rename(
    columns=avantifeeds_df_qtr_4.iloc[0]
)

# ... then remove that header row (label 0) and the rows not required for analysis.
avantifeeds_df_qtr_4_drop = (
    avantifeeds_df_qtr_4
    .drop(index=0)
    .drop(index=[2, 4, 8, *range(11, 16), 22, 26, 27, 33, 34, 35, *range(38, 50)])
)

# Relabel the quarter columns at positions 1-5 with company-prefixed names.
# The labels are hard-coded rather than read from the page, so this assumes the
# page lists Jun 17 down to Jun 16 in that order.
avantifeeds_df_qtr_4_drop = avantifeeds_df_qtr_4_drop.rename(columns={
    avantifeeds_df_qtr_4_drop.columns[position]: label
    for position, label in enumerate(
        ['Avanti Feeds Jun 17', 'Avanti Feeds Mar 17', 'Avanti Feeds Dec 16',
         'Avanti Feeds Sep 16', 'Avanti Feeds Jun 16'],
        start=1,
    )
})

# Give the line-item column the company-independent name shared by every company frame.
avantifeeds_df_qtr_4_drop = avantifeeds_df_qtr_4_drop.rename(
    columns={'Quarterly Results of Avanti Feeds (in Rs. Cr.)': 'Quarterly Results FMCG'}
)

# Flip to one row per column of the frame above; reset_index turns the old column
# labels into an ordinary column.
avantifeeds_df_qtr_4_drop_T = avantifeeds_df_qtr_4_drop.transpose().reset_index()

# Promote the first transposed row to column headers, then remove it (label 0).
avantifeeds_df_qtr_4_drop_T = avantifeeds_df_qtr_4_drop_T.rename(
    columns=avantifeeds_df_qtr_4_drop_T.iloc[0]
)
avantifeeds_df_qtr_4_drop_T = avantifeeds_df_qtr_4_drop_T.drop(index=0)

# Avanti Feeds, moneycontrol quarterly-results page 6: fetch the HTML and
# parse every table it contains.
xhtml_avantifeeds_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/avantifeeds/results/quarterly-results/AF21/6#AF21'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_avantifeeds_qtr_5)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
avantifeeds_df_qtr_5 = pd.DataFrame(p.tables[1])
cols2 = [6]
avantifeeds_df_qtr_5 = avantifeeds_df_qtr_5.drop(columns=avantifeeds_df_qtr_5.columns[cols2])
avantifeeds_df_qtr_5 = avantifeeds_df_qtr_5.rename(columns=avantifeeds_df_qtr_5.iloc[0])

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
avantifeeds_df_qtr_5_drop = (
    avantifeeds_df_qtr_5
    .drop(index=avantifeeds_df_qtr_5.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        avantifeeds_df_qtr_5.columns[position]: label
        for position, label in enumerate(
            ['Avanti Feeds Mar 16', 'Avanti Feeds Dec 15', 'Avanti Feeds Sep 15',
             'Avanti Feeds Jun 15', 'Avanti Feeds Mar 15'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of Avanti Feeds (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
avantifeeds_df_qtr_5_drop_T = avantifeeds_df_qtr_5_drop.transpose().reset_index()
avantifeeds_df_qtr_5_drop_T = (
    avantifeeds_df_qtr_5_drop_T
    .rename(columns=avantifeeds_df_qtr_5_drop_T.iloc[0])
    .drop(index=avantifeeds_df_qtr_5_drop_T.index[0])
)

# Avanti Feeds, moneycontrol quarterly-results page 7: fetch the HTML and
# parse every table it contains.
xhtml_avantifeeds_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/avantifeeds/results/quarterly-results/AF21/7#AF21'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_avantifeeds_qtr_6)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
avantifeeds_df_qtr_6 = pd.DataFrame(p.tables[1])
cols2 = [6]
avantifeeds_df_qtr_6 = avantifeeds_df_qtr_6.drop(columns=avantifeeds_df_qtr_6.columns[cols2])
avantifeeds_df_qtr_6 = avantifeeds_df_qtr_6.rename(columns=avantifeeds_df_qtr_6.iloc[0])

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
avantifeeds_df_qtr_6_drop = (
    avantifeeds_df_qtr_6
    .drop(index=avantifeeds_df_qtr_6.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        avantifeeds_df_qtr_6.columns[position]: label
        for position, label in enumerate(
            ['Avanti Feeds Dec 14', 'Avanti Feeds Sep 14', 'Avanti Feeds Jun 14',
             'Avanti Feeds Mar 14', 'Avanti Feeds Dec 13'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of Avanti Feeds (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
avantifeeds_df_qtr_6_drop_T = avantifeeds_df_qtr_6_drop.transpose().reset_index()
avantifeeds_df_qtr_6_drop_T = (
    avantifeeds_df_qtr_6_drop_T
    .rename(columns=avantifeeds_df_qtr_6_drop_T.iloc[0])
    .drop(index=avantifeeds_df_qtr_6_drop_T.index[0])
)



# Merge dataframes for Avanti Feeds: stack the six per-page frames with one
# concat call rather than re-copying the growing frame pair by pair.
avantifeeds_df = pd.concat(
    [
        avantifeeds_df_qtr_1_drop_T,
        avantifeeds_df_qtr_2_drop_T,
        avantifeeds_df_qtr_3_drop_T,
        avantifeeds_df_qtr_4_drop_T,
        avantifeeds_df_qtr_5_drop_T,
        avantifeeds_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the rows of the dataframe and renumber them from 0.
avantifeeds_df_r = avantifeeds_df.iloc[::-1].reset_index(drop=True)

# Join the externally prepared frames onto the results by row index.
# NOTE(review): these merges use pandas' default inner join, so any row
# position missing from one of the inputs is dropped silently — confirm
# that all five frames are aligned row-for-row.
avantifeeds_df_r_price = (
    avantifeeds_df_r
    .merge(prices_df_qtr_ava, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Discard the columns that are not wanted in the final frame.
avantifeeds_df_final = avantifeeds_df_r_price.drop(
    columns=['AVANTIFEED.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# Define split point: rows before it form the train set, the rest the test set.
split_point = 20

# Create train set
data_train_15 = avantifeeds_df_final.iloc[:split_point]
# Create test data set
data_test_15 = avantifeeds_df_final.iloc[split_point:]

#### 16. Vadilal Industries

In [None]:
# Scrape, select financial features and load them into a DataFrame.
# Vadilal Industries, moneycontrol quarterly-results page 2: fetch the HTML
# and parse every table it contains.
xhtml_vadilalindustries_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/vadilalindustries/results/quarterly-results/VI01/2#VI01'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_vadilalindustries_qtr_1)

# Load the second parsed table, discard the columns at positions 1-4 and 6
# (only positions 0 and 5 remain), and use the first row's values as the
# column labels.
vadilalindustries_df_qtr_1 = pd.DataFrame(p.tables[1])
cols1 = [1, 2, 3, 4, 6]
vadilalindustries_df_qtr_1 = vadilalindustries_df_qtr_1.drop(
    columns=vadilalindustries_df_qtr_1.columns[cols1]
)
vadilalindustries_df_qtr_1 = vadilalindustries_df_qtr_1.rename(
    columns=vadilalindustries_df_qtr_1.iloc[0]
)

# Remove the header row and the rows not required for analysis, then rename
# the headers to include the company name and the shared FMCG title.
# NOTE(review): rename ignores keys that are absent, so the 'Mar 20' mapping
# is a silent no-op if the scraped header text differs — confirm.
vadilalindustries_df_qtr_1_drop = (
    vadilalindustries_df_qtr_1
    .drop(index=vadilalindustries_df_qtr_1.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={'Mar 20': 'Vadilal Industries Mar 20'})
    .rename(columns={'Quarterly Results of Vadilal Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
vadilalindustries_df_qtr_1_drop_T = vadilalindustries_df_qtr_1_drop.transpose().reset_index()
vadilalindustries_df_qtr_1_drop_T = (
    vadilalindustries_df_qtr_1_drop_T
    .rename(columns=vadilalindustries_df_qtr_1_drop_T.iloc[0])
    .drop(index=vadilalindustries_df_qtr_1_drop_T.index[0])
)


# Vadilal Industries, moneycontrol quarterly-results page 3: fetch the HTML
# and parse every table it contains.
xhtml_vadilalindustries_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/vadilalindustries/results/quarterly-results/VI01/3#VI01'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_vadilalindustries_qtr_2)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
vadilalindustries_df_qtr_2 = pd.DataFrame(p.tables[1])
cols2 = [6]
vadilalindustries_df_qtr_2 = vadilalindustries_df_qtr_2.drop(
    columns=vadilalindustries_df_qtr_2.columns[cols2]
)
vadilalindustries_df_qtr_2 = vadilalindustries_df_qtr_2.rename(
    columns=vadilalindustries_df_qtr_2.iloc[0]
)

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
vadilalindustries_df_qtr_2_drop = (
    vadilalindustries_df_qtr_2
    .drop(index=vadilalindustries_df_qtr_2.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        vadilalindustries_df_qtr_2.columns[position]: label
        for position, label in enumerate(
            ['Vadilal Industries Dec 19', 'Vadilal Industries Sep 19', 'Vadilal Industries Jun 19',
             'Vadilal Industries Mar 19', 'Vadilal Industries Dec 18'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of Vadilal Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
vadilalindustries_df_qtr_2_drop_T = vadilalindustries_df_qtr_2_drop.transpose().reset_index()
vadilalindustries_df_qtr_2_drop_T = (
    vadilalindustries_df_qtr_2_drop_T
    .rename(columns=vadilalindustries_df_qtr_2_drop_T.iloc[0])
    .drop(index=vadilalindustries_df_qtr_2_drop_T.index[0])
)

# Vadilal Industries, moneycontrol quarterly-results page 4: fetch the HTML
# and parse every table it contains.
xhtml_vadilalindustries_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/vadilalindustries/results/quarterly-results/VI01/4#VI01'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_vadilalindustries_qtr_3)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
vadilalindustries_df_qtr_3 = pd.DataFrame(p.tables[1])
cols2 = [6]
vadilalindustries_df_qtr_3 = vadilalindustries_df_qtr_3.drop(
    columns=vadilalindustries_df_qtr_3.columns[cols2]
)
vadilalindustries_df_qtr_3 = vadilalindustries_df_qtr_3.rename(
    columns=vadilalindustries_df_qtr_3.iloc[0]
)

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
vadilalindustries_df_qtr_3_drop = (
    vadilalindustries_df_qtr_3
    .drop(index=vadilalindustries_df_qtr_3.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        vadilalindustries_df_qtr_3.columns[position]: label
        for position, label in enumerate(
            ['Vadilal Industries Sep 18', 'Vadilal Industries Jun 18', 'Vadilal Industries Mar 18',
             'Vadilal Industries Dec 17', 'Vadilal Industries Sep 17'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of Vadilal Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
vadilalindustries_df_qtr_3_drop_T = vadilalindustries_df_qtr_3_drop.transpose().reset_index()
vadilalindustries_df_qtr_3_drop_T = (
    vadilalindustries_df_qtr_3_drop_T
    .rename(columns=vadilalindustries_df_qtr_3_drop_T.iloc[0])
    .drop(index=vadilalindustries_df_qtr_3_drop_T.index[0])
)



# Vadilal Industries, moneycontrol quarterly-results page 5: fetch the HTML
# and parse every table it contains.
xhtml_vadilalindustries_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/vadilalindustries/results/quarterly-results/VI01/5#VI01'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_vadilalindustries_qtr_4)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
vadilalindustries_df_qtr_4 = pd.DataFrame(p.tables[1])
cols2 = [6]
vadilalindustries_df_qtr_4 = vadilalindustries_df_qtr_4.drop(
    columns=vadilalindustries_df_qtr_4.columns[cols2]
)
vadilalindustries_df_qtr_4 = vadilalindustries_df_qtr_4.rename(
    columns=vadilalindustries_df_qtr_4.iloc[0]
)

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
vadilalindustries_df_qtr_4_drop = (
    vadilalindustries_df_qtr_4
    .drop(index=vadilalindustries_df_qtr_4.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        vadilalindustries_df_qtr_4.columns[position]: label
        for position, label in enumerate(
            ['Vadilal Industries Jun 17', 'Vadilal Industries Mar 17', 'Vadilal Industries Dec 16',
             'Vadilal Industries Sep 16', 'Vadilal Industries Jun 16'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of Vadilal Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
vadilalindustries_df_qtr_4_drop_T = vadilalindustries_df_qtr_4_drop.transpose().reset_index()
vadilalindustries_df_qtr_4_drop_T = (
    vadilalindustries_df_qtr_4_drop_T
    .rename(columns=vadilalindustries_df_qtr_4_drop_T.iloc[0])
    .drop(index=vadilalindustries_df_qtr_4_drop_T.index[0])
)

# Vadilal Industries, moneycontrol quarterly-results page 6: fetch the HTML
# and parse every table it contains.
xhtml_vadilalindustries_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/vadilalindustries/results/quarterly-results/VI01/6#VI01'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_vadilalindustries_qtr_5)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
vadilalindustries_df_qtr_5 = pd.DataFrame(p.tables[1])
cols2 = [6]
vadilalindustries_df_qtr_5 = vadilalindustries_df_qtr_5.drop(
    columns=vadilalindustries_df_qtr_5.columns[cols2]
)
vadilalindustries_df_qtr_5 = vadilalindustries_df_qtr_5.rename(
    columns=vadilalindustries_df_qtr_5.iloc[0]
)

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
vadilalindustries_df_qtr_5_drop = (
    vadilalindustries_df_qtr_5
    .drop(index=vadilalindustries_df_qtr_5.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        vadilalindustries_df_qtr_5.columns[position]: label
        for position, label in enumerate(
            ['Vadilal Industries Mar 16', 'Vadilal Industries Dec 15', 'Vadilal Industries Sep 15',
             'Vadilal Industries Jun 15', 'Vadilal Industries Mar 15'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of Vadilal Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
vadilalindustries_df_qtr_5_drop_T = vadilalindustries_df_qtr_5_drop.transpose().reset_index()
vadilalindustries_df_qtr_5_drop_T = (
    vadilalindustries_df_qtr_5_drop_T
    .rename(columns=vadilalindustries_df_qtr_5_drop_T.iloc[0])
    .drop(index=vadilalindustries_df_qtr_5_drop_T.index[0])
)

# Vadilal Industries, moneycontrol quarterly-results page 7: fetch the HTML
# and parse every table it contains.
xhtml_vadilalindustries_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/vadilalindustries/results/quarterly-results/VI01/7#VI01'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_vadilalindustries_qtr_6)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
vadilalindustries_df_qtr_6 = pd.DataFrame(p.tables[1])
cols2 = [6]
vadilalindustries_df_qtr_6 = vadilalindustries_df_qtr_6.drop(
    columns=vadilalindustries_df_qtr_6.columns[cols2]
)
vadilalindustries_df_qtr_6 = vadilalindustries_df_qtr_6.rename(
    columns=vadilalindustries_df_qtr_6.iloc[0]
)

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
vadilalindustries_df_qtr_6_drop = (
    vadilalindustries_df_qtr_6
    .drop(index=vadilalindustries_df_qtr_6.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        vadilalindustries_df_qtr_6.columns[position]: label
        for position, label in enumerate(
            ['Vadilal Industries Dec 14', 'Vadilal Industries Sep 14', 'Vadilal Industries Jun 14',
             'Vadilal Industries Mar 14', 'Vadilal Industries Dec 13'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of Vadilal Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
vadilalindustries_df_qtr_6_drop_T = vadilalindustries_df_qtr_6_drop.transpose().reset_index()
vadilalindustries_df_qtr_6_drop_T = (
    vadilalindustries_df_qtr_6_drop_T
    .rename(columns=vadilalindustries_df_qtr_6_drop_T.iloc[0])
    .drop(index=vadilalindustries_df_qtr_6_drop_T.index[0])
)



# Merge dataframes for Vadilal Industries: stack the six per-page frames with
# one concat call rather than re-copying the growing frame pair by pair.
vadilalindustries_df = pd.concat(
    [
        vadilalindustries_df_qtr_1_drop_T,
        vadilalindustries_df_qtr_2_drop_T,
        vadilalindustries_df_qtr_3_drop_T,
        vadilalindustries_df_qtr_4_drop_T,
        vadilalindustries_df_qtr_5_drop_T,
        vadilalindustries_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the rows of the dataframe and renumber them from 0.
vadilalindustries_df_r = vadilalindustries_df.iloc[::-1].reset_index(drop=True)

# Join the externally prepared frames onto the results by row index.
# NOTE(review): these merges use pandas' default inner join, so any row
# position missing from one of the inputs is dropped silently — confirm
# that all five frames are aligned row-for-row.
vadilalindustries_df_r_price = (
    vadilalindustries_df_r
    .merge(prices_df_qtr_vad, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Discard the columns that are not wanted in the final frame.
vadilalindustries_df_final = vadilalindustries_df_r_price.drop(
    columns=['VADILALIND.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# Define split point: rows before it form the train set, the rest the test set.
split_point = 20

# Create train set
data_train_16 = vadilalindustries_df_final.iloc[:split_point]
# Create test data set
data_test_16 = vadilalindustries_df_final.iloc[split_point:]

#### 17. LT Foods

In [None]:
# Scrape, select financial features and load them into a DataFrame.
# LT Foods, moneycontrol quarterly-results page 2: fetch the HTML and parse
# every table it contains.
xhtml_ltfoods_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/ltfoods/results/quarterly-results/LTF/2#LTF'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ltfoods_qtr_1)

# Load the second parsed table, discard the columns at positions 1-4 and 6
# (only positions 0 and 5 remain), and use the first row's values as the
# column labels.
ltfoods_df_qtr_1 = pd.DataFrame(p.tables[1])
cols1 = [1, 2, 3, 4, 6]
ltfoods_df_qtr_1 = ltfoods_df_qtr_1.drop(columns=ltfoods_df_qtr_1.columns[cols1])
ltfoods_df_qtr_1 = ltfoods_df_qtr_1.rename(columns=ltfoods_df_qtr_1.iloc[0])

# Remove the header row and the rows not required for analysis, then rename
# the headers to include the company name and the shared FMCG title.
# NOTE(review): rename ignores keys that are absent, so the 'Mar 20' mapping
# is a silent no-op if the scraped header text differs — confirm.
ltfoods_df_qtr_1_drop = (
    ltfoods_df_qtr_1
    .drop(index=ltfoods_df_qtr_1.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={'Mar 20': 'LT Foods Mar 20'})
    .rename(columns={'Quarterly Results of LT Foods (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
ltfoods_df_qtr_1_drop_T = ltfoods_df_qtr_1_drop.transpose().reset_index()
ltfoods_df_qtr_1_drop_T = (
    ltfoods_df_qtr_1_drop_T
    .rename(columns=ltfoods_df_qtr_1_drop_T.iloc[0])
    .drop(index=ltfoods_df_qtr_1_drop_T.index[0])
)


# LT Foods, moneycontrol quarterly-results page 3: fetch the HTML and parse
# every table it contains.
xhtml_ltfoods_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/ltfoods/results/quarterly-results/LTF/3#LTF'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ltfoods_qtr_2)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
ltfoods_df_qtr_2 = pd.DataFrame(p.tables[1])
cols2 = [6]
ltfoods_df_qtr_2 = ltfoods_df_qtr_2.drop(columns=ltfoods_df_qtr_2.columns[cols2])
ltfoods_df_qtr_2 = ltfoods_df_qtr_2.rename(columns=ltfoods_df_qtr_2.iloc[0])

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
ltfoods_df_qtr_2_drop = (
    ltfoods_df_qtr_2
    .drop(index=ltfoods_df_qtr_2.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        ltfoods_df_qtr_2.columns[position]: label
        for position, label in enumerate(
            ['LT Foods Dec 19', 'LT Foods Sep 19', 'LT Foods Jun 19',
             'LT Foods Mar 19', 'LT Foods Dec 18'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of LT Foods (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
ltfoods_df_qtr_2_drop_T = ltfoods_df_qtr_2_drop.transpose().reset_index()
ltfoods_df_qtr_2_drop_T = (
    ltfoods_df_qtr_2_drop_T
    .rename(columns=ltfoods_df_qtr_2_drop_T.iloc[0])
    .drop(index=ltfoods_df_qtr_2_drop_T.index[0])
)

# LT Foods, moneycontrol quarterly-results page 4: fetch the HTML and parse
# every table it contains.
xhtml_ltfoods_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/ltfoods/results/quarterly-results/LTF/4#LTF'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ltfoods_qtr_3)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
ltfoods_df_qtr_3 = pd.DataFrame(p.tables[1])
cols2 = [6]
ltfoods_df_qtr_3 = ltfoods_df_qtr_3.drop(columns=ltfoods_df_qtr_3.columns[cols2])
ltfoods_df_qtr_3 = ltfoods_df_qtr_3.rename(columns=ltfoods_df_qtr_3.iloc[0])

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
ltfoods_df_qtr_3_drop = (
    ltfoods_df_qtr_3
    .drop(index=ltfoods_df_qtr_3.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        ltfoods_df_qtr_3.columns[position]: label
        for position, label in enumerate(
            ['LT Foods Sep 18', 'LT Foods Jun 18', 'LT Foods Mar 18',
             'LT Foods Dec 17', 'LT Foods Sep 17'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of LT Foods (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
ltfoods_df_qtr_3_drop_T = ltfoods_df_qtr_3_drop.transpose().reset_index()
ltfoods_df_qtr_3_drop_T = (
    ltfoods_df_qtr_3_drop_T
    .rename(columns=ltfoods_df_qtr_3_drop_T.iloc[0])
    .drop(index=ltfoods_df_qtr_3_drop_T.index[0])
)



# LT Foods, moneycontrol quarterly-results page 5: fetch the HTML and parse
# every table it contains.
xhtml_ltfoods_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/ltfoods/results/quarterly-results/LTF/5#LTF'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ltfoods_qtr_4)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
ltfoods_df_qtr_4 = pd.DataFrame(p.tables[1])
cols2 = [6]
ltfoods_df_qtr_4 = ltfoods_df_qtr_4.drop(columns=ltfoods_df_qtr_4.columns[cols2])
ltfoods_df_qtr_4 = ltfoods_df_qtr_4.rename(columns=ltfoods_df_qtr_4.iloc[0])

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
ltfoods_df_qtr_4_drop = (
    ltfoods_df_qtr_4
    .drop(index=ltfoods_df_qtr_4.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        ltfoods_df_qtr_4.columns[position]: label
        for position, label in enumerate(
            ['LT Foods Jun 17', 'LT Foods Mar 17', 'LT Foods Dec 16',
             'LT Foods Sep 16', 'LT Foods Jun 16'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of LT Foods (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
ltfoods_df_qtr_4_drop_T = ltfoods_df_qtr_4_drop.transpose().reset_index()
ltfoods_df_qtr_4_drop_T = (
    ltfoods_df_qtr_4_drop_T
    .rename(columns=ltfoods_df_qtr_4_drop_T.iloc[0])
    .drop(index=ltfoods_df_qtr_4_drop_T.index[0])
)

# LT Foods, moneycontrol quarterly-results page 6: fetch the HTML and parse
# every table it contains.
xhtml_ltfoods_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/ltfoods/results/quarterly-results/LTF/6#LTF'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ltfoods_qtr_5)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
ltfoods_df_qtr_5 = pd.DataFrame(p.tables[1])
cols2 = [6]
ltfoods_df_qtr_5 = ltfoods_df_qtr_5.drop(columns=ltfoods_df_qtr_5.columns[cols2])
ltfoods_df_qtr_5 = ltfoods_df_qtr_5.rename(columns=ltfoods_df_qtr_5.iloc[0])

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
ltfoods_df_qtr_5_drop = (
    ltfoods_df_qtr_5
    .drop(index=ltfoods_df_qtr_5.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        ltfoods_df_qtr_5.columns[position]: label
        for position, label in enumerate(
            ['LT Foods Mar 16', 'LT Foods Dec 15', 'LT Foods Sep 15',
             'LT Foods Jun 15', 'LT Foods Mar 15'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of LT Foods (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
ltfoods_df_qtr_5_drop_T = ltfoods_df_qtr_5_drop.transpose().reset_index()
ltfoods_df_qtr_5_drop_T = (
    ltfoods_df_qtr_5_drop_T
    .rename(columns=ltfoods_df_qtr_5_drop_T.iloc[0])
    .drop(index=ltfoods_df_qtr_5_drop_T.index[0])
)

# LT Foods, moneycontrol quarterly-results page 7: fetch the HTML and parse
# every table it contains.
xhtml_ltfoods_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/ltfoods/results/quarterly-results/LTF/7#LTF'
).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml_ltfoods_qtr_6)

# Load the second parsed table, discard the column at position 6, and use
# the first row's values as the column labels.
ltfoods_df_qtr_6 = pd.DataFrame(p.tables[1])
cols2 = [6]
ltfoods_df_qtr_6 = ltfoods_df_qtr_6.drop(columns=ltfoods_df_qtr_6.columns[cols2])
ltfoods_df_qtr_6 = ltfoods_df_qtr_6.rename(columns=ltfoods_df_qtr_6.iloc[0])

# Remove the header row and the rows not required for analysis, rename the
# five quarter columns (positions 1-5) to include the company name, and give
# the results-title column the label shared by every FMCG company.
ltfoods_df_qtr_6_drop = (
    ltfoods_df_qtr_6
    .drop(index=ltfoods_df_qtr_6.index[0])
    .drop(index=[2, 4, 8, 11, 12, 13, 14, 15, 22, 26, 27, 33, 34, 35,
                 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    .rename(columns={
        ltfoods_df_qtr_6.columns[position]: label
        for position, label in enumerate(
            ['LT Foods Dec 14', 'LT Foods Sep 14', 'LT Foods Jun 14',
             'LT Foods Mar 14', 'LT Foods Dec 13'],
            start=1,
        )
    })
    .rename(columns={'Quarterly Results of LT Foods (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose, move the old index into a column, promote the first row to
# column labels, and then drop that row.
ltfoods_df_qtr_6_drop_T = ltfoods_df_qtr_6_drop.transpose().reset_index()
ltfoods_df_qtr_6_drop_T = (
    ltfoods_df_qtr_6_drop_T
    .rename(columns=ltfoods_df_qtr_6_drop_T.iloc[0])
    .drop(index=ltfoods_df_qtr_6_drop_T.index[0])
)



# Merge dataframes for LT Foods: stack the six per-page frames with one
# concat call rather than re-copying the growing frame pair by pair.
ltfoods_df = pd.concat(
    [
        ltfoods_df_qtr_1_drop_T,
        ltfoods_df_qtr_2_drop_T,
        ltfoods_df_qtr_3_drop_T,
        ltfoods_df_qtr_4_drop_T,
        ltfoods_df_qtr_5_drop_T,
        ltfoods_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the rows of the dataframe and renumber them from 0.
ltfoods_df_r = ltfoods_df.iloc[::-1].reset_index(drop=True)

# Join the externally prepared frames onto the results by row index.
# NOTE(review): these merges use pandas' default inner join, so any row
# position missing from one of the inputs is dropped silently — confirm
# that all five frames are aligned row-for-row.
ltfoods_df_r_price = (
    ltfoods_df_r
    .merge(prices_df_qtr_daa, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Discard the columns that are not wanted in the final frame.
ltfoods_df_final = ltfoods_df_r_price.drop(
    columns=['DAAWAT.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# Define split point: rows before it form the train set, the rest the test set.
split_point = 20

# Create train set
data_train_17 = ltfoods_df_final.iloc[:split_point]
# Create test data set
data_test_17 = ltfoods_df_final.iloc[split_point:]

#### 18. Colgate-Palmolive (India) Ltd.

In [None]:
# Colgate-Palmolive, quarterly-results page 2: download the page, parse its
# tables and build a frame for the single column headed 'Mar 20'.

xhtml_colgatepalmolive_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/colgatepalmolive(india)/results/quarterly-results/CPI/2#CPI'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_colgatepalmolive_qtr_1)

# Only the second parsed table is used.
colgatepalmolive_df_qtr_1 = pd.DataFrame(p.tables[1])

# Discard the columns at positions 1-4 and 6.
cols1 = [1, 2, 3, 4, 6]
colgatepalmolive_df_qtr_1 = colgatepalmolive_df_qtr_1.drop(
    columns=colgatepalmolive_df_qtr_1.columns[cols1]
)

# Use the values of the first row as column labels (the row is removed below).
colgatepalmolive_df_qtr_1 = colgatepalmolive_df_qtr_1.rename(
    columns=colgatepalmolive_df_qtr_1.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, then
# prefix the 'Mar 20' column with the company name and give the title column
# the name shared by the other companies' frames.
colgatepalmolive_df_qtr_1_drop = (
    colgatepalmolive_df_qtr_1
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns={'Mar 20': 'Colgate-Palmolive (India) Ltd. Mar 20'})
    .rename(columns={'Quarterly Results of Colgate Palmolive (India) (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so the quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
colgatepalmolive_df_qtr_1_drop_T = (
    colgatepalmolive_df_qtr_1_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)


# Colgate-Palmolive, quarterly-results page 3: five quarter columns that are
# labelled by position as Dec 19 back to Dec 18.
xhtml_colgatepalmolive_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/colgatepalmolive(india)/results/quarterly-results/CPI/3#CPI'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_colgatepalmolive_qtr_2)

# Only the second parsed table is used.
colgatepalmolive_df_qtr_2 = pd.DataFrame(p.tables[1])

# Discard the column at position 6.
cols2 = [6]
colgatepalmolive_df_qtr_2 = colgatepalmolive_df_qtr_2.drop(
    columns=colgatepalmolive_df_qtr_2.columns[cols2]
)

# Use the values of the first row as column labels (the row is removed below).
colgatepalmolive_df_qtr_2 = colgatepalmolive_df_qtr_2.rename(
    columns=colgatepalmolive_df_qtr_2.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, label
# columns 1-5 by position with company + quarter, and give the title column
# the name shared by the other companies' frames.
colgatepalmolive_df_qtr_2_drop = (
    colgatepalmolive_df_qtr_2
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns=dict(zip(
        colgatepalmolive_df_qtr_2.columns[1:6],
        [
            'Colgate-Palmolive (India) Ltd. Dec 19',
            'Colgate-Palmolive (India) Ltd. Sep 19',
            'Colgate-Palmolive (India) Ltd. Jun 19',
            'Colgate-Palmolive (India) Ltd. Mar 19',
            'Colgate-Palmolive (India) Ltd. Dec 18',
        ],
    )))
    .rename(columns={'Quarterly Results of Colgate Palmolive (India) (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so each quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
colgatepalmolive_df_qtr_2_drop_T = (
    colgatepalmolive_df_qtr_2_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)

# Colgate-Palmolive, quarterly-results page 4: five quarter columns that are
# labelled by position as Sep 18 back to Sep 17.
xhtml_colgatepalmolive_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/colgatepalmolive(india)/results/quarterly-results/CPI/4#CPI'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_colgatepalmolive_qtr_3)

# Only the second parsed table is used.
colgatepalmolive_df_qtr_3 = pd.DataFrame(p.tables[1])

# Discard the column at position 6.
cols2 = [6]
colgatepalmolive_df_qtr_3 = colgatepalmolive_df_qtr_3.drop(
    columns=colgatepalmolive_df_qtr_3.columns[cols2]
)

# Use the values of the first row as column labels (the row is removed below).
colgatepalmolive_df_qtr_3 = colgatepalmolive_df_qtr_3.rename(
    columns=colgatepalmolive_df_qtr_3.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, label
# columns 1-5 by position with company + quarter, and give the title column
# the name shared by the other companies' frames.
colgatepalmolive_df_qtr_3_drop = (
    colgatepalmolive_df_qtr_3
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns=dict(zip(
        colgatepalmolive_df_qtr_3.columns[1:6],
        [
            'Colgate-Palmolive (India) Ltd. Sep 18',
            'Colgate-Palmolive (India) Ltd. Jun 18',
            'Colgate-Palmolive (India) Ltd. Mar 18',
            'Colgate-Palmolive (India) Ltd. Dec 17',
            'Colgate-Palmolive (India) Ltd. Sep 17',
        ],
    )))
    .rename(columns={'Quarterly Results of Colgate Palmolive (India) (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so each quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
colgatepalmolive_df_qtr_3_drop_T = (
    colgatepalmolive_df_qtr_3_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)



# Colgate-Palmolive, quarterly-results page 5: five quarter columns that are
# labelled by position as Jun 17 back to Jun 16.
xhtml_colgatepalmolive_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/colgatepalmolive(india)/results/quarterly-results/CPI/5#CPI'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_colgatepalmolive_qtr_4)

# Only the second parsed table is used.
colgatepalmolive_df_qtr_4 = pd.DataFrame(p.tables[1])

# Discard the column at position 6.
cols2 = [6]
colgatepalmolive_df_qtr_4 = colgatepalmolive_df_qtr_4.drop(
    columns=colgatepalmolive_df_qtr_4.columns[cols2]
)

# Use the values of the first row as column labels (the row is removed below).
colgatepalmolive_df_qtr_4 = colgatepalmolive_df_qtr_4.rename(
    columns=colgatepalmolive_df_qtr_4.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, label
# columns 1-5 by position with company + quarter, and give the title column
# the name shared by the other companies' frames.
colgatepalmolive_df_qtr_4_drop = (
    colgatepalmolive_df_qtr_4
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns=dict(zip(
        colgatepalmolive_df_qtr_4.columns[1:6],
        [
            'Colgate-Palmolive (India) Ltd. Jun 17',
            'Colgate-Palmolive (India) Ltd. Mar 17',
            'Colgate-Palmolive (India) Ltd. Dec 16',
            'Colgate-Palmolive (India) Ltd. Sep 16',
            'Colgate-Palmolive (India) Ltd. Jun 16',
        ],
    )))
    .rename(columns={'Quarterly Results of Colgate Palmolive (India) (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so each quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
colgatepalmolive_df_qtr_4_drop_T = (
    colgatepalmolive_df_qtr_4_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)

# Colgate-Palmolive, quarterly-results page 6: five quarter columns that are
# labelled by position as Mar 16 back to Mar 15.
xhtml_colgatepalmolive_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/colgatepalmolive(india)/results/quarterly-results/CPI/6#CPI'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_colgatepalmolive_qtr_5)

# Only the second parsed table is used.
colgatepalmolive_df_qtr_5 = pd.DataFrame(p.tables[1])

# Discard the column at position 6.
cols2 = [6]
colgatepalmolive_df_qtr_5 = colgatepalmolive_df_qtr_5.drop(
    columns=colgatepalmolive_df_qtr_5.columns[cols2]
)

# Use the values of the first row as column labels (the row is removed below).
colgatepalmolive_df_qtr_5 = colgatepalmolive_df_qtr_5.rename(
    columns=colgatepalmolive_df_qtr_5.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, label
# columns 1-5 by position with company + quarter, and give the title column
# the name shared by the other companies' frames.
colgatepalmolive_df_qtr_5_drop = (
    colgatepalmolive_df_qtr_5
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns=dict(zip(
        colgatepalmolive_df_qtr_5.columns[1:6],
        [
            'Colgate-Palmolive (India) Ltd. Mar 16',
            'Colgate-Palmolive (India) Ltd. Dec 15',
            'Colgate-Palmolive (India) Ltd. Sep 15',
            'Colgate-Palmolive (India) Ltd. Jun 15',
            'Colgate-Palmolive (India) Ltd. Mar 15',
        ],
    )))
    .rename(columns={'Quarterly Results of Colgate Palmolive (India) (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so each quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
colgatepalmolive_df_qtr_5_drop_T = (
    colgatepalmolive_df_qtr_5_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)

# Colgate-Palmolive, quarterly-results page 7: five quarter columns that are
# labelled by position as Dec 14 back to Dec 13.
xhtml_colgatepalmolive_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/colgatepalmolive(india)/results/quarterly-results/CPI/7#CPI'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_colgatepalmolive_qtr_6)

# Only the second parsed table is used.
colgatepalmolive_df_qtr_6 = pd.DataFrame(p.tables[1])

# Discard the column at position 6.
cols2 = [6]
colgatepalmolive_df_qtr_6 = colgatepalmolive_df_qtr_6.drop(
    columns=colgatepalmolive_df_qtr_6.columns[cols2]
)

# Use the values of the first row as column labels (the row is removed below).
colgatepalmolive_df_qtr_6 = colgatepalmolive_df_qtr_6.rename(
    columns=colgatepalmolive_df_qtr_6.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, label
# columns 1-5 by position with company + quarter, and give the title column
# the name shared by the other companies' frames.
colgatepalmolive_df_qtr_6_drop = (
    colgatepalmolive_df_qtr_6
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns=dict(zip(
        colgatepalmolive_df_qtr_6.columns[1:6],
        [
            'Colgate-Palmolive (India) Ltd. Dec 14',
            'Colgate-Palmolive (India) Ltd. Sep 14',
            'Colgate-Palmolive (India) Ltd. Jun 14',
            'Colgate-Palmolive (India) Ltd. Mar 14',
            'Colgate-Palmolive (India) Ltd. Dec 13',
        ],
    )))
    .rename(columns={'Quarterly Results of Colgate Palmolive (India) (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so each quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
colgatepalmolive_df_qtr_6_drop_T = (
    colgatepalmolive_df_qtr_6_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)



# Stack the six per-page Colgate-Palmolive frames in one concat call. The
# frames are listed newest quarter first (Mar 20 ... Dec 13); ignore_index
# renumbers the rows from 0.
colgatepalmolive_df = pd.concat(
    [
        colgatepalmolive_df_qtr_1_drop_T,
        colgatepalmolive_df_qtr_2_drop_T,
        colgatepalmolive_df_qtr_3_drop_T,
        colgatepalmolive_df_qtr_4_drop_T,
        colgatepalmolive_df_qtr_5_drop_T,
        colgatepalmolive_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the row order (oldest quarter first) and renumber the rows from 0.
colgatepalmolive_df_r = colgatepalmolive_df.iloc[::-1].reset_index(drop=True)

# Attach the columns of the four frames built in earlier cells. Every merge is
# an inner join on the row index, so a row missing from any frame is dropped.
# NOTE(review): assumes the merged frames are indexed 0..n-1 in the same
# quarter order as colgatepalmolive_df_r - confirm against the cells that
# build them.
colgatepalmolive_df_r_price = (
    colgatepalmolive_df_r
    .merge(prices_df_qtr_col, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Drop columns that came along with the merged frames and are not modelled.
# Raises KeyError if any of these labels is absent.
colgatepalmolive_df_final = colgatepalmolive_df_r_price.drop(
    columns=['COLPAL.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# Positional split without shuffling: the first `split_point` rows are the
# train set, the remaining rows the test set.
split_point = 20
data_train_18 = colgatepalmolive_df_final.iloc[:split_point]
data_test_18 = colgatepalmolive_df_final.iloc[split_point:]

#### 19. Pidilite Industries

In [None]:
# Pidilite Industries, quarterly-results page 2: download the page, parse its
# tables and build a frame for the single column headed 'Mar 20'.

xhtml_pidiliteindustries_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/pidiliteindustries/results/quarterly-results/PI11/2#PI11'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_pidiliteindustries_qtr_1)

# Only the second parsed table is used.
pidiliteindustries_df_qtr_1 = pd.DataFrame(p.tables[1])

# Discard the columns at positions 1-4 and 6.
cols1 = [1, 2, 3, 4, 6]
pidiliteindustries_df_qtr_1 = pidiliteindustries_df_qtr_1.drop(
    columns=pidiliteindustries_df_qtr_1.columns[cols1]
)

# Use the values of the first row as column labels (the row is removed below).
pidiliteindustries_df_qtr_1 = pidiliteindustries_df_qtr_1.rename(
    columns=pidiliteindustries_df_qtr_1.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, then
# prefix the 'Mar 20' column with the company name and give the title column
# the name shared by the other companies' frames.
pidiliteindustries_df_qtr_1_drop = (
    pidiliteindustries_df_qtr_1
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns={'Mar 20': 'Pidilite Industries Mar 20'})
    .rename(columns={'Quarterly Results of Pidilite Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so the quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
pidiliteindustries_df_qtr_1_drop_T = (
    pidiliteindustries_df_qtr_1_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)


# Pidilite Industries, quarterly-results page 3: five quarter columns that are
# labelled by position as Dec 19 back to Dec 18.
xhtml_pidiliteindustries_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/pidiliteindustries/results/quarterly-results/PI11/3#PI11'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_pidiliteindustries_qtr_2)

# Only the second parsed table is used.
pidiliteindustries_df_qtr_2 = pd.DataFrame(p.tables[1])

# Discard the column at position 6.
cols2 = [6]
pidiliteindustries_df_qtr_2 = pidiliteindustries_df_qtr_2.drop(
    columns=pidiliteindustries_df_qtr_2.columns[cols2]
)

# Use the values of the first row as column labels (the row is removed below).
pidiliteindustries_df_qtr_2 = pidiliteindustries_df_qtr_2.rename(
    columns=pidiliteindustries_df_qtr_2.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, label
# columns 1-5 by position with company + quarter, and give the title column
# the name shared by the other companies' frames.
pidiliteindustries_df_qtr_2_drop = (
    pidiliteindustries_df_qtr_2
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns=dict(zip(
        pidiliteindustries_df_qtr_2.columns[1:6],
        [
            'Pidilite Industries Dec 19',
            'Pidilite Industries Sep 19',
            'Pidilite Industries Jun 19',
            'Pidilite Industries Mar 19',
            'Pidilite Industries Dec 18',
        ],
    )))
    .rename(columns={'Quarterly Results of Pidilite Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so each quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
pidiliteindustries_df_qtr_2_drop_T = (
    pidiliteindustries_df_qtr_2_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)

# Pidilite Industries, quarterly-results page 4: five quarter columns that are
# labelled by position as Sep 18 back to Sep 17.
xhtml_pidiliteindustries_qtr_3 = url_get_contents(
    'https://www.moneycontrol.com/financials/pidiliteindustries/results/quarterly-results/PI11/4#PI11'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_pidiliteindustries_qtr_3)

# Only the second parsed table is used.
pidiliteindustries_df_qtr_3 = pd.DataFrame(p.tables[1])

# Discard the column at position 6.
cols2 = [6]
pidiliteindustries_df_qtr_3 = pidiliteindustries_df_qtr_3.drop(
    columns=pidiliteindustries_df_qtr_3.columns[cols2]
)

# Use the values of the first row as column labels (the row is removed below).
pidiliteindustries_df_qtr_3 = pidiliteindustries_df_qtr_3.rename(
    columns=pidiliteindustries_df_qtr_3.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, label
# columns 1-5 by position with company + quarter, and give the title column
# the name shared by the other companies' frames.
pidiliteindustries_df_qtr_3_drop = (
    pidiliteindustries_df_qtr_3
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns=dict(zip(
        pidiliteindustries_df_qtr_3.columns[1:6],
        [
            'Pidilite Industries Sep 18',
            'Pidilite Industries Jun 18',
            'Pidilite Industries Mar 18',
            'Pidilite Industries Dec 17',
            'Pidilite Industries Sep 17',
        ],
    )))
    .rename(columns={'Quarterly Results of Pidilite Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so each quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
pidiliteindustries_df_qtr_3_drop_T = (
    pidiliteindustries_df_qtr_3_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)



# Pidilite Industries, quarterly-results page 5: five quarter columns that are
# labelled by position as Jun 17 back to Jun 16.
xhtml_pidiliteindustries_qtr_4 = url_get_contents(
    'https://www.moneycontrol.com/financials/pidiliteindustries/results/quarterly-results/PI11/5#PI11'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_pidiliteindustries_qtr_4)

# Only the second parsed table is used.
pidiliteindustries_df_qtr_4 = pd.DataFrame(p.tables[1])

# Discard the column at position 6.
cols2 = [6]
pidiliteindustries_df_qtr_4 = pidiliteindustries_df_qtr_4.drop(
    columns=pidiliteindustries_df_qtr_4.columns[cols2]
)

# Use the values of the first row as column labels (the row is removed below).
pidiliteindustries_df_qtr_4 = pidiliteindustries_df_qtr_4.rename(
    columns=pidiliteindustries_df_qtr_4.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, label
# columns 1-5 by position with company + quarter, and give the title column
# the name shared by the other companies' frames.
pidiliteindustries_df_qtr_4_drop = (
    pidiliteindustries_df_qtr_4
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns=dict(zip(
        pidiliteindustries_df_qtr_4.columns[1:6],
        [
            'Pidilite Industries Jun 17',
            'Pidilite Industries Mar 17',
            'Pidilite Industries Dec 16',
            'Pidilite Industries Sep 16',
            'Pidilite Industries Jun 16',
        ],
    )))
    .rename(columns={'Quarterly Results of Pidilite Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so each quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
pidiliteindustries_df_qtr_4_drop_T = (
    pidiliteindustries_df_qtr_4_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)

# Pidilite Industries, quarterly-results page 6: five quarter columns that are
# labelled by position as Mar 16 back to Mar 15.
xhtml_pidiliteindustries_qtr_5 = url_get_contents(
    'https://www.moneycontrol.com/financials/pidiliteindustries/results/quarterly-results/PI11/6#PI11'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_pidiliteindustries_qtr_5)

# Only the second parsed table is used.
pidiliteindustries_df_qtr_5 = pd.DataFrame(p.tables[1])

# Discard the column at position 6.
cols2 = [6]
pidiliteindustries_df_qtr_5 = pidiliteindustries_df_qtr_5.drop(
    columns=pidiliteindustries_df_qtr_5.columns[cols2]
)

# Use the values of the first row as column labels (the row is removed below).
pidiliteindustries_df_qtr_5 = pidiliteindustries_df_qtr_5.rename(
    columns=pidiliteindustries_df_qtr_5.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, label
# columns 1-5 by position with company + quarter, and give the title column
# the name shared by the other companies' frames.
pidiliteindustries_df_qtr_5_drop = (
    pidiliteindustries_df_qtr_5
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns=dict(zip(
        pidiliteindustries_df_qtr_5.columns[1:6],
        [
            'Pidilite Industries Mar 16',
            'Pidilite Industries Dec 15',
            'Pidilite Industries Sep 15',
            'Pidilite Industries Jun 15',
            'Pidilite Industries Mar 15',
        ],
    )))
    .rename(columns={'Quarterly Results of Pidilite Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so each quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
pidiliteindustries_df_qtr_5_drop_T = (
    pidiliteindustries_df_qtr_5_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)

# Pidilite Industries, quarterly-results page 7: five quarter columns that are
# labelled by position as Dec 14 back to Dec 13.
xhtml_pidiliteindustries_qtr_6 = url_get_contents(
    'https://www.moneycontrol.com/financials/pidiliteindustries/results/quarterly-results/PI11/7#PI11'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_pidiliteindustries_qtr_6)

# Only the second parsed table is used.
pidiliteindustries_df_qtr_6 = pd.DataFrame(p.tables[1])

# Discard the column at position 6.
cols2 = [6]
pidiliteindustries_df_qtr_6 = pidiliteindustries_df_qtr_6.drop(
    columns=pidiliteindustries_df_qtr_6.columns[cols2]
)

# Use the values of the first row as column labels (the row is removed below).
pidiliteindustries_df_qtr_6 = pidiliteindustries_df_qtr_6.rename(
    columns=pidiliteindustries_df_qtr_6.iloc[0]
)

# Remove the header row and the line items not needed for the analysis, label
# columns 1-5 by position with company + quarter, and give the title column
# the name shared by the other companies' frames.
pidiliteindustries_df_qtr_6_drop = (
    pidiliteindustries_df_qtr_6
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns=dict(zip(
        pidiliteindustries_df_qtr_6.columns[1:6],
        [
            'Pidilite Industries Dec 14',
            'Pidilite Industries Sep 14',
            'Pidilite Industries Jun 14',
            'Pidilite Industries Mar 14',
            'Pidilite Industries Dec 13',
        ],
    )))
    .rename(columns={'Quarterly Results of Pidilite Industries (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so each quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
pidiliteindustries_df_qtr_6_drop_T = (
    pidiliteindustries_df_qtr_6_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)



# Stack the six per-page Pidilite Industries frames in one concat call. The
# frames are listed newest quarter first (Mar 20 ... Dec 13); ignore_index
# renumbers the rows from 0.
pidiliteindustries_df = pd.concat(
    [
        pidiliteindustries_df_qtr_1_drop_T,
        pidiliteindustries_df_qtr_2_drop_T,
        pidiliteindustries_df_qtr_3_drop_T,
        pidiliteindustries_df_qtr_4_drop_T,
        pidiliteindustries_df_qtr_5_drop_T,
        pidiliteindustries_df_qtr_6_drop_T,
    ],
    ignore_index=True,
)

# Reverse the row order (oldest quarter first) and renumber the rows from 0.
pidiliteindustries_df_r = pidiliteindustries_df.iloc[::-1].reset_index(drop=True)

# Attach the columns of the four frames built in earlier cells. Every merge is
# an inner join on the row index, so a row missing from any frame is dropped.
# NOTE(review): assumes the merged frames are indexed 0..n-1 in the same
# quarter order as pidiliteindustries_df_r - confirm against the cells that
# build them.
pidiliteindustries_df_r_price = (
    pidiliteindustries_df_r
    .merge(prices_df_qtr_pid, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Drop columns that came along with the merged frames and are not modelled.
# Raises KeyError if any of these labels is absent.
pidiliteindustries_df_final = pidiliteindustries_df_r_price.drop(
    columns=['PIDILITIND.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# Positional split without shuffling: the first `split_point` rows are the
# train set, the remaining rows the test set.
split_point = 20
data_train_19 = pidiliteindustries_df_final.iloc[:split_point]
data_test_19 = pidiliteindustries_df_final.iloc[split_point:]

#### 20. United Spirits Ltd

In [None]:
# United Spirits, quarterly-results page 2: download the page, parse its
# tables and build a frame for the single column headed 'Mar 20'.

xhtml_us_qtr_1 = url_get_contents(
    'https://www.moneycontrol.com/financials/unitedspirits/results/quarterly-results/US/2#US'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_us_qtr_1)

# Only the second parsed table is used.
us_df_qtr_1 = pd.DataFrame(p.tables[1])

# Discard the columns at positions 1-4 and 6.
cols1 = [1, 2, 3, 4, 6]
us_df_qtr_1 = us_df_qtr_1.drop(columns=us_df_qtr_1.columns[cols1])

# Use the values of the first row as column labels (the row is removed below).
us_df_qtr_1 = us_df_qtr_1.rename(columns=us_df_qtr_1.iloc[0])

# Remove the header row and the line items not needed for the analysis, then
# prefix the 'Mar 20' column with the company tag and give the title column
# the name shared by the other companies' frames.
us_df_qtr_1_drop = (
    us_df_qtr_1
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns={'Mar 20': 'us spirits Mar 20'})
    .rename(columns={'Quarterly Results of United Spirits (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so the quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
us_df_qtr_1_drop_T = (
    us_df_qtr_1_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)


# United Spirits, quarterly-results page 3: five quarter columns that are
# labelled by position as Dec 19 back to Dec 18.
xhtml_us_qtr_2 = url_get_contents(
    'https://www.moneycontrol.com/financials/unitedspirits/results/quarterly-results/US/3#US'
).decode('utf-8')

# HTMLTableParser collects every table found in the markup it is fed.
p = HTMLTableParser()
p.feed(xhtml_us_qtr_2)

# Only the second parsed table is used.
us_df_qtr_2 = pd.DataFrame(p.tables[1])

# Discard the column at position 6.
cols2 = [6]
us_df_qtr_2 = us_df_qtr_2.drop(columns=us_df_qtr_2.columns[cols2])

# Use the values of the first row as column labels (the row is removed below).
us_df_qtr_2 = us_df_qtr_2.rename(columns=us_df_qtr_2.iloc[0])

# Remove the header row and the line items not needed for the analysis, label
# columns 1-5 by position with company tag + quarter, and give the title
# column the name shared by the other companies' frames.
us_df_qtr_2_drop = (
    us_df_qtr_2
    .iloc[1:]
    .drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
    .rename(columns=dict(zip(
        us_df_qtr_2.columns[1:6],
        [
            'us spirits Dec 19',
            'us spirits Sep 19',
            'us spirits Jun 19',
            'us spirits Mar 19',
            'us spirits Dec 18',
        ],
    )))
    .rename(columns={'Quarterly Results of United Spirits (in Rs. Cr.)': 'Quarterly Results FMCG'})
)

# Transpose so each quarter becomes a row, move the old column labels into a
# regular column, promote the first transposed row to the header and drop it.
us_df_qtr_2_drop_T = (
    us_df_qtr_2_drop
    .transpose()
    .reset_index()
    .pipe(lambda frame: frame.rename(columns=frame.iloc[0]))
    .iloc[1:]
)

xhtml_us_qtr_3 = url_get_contents('https://www.moneycontrol.com/financials/unitedspirits/results/quarterly-results/US/4#US').decode('utf-8')

# Defining the HTMLTableParser object
p = HTMLTableParser()

# feeding the html contents in the
# HTMLTableParser object

p.feed(xhtml_us_qtr_3)

# Pandas Dataframe
us_df_qtr_3 = pd.DataFrame(p.tables[1])

# Drop unwanted columns.
cols2 = [6] 
us_df_qtr_3.drop(us_df_qtr_3.columns[cols2], axis=1, inplace=True)

# Make Column header as the 1st row
us_df_qtr_3.rename(columns=us_df_qtr_3.iloc[0], inplace = True)
us_df_qtr_3_drop = us_df_qtr_3.drop(us_df_qtr_3.index[[0]])

# Drop row data not required for analysis
us_df_qtr_3_drop = us_df_qtr_3_drop.drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
us_df_qtr_3_drop

# Rename header to include company name
us_df_qtr_3_drop = us_df_qtr_3_drop.rename(columns={us_df_qtr_3_drop.columns[1]: 'us spirits Sep 18', us_df_qtr_3_drop.columns[2]: 'us spirits Jun 18',
                            us_df_qtr_3_drop.columns[3]: 'us spirits Mar 18', us_df_qtr_3_drop.columns[4]: 'us spirits Dec 17', us_df_qtr_3_drop.columns[5]: 'us spirits Sep 17'})

us_df_qtr_3_drop = us_df_qtr_3_drop.rename(columns = {'Quarterly Results of United Spirits (in Rs. Cr.)':'Quarterly Results FMCG'})

# Transpose the dataframe
us_df_qtr_3_drop_T = us_df_qtr_3_drop.transpose()

# Reset index for the dataframe
us_df_qtr_3_drop_T.reset_index(inplace=True)

# Make Column header as the 1st row
us_df_qtr_3_drop_T.rename(columns=us_df_qtr_3_drop_T.iloc[0], inplace = True)
us_df_qtr_3_drop_T = us_df_qtr_3_drop_T.drop(us_df_qtr_3_drop_T.index[[0]])



xhtml_us_qtr_4 = url_get_contents('https://www.moneycontrol.com/financials/unitedspirits/results/quarterly-results/US/5#US').decode('utf-8')

# Defining the HTMLTableParser object
p = HTMLTableParser()

# feeding the html contents in the
# HTMLTableParser object

p.feed(xhtml_us_qtr_4)

# Pandas Dataframe

us_df_qtr_4 = pd.DataFrame(p.tables[1])

# Drop unwanted columns.
cols2 = [6] 
us_df_qtr_4.drop(us_df_qtr_4.columns[cols2], axis=1, inplace=True)

# Make Column header as the 1st row
us_df_qtr_4.rename(columns=us_df_qtr_4.iloc[0], inplace = True)
us_df_qtr_4_drop = us_df_qtr_4.drop(us_df_qtr_4.index[[0]])

# Drop row data not required for analysis
us_df_qtr_4_drop = us_df_qtr_4_drop.drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
us_df_qtr_4_drop

# Rename header to include company name
us_df_qtr_4_drop = us_df_qtr_4_drop.rename(columns={us_df_qtr_4_drop.columns[1]: 'us spirits Jun 17', us_df_qtr_4_drop.columns[2]: 'us spirits Mar 17',
                            us_df_qtr_4_drop.columns[3]: 'us spirits Dec 16', us_df_qtr_4_drop.columns[4]: 'us spirits Sep 16', us_df_qtr_4_drop.columns[5]: 'us spirits Jun 16'})

us_df_qtr_4_drop = us_df_qtr_4_drop.rename(columns = {'Quarterly Results of United Spirits (in Rs. Cr.)':'Quarterly Results FMCG'})

# Transpose the dataframe
us_df_qtr_4_drop_T = us_df_qtr_4_drop.transpose()

# Reset index for the dataframe
us_df_qtr_4_drop_T.reset_index(inplace=True)

# Make Column header as the 1st row
us_df_qtr_4_drop_T.rename(columns=us_df_qtr_4_drop_T.iloc[0], inplace = True)
us_df_qtr_4_drop_T = us_df_qtr_4_drop_T.drop(us_df_qtr_4_drop_T.index[[0]])

xhtml_us_qtr_5 = url_get_contents('https://www.moneycontrol.com/financials/unitedspirits/results/quarterly-results/US/6#US').decode('utf-8')

# Defining the HTMLTableParser object
p = HTMLTableParser()

# feeding the html contents in the
# HTMLTableParser object

p.feed(xhtml_us_qtr_5)

# Pandas Dataframe

us_df_qtr_5 = pd.DataFrame(p.tables[1])

# Drop unwanted columns.
cols2 = [6] 
us_df_qtr_5.drop(us_df_qtr_5.columns[cols2], axis=1, inplace=True)

# Make Column header as the 1st row
us_df_qtr_5.rename(columns=us_df_qtr_5.iloc[0], inplace = True)
us_df_qtr_5_drop = us_df_qtr_5.drop(us_df_qtr_5.index[[0]])

# Drop row data not required for analysis
us_df_qtr_5_drop = us_df_qtr_5_drop.drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
us_df_qtr_5_drop

# Rename header to include company name
us_df_qtr_5_drop = us_df_qtr_5_drop.rename(columns={us_df_qtr_5_drop.columns[1]: 'us spirits Mar 16', us_df_qtr_5_drop.columns[2]: 'us spirits Dec 15',
                            us_df_qtr_5_drop.columns[3]: 'us spirits Sep 15', us_df_qtr_5_drop.columns[4]: 'us spirits Jun 15', us_df_qtr_5_drop.columns[5]: 'us spirits Mar 15'})

us_df_qtr_5_drop = us_df_qtr_5_drop.rename(columns = {'Quarterly Results of United Spirits (in Rs. Cr.)':'Quarterly Results FMCG'})

# Transpose the dataframe
us_df_qtr_5_drop_T = us_df_qtr_5_drop.transpose()

# Reset index for the dataframe
us_df_qtr_5_drop_T.reset_index(inplace=True)

# Make Column header as the 1st row
us_df_qtr_5_drop_T.rename(columns=us_df_qtr_5_drop_T.iloc[0], inplace = True)
us_df_qtr_5_drop_T = us_df_qtr_5_drop_T.drop(us_df_qtr_5_drop_T.index[[0]])

xhtml_us_qtr_6 = url_get_contents('https://www.moneycontrol.com/financials/unitedspirits/results/quarterly-results/US/7#US').decode('utf-8')

# Defining the HTMLTableParser object
p = HTMLTableParser()

# feeding the html contents in the
# HTMLTableParser object


p.feed(xhtml_us_qtr_6)

# Pandas Dataframe
us_df_qtr_6 = pd.DataFrame(p.tables[1])

# Drop unwanted columns.
cols2 = [6] 
us_df_qtr_6.drop(us_df_qtr_6.columns[cols2], axis=1, inplace=True)

# Make Column header as the 1st row
us_df_qtr_6.rename(columns=us_df_qtr_6.iloc[0], inplace = True)
us_df_qtr_6_drop = us_df_qtr_6.drop(us_df_qtr_6.index[[0]])

# Drop row data not required for analysis
us_df_qtr_6_drop = us_df_qtr_6_drop.drop(index=[2,4,8,11,12,13,14,15,22,26,27,33,34,35,38,39,40,41,42,43,44,45,46,47,48,49])
us_df_qtr_6_drop

# Rename header to include company name
us_df_qtr_6_drop = us_df_qtr_6_drop.rename(columns={us_df_qtr_6_drop.columns[1]: 'us spirits Dec 14', us_df_qtr_6_drop.columns[2]: 'us spirits Sep 14',
                            us_df_qtr_6_drop.columns[3]: 'us spirits Jun 14', us_df_qtr_6_drop.columns[4]: 'us spirits Mar 14', us_df_qtr_6_drop.columns[5]: 'us spirits Dec 13'})

us_df_qtr_6_drop = us_df_qtr_6_drop.rename(columns = {'Quarterly Results of United Spirits (in Rs. Cr.)':'Quarterly Results FMCG'})

# Transpose the dataframe
us_df_qtr_6_drop_T = us_df_qtr_6_drop.transpose()

# Reset index for the dataframe
us_df_qtr_6_drop_T.reset_index(inplace=True)

# Make Column header as the 1st row
us_df_qtr_6_drop_T.rename(columns=us_df_qtr_6_drop_T.iloc[0], inplace = True)
us_df_qtr_6_drop_T = us_df_qtr_6_drop_T.drop(us_df_qtr_6_drop_T.index[[0]])



# Merge dataframes for us spirits.
# One concat over all six page frames gives the same rows, in the same
# order, as chaining five pairwise concats, without building throwaway
# intermediate frames.
us_df = pd.concat(
    [us_df_qtr_1_drop_T, us_df_qtr_2_drop_T, us_df_qtr_3_drop_T,
     us_df_qtr_4_drop_T, us_df_qtr_5_drop_T, us_df_qtr_6_drop_T],
    ignore_index=True,
)

# Reverse the rows of the dataframe and renumber them from 0.
us_df_r = us_df.iloc[::-1].reset_index(drop=True)

# Merge by index (pandas' default inner join) with the price and index
# series prepared in earlier cells.
# NOTE(review): an inner join silently drops rows whose index label is
# missing from any of the right-hand frames - confirm they all share the
# same 0..n-1 index as us_df_r.
us_df_r_price = (
    us_df_r
    .merge(prices_df_qtr_us, left_index=True, right_index=True)
    .merge(df_nse_fmcg_qtr, left_index=True, right_index=True)
    .merge(nasdaq_index, left_index=True, right_index=True)
    .merge(crude_close, left_index=True, right_index=True)
)

# Remove the helper columns brought in by the merges.
us_df_final = us_df_r_price.drop(
    columns=['MCDOWELL-N.NS', '1q_avg_diff', 'index_x', 'index_y', 'index']
)

# Define split point: rows before it form the train set, the rest the test set.
split_point = 20

# Create train set
data_train_20 = us_df_final.iloc[:split_point]
# Create test data set
data_test_20 = us_df_final.iloc[split_point:]

## Merge all individual DataFrames to create the final train and test DataFrames

In [None]:
# Stack the twenty data_train_N frames into one frame with a fresh 0..n-1 index.
df_train = pd.concat(
    [
        data_train_1, data_train_2, data_train_3, data_train_4, data_train_5,
        data_train_6, data_train_7, data_train_8, data_train_9, data_train_10,
        data_train_11, data_train_12, data_train_13, data_train_14, data_train_15,
        data_train_16, data_train_17, data_train_18, data_train_19, data_train_20,
    ],
    ignore_index=True,
)

# Show every column whenever a frame is displayed.
pd.set_option('display.max_columns', None)

# Remove the columns that are left out of the final training set.
df_train_final = df_train.drop(
    columns=[
        'Reserves Excluding Revaluation Reserves',
        'Equity Dividend Rate (%)',
        'EPS Before Extra Ordinary',
        'Diluted EPS.',
        'Net Sales/Income from operations',
        'P/L After Tax from Ordinary Activities',
        'P/L Before Exceptional Items Tax',
        'P/L Before Other Inc. , Int., Excpt. Items Tax',
        'P/L Before Int., Excpt. Items Tax',
    ]
)


In [None]:
# Write the training set to a UTF-8 CSV file (path relative to the working
# directory), leaving out the row index.
df_train_final.to_csv("df_train_final.csv", index=False, encoding="utf-8")

# Display the frame that was just written as the cell output.
df_train_final

In [None]:
# Stack the twenty data_test_N frames into one frame with a fresh 0..n-1 index.
df_test = pd.concat(
    [
        data_test_1, data_test_2, data_test_3, data_test_4, data_test_5,
        data_test_6, data_test_7, data_test_8, data_test_9, data_test_10,
        data_test_11, data_test_12, data_test_13, data_test_14, data_test_15,
        data_test_16, data_test_17, data_test_18, data_test_19, data_test_20,
    ],
    ignore_index=True,
)

# Remove the columns that are left out of the final test set.
df_test_final = df_test.drop(
    columns=[
        'Reserves Excluding Revaluation Reserves',
        'Equity Dividend Rate (%)',
        'EPS Before Extra Ordinary',
        'Diluted EPS.',
        'Net Sales/Income from operations',
        'P/L After Tax from Ordinary Activities',
        'P/L Before Exceptional Items Tax',
        'P/L Before Other Inc. , Int., Excpt. Items Tax',
        'P/L Before Int., Excpt. Items Tax',
    ]
)

In [None]:
# Write the test set to a UTF-8 CSV file (path relative to the working
# directory), leaving out the row index.
df_test_final.to_csv("df_test_final.csv", index=False, encoding="utf-8")

# Display the frame that was just written as the cell output.
df_test_final