In [1]:
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from bs4 import BeautifulSoup
import time
import random

In [2]:
def extractStockData(stock_code):
    """Download the price history CSV for one stock code from stocklore.ai.

    Parameters
    ----------
    stock_code : str
        Symbol sent as the ``q`` query parameter of the CSV endpoint.

    Returns
    -------
    pandas.DataFrame or None
        One row per CSV record, sorted by ascending ``Date`` with a fresh
        0..n-1 index. ``Date`` is parsed as ``%Y-%m-%d`` and every column after
        the first is cast to float. ``None`` is returned (after printing a
        message) when the response status is not 200 or the body is empty.
    """
    # Let requests build the query string so symbols containing reserved
    # characters (for example "M&M") are URL-encoded instead of corrupting the
    # query; the timeout keeps a stalled connection from hanging the kernel.
    response = requests.get(
        "https://www.stocklore.ai/api/nse_data.csv",
        params={"q": stock_code},
        timeout=30,
    )

    if response.status_code != 200:
        print("Error in Stock Price Data Extraction, Check the stock code name and internet connection")
        return None

    # splitlines() handles both "\n" and "\r\n"; skipping blank lines means the
    # last record is kept whether or not the body ends with a newline.
    rows = [
        line.split(",")
        for line in response.content.decode("utf-8").splitlines()
        if line.strip()
    ]
    if not rows:
        print("Error in Stock Price Data Extraction, Check the stock code name and internet connection")
        return None

    header, records = rows[0], rows[1:]
    price_df = pd.DataFrame(records, columns=header)

    # astype() returns real float columns; assigning through .iloc[:, 1:] can
    # leave the columns as object dtype on recent pandas versions.
    price_df = price_df.astype({column: 'float' for column in price_df.columns[1:]})
    price_df['Date'] = pd.to_datetime(price_df['Date'], format='%Y-%m-%d')
    price_df = price_df.sort_values(by=['Date']).reset_index(drop=True)

    return price_df
        

In [3]:
def getSplitData(stock_code, start_year=2008, end_year=2019, scrape_min_interval=15):
    """Scrape moneycontrol.com stock-split listings and return one stock's splits.

    One listing page is fetched per year in ``start_year..end_year`` (inclusive).
    For every listed row that contains a link, the linked company page is also
    fetched to read the NSE code.

    Parameters
    ----------
    stock_code : str
        NSE code to keep in the result.
    start_year, end_year : int
        Inclusive range of years whose listing pages are scraped.
    scrape_min_interval : float
        Minimum pause in seconds before each yearly listing request; the actual
        pause is this value times a random factor in [1, 1.5].

    Returns
    -------
    pandas.DataFrame
        Columns ``NseCode``, ``Company``, ``OldFV``, ``NewFV``, ``SplitDate``;
        only rows whose ``NseCode`` equals ``stock_code``, sorted by
        ``SplitDate`` (parsed as ``%d-%m-%Y``) with a fresh 0..n-1 index.
        ``OldFV`` and ``NewFV`` are left as the scraped text.
    """
    columns = ['NseCode', 'Company', 'OldFV', 'NewFV', 'SplitDate']
    records = []

    for year in range(start_year, end_year + 1):
        url = "https://www.moneycontrol.com/stocks/marketinfo/splits/homebody.php?sel_year=" + str(year)

        # Randomised pause between yearly listing requests.
        time.sleep(scrape_min_interval * (random.uniform(1, 1.5)))
        res = requests.get(url)
        soup = BeautifulSoup(res.content, 'lxml')

        # The listing is taken from the second-to-last <table> on the page, and
        # the first three and last three <tr> rows are skipped.
        split_table = soup.find_all('table')[-2]
        split_rows = split_table.find_all('tr')

        for row in split_rows[3:len(split_rows) - 3]:
            link = row.find('a')
            if not link:
                continue

            try:
                company_page = BeautifulSoup(
                    requests.get("https://www.moneycontrol.com" + link['href']).content,
                    'lxml',
                )
                # Second '|'-separated field of the header div, minus its
                # five-character prefix.
                code = company_page.find('div', class_="FL gry10").get_text().split('|')[1][5:].strip()
            except Exception:
                # Best effort: keep the row with a placeholder code. Catching
                # Exception (not a bare except) lets Ctrl-C stop the scrape.
                code = 'notScraping'

            cells = row.find_all('td')
            records.append({
                'NseCode': code,
                'Company': link.get_text(),
                'OldFV': cells[1].get_text(),
                'NewFV': cells[2].get_text(),
                'SplitDate': cells[3].get_text(),
            })

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # appending row by row copies the whole frame on every iteration.
    stocks_split_info = pd.DataFrame(records, columns=columns)
    stocks_split_info['SplitDate'] = pd.to_datetime(stocks_split_info['SplitDate'], format='%d-%m-%Y')

    stock_split_df = stocks_split_info[stocks_split_info["NseCode"] == stock_code]
    stock_split_df = stock_split_df.sort_values(by=['SplitDate']).reset_index(drop=True)

    return stock_split_df


In [4]:
def getBonusData(stock_code, start_year=2008, end_year=2019, scrape_min_interval=15):
    """Scrape moneycontrol.com bonus-issue listings and return one stock's bonuses.

    One listing page is fetched per year in ``start_year..end_year`` (inclusive).
    For every listed row that contains a link, the linked company page is also
    fetched to read the NSE code.

    Parameters
    ----------
    stock_code : str
        NSE code to keep in the result.
    start_year, end_year : int
        Inclusive range of years whose listing pages are scraped.
    scrape_min_interval : float
        Minimum pause in seconds before each yearly listing request; the actual
        pause is this value times a random factor in [1, 1.5].

    Returns
    -------
    pandas.DataFrame
        Columns ``NseCode``, ``Company``, ``Ratio``, ``AnnouncementDate``,
        ``RecordDate``, ``Ex-Bonus``; only rows whose ``NseCode`` equals
        ``stock_code``, sorted by ``Ex-Bonus`` (parsed as ``%d-%m-%Y``) with a
        fresh 0..n-1 index. The other scraped fields are left as text.
    """
    columns = ['NseCode', 'Company', 'Ratio', 'AnnouncementDate', 'RecordDate', 'Ex-Bonus']
    records = []

    for year in range(start_year, end_year + 1):
        url = "https://www.moneycontrol.com/stocks/marketinfo/bonus/homebody.php?sel_year=" + str(year)

        # Randomised pause between yearly listing requests.
        time.sleep(scrape_min_interval * (random.uniform(1, 1.5)))
        res = requests.get(url)
        soup = BeautifulSoup(res.content, 'lxml')

        # The listing is the first table with class "b_12 dvdtbl"; its first
        # two <tr> rows are skipped.
        bonus_table = soup.find_all('table', class_="b_12 dvdtbl")[0]
        bonus_rows = bonus_table.find_all('tr')

        for row in bonus_rows[2:]:
            link = row.find('a')
            if not link:
                continue

            try:
                company_page = BeautifulSoup(
                    requests.get("https://www.moneycontrol.com" + link['href']).content,
                    'lxml',
                )
                # Second '|'-separated field of the header div, minus its
                # five-character prefix.
                code = company_page.find('div', class_="FL gry10").get_text().split('|')[1][5:].strip()
            except Exception:
                # Best effort: keep the row with a placeholder code. Catching
                # Exception (not a bare except) lets Ctrl-C stop the scrape.
                code = 'notScraping'

            cells = row.find_all('td')
            records.append({
                'NseCode': code,
                'Company': link.get_text(),
                'Ratio': cells[1].get_text(),
                'AnnouncementDate': cells[2].get_text(),
                'RecordDate': cells[3].get_text(),
                'Ex-Bonus': cells[4].get_text(),
            })

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # appending row by row copies the whole frame on every iteration.
    bonus_issue_info = pd.DataFrame(records, columns=columns)
    bonus_issue_info['Ex-Bonus'] = pd.to_datetime(bonus_issue_info['Ex-Bonus'], format='%d-%m-%Y')

    bonus_issue_df = bonus_issue_info[bonus_issue_info['NseCode'] == stock_code]
    bonus_issue_df = bonus_issue_df.sort_values(by=['Ex-Bonus']).reset_index(drop=True)

    return bonus_issue_df

In [5]:
def adjustSplit(price_df, split_df):
    """Return a copy of ``price_df`` with rows before each split rescaled.

    For every row of ``split_df``, the ``Open``, ``Close``, ``High``, ``Low``
    and ``Volume`` values dated strictly before ``SplitDate`` are divided by
    ``OldFV / NewFV``. Neither input frame is modified.

    Parameters
    ----------
    price_df : pandas.DataFrame
        Must contain ``Date`` plus the five value columns listed above.
    split_df : pandas.DataFrame
        Must contain ``OldFV``, ``NewFV`` (convertible to int) and
        ``SplitDate``. Any index is accepted.

    Returns
    -------
    pandas.DataFrame
        The adjusted copy, with the five value columns as float.
    """
    value_columns = ['Open', 'Close', 'High', 'Low', 'Volume']

    # Work on a copy so the caller's frame (and a notebook cell re-run) is not
    # affected, and make the value columns float so the division is well typed.
    adjusted = price_df.copy()
    adjusted[value_columns] = adjusted[value_columns].astype('float')

    old_face_values = split_df['OldFV'].astype('int')
    new_face_values = split_df['NewFV'].astype('int')
    split_dates = pd.to_datetime(split_df['SplitDate'])

    # Iterate by position rather than by label so a filtered, un-reset index
    # on split_df works too.
    # NOTE(review): Volume is scaled in the same direction as the prices, as in
    # the original code; split adjustments conventionally scale volume the
    # opposite way — confirm the intent.
    for split_date, old_fv, new_fv in zip(split_dates, old_face_values, new_face_values):
        before_split = adjusted['Date'] < split_date
        adjusted.loc[before_split, value_columns] = (
            adjusted.loc[before_split, value_columns] / (old_fv / new_fv)
        )

    return adjusted

In [6]:
def adjustBonus(price_df, bonus_df):
    """Return a copy of ``price_df`` with rows before each bonus issue rescaled.

    For every row of ``bonus_df``, ``Ratio`` is read as ``"A:B"`` and the
    ``Open``, ``Close``, ``High``, ``Low`` and ``Volume`` values dated strictly
    before ``Ex-Bonus`` are multiplied by ``B / (A + B)``. Neither input frame
    is modified, so the function can be called repeatedly on the same frames.

    Parameters
    ----------
    price_df : pandas.DataFrame
        Must contain ``Date`` plus the five value columns listed above.
    bonus_df : pandas.DataFrame
        Must contain ``Ratio`` and ``Ex-Bonus``. Any index is accepted.

    Returns
    -------
    pandas.DataFrame
        The adjusted copy, with the five value columns as float.
    """
    value_columns = ['Open', 'Close', 'High', 'Low', 'Volume']

    # Work on a copy so the caller's frame (and a notebook cell re-run) is not
    # affected, and make the value columns float so the scaling is well typed.
    adjusted = price_df.copy()
    adjusted[value_columns] = adjusted[value_columns].astype('float')

    ex_bonus_dates = pd.to_datetime(bonus_df['Ex-Bonus'])

    # Iterate by position rather than by label so a filtered, un-reset index
    # on bonus_df works too.
    # NOTE(review): Volume is scaled in the same direction as the prices, as in
    # the original code; bonus adjustments conventionally scale volume the
    # opposite way — confirm the intent.
    for ex_date, ratio in zip(ex_bonus_dates, bonus_df['Ratio']):
        # Accept a ratio that is already split into parts, which is what the
        # earlier in-place version of this function left behind in the frame.
        parts = ratio.split(':') if isinstance(ratio, str) else ratio
        first, second = int(parts[0]), int(parts[1])

        before_bonus = adjusted['Date'] < ex_date
        adjusted.loc[before_bonus, value_columns] = (
            adjusted.loc[before_bonus, value_columns] * (second / (first + second))
        )

    return adjusted

In [7]:
def _loadStoredActions(csv_path, stock_code, date_column):
    """Read a stored corporate-action CSV and keep only the rows for ``stock_code``."""
    # NOTE(review): assumes the stored CSVs use the same column names that
    # getSplitData / getBonusData produce (including 'NseCode') — confirm.
    actions = pd.read_csv(csv_path)
    actions = actions[actions['NseCode'] == stock_code].copy()
    try:
        # The scrapers parse these dates as dd-mm-YYYY, so try that first.
        actions[date_column] = pd.to_datetime(actions[date_column], format='%d-%m-%Y')
    except ValueError:
        # Otherwise let pandas infer the format (e.g. ISO dates written by to_csv).
        actions[date_column] = pd.to_datetime(actions[date_column])
    return actions.sort_values(by=[date_column]).reset_index(drop=True)


def adjustStockData(stock_code, store=False, filename=" ", real_time_scrape=False, scrape_min_interval=15):
    """Download a stock's prices and adjust them for splits and bonus issues.

    Parameters
    ----------
    stock_code : str
        NSE code passed to the price download and used to select the
        stock's split / bonus records.
    store : bool
        When True, write the adjusted frame to a CSV file.
    filename : str
        Output file name without the ``.csv`` extension. The default ``" "``
        means "use ``stock_code`` as the file name".
    real_time_scrape : bool
        When True, scrape split / bonus data live for the years covered by the
        price data; otherwise read the stored CSV files under
        ``../Split and bonus data/moneycontrol.com_scrape``.
    scrape_min_interval : float
        Passed through to the scrapers as the minimum pause between requests.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(original_df, price_df)``: the prices as downloaded and the prices
        after the split and bonus adjustments.

    Raises
    ------
    ValueError
        If no price data could be downloaded for ``stock_code``.
    """
    price_df = extractStockData(stock_code)
    if price_df is None or price_df.empty:
        # extractStockData signals a failed download by returning None.
        raise ValueError("No price data could be downloaded for stock code " + repr(stock_code))

    # Prices are sorted by date, so the first and last rows bound the years.
    start_year = price_df['Date'].iloc[0].year
    end_year = price_df['Date'].iloc[-1].year
    # Snapshot before adjusting, in case the adjust helpers modify in place.
    original_df = price_df.copy()

    if real_time_scrape:
        split_df = getSplitData(stock_code, start_year, end_year, scrape_min_interval)
        bonus_df = getBonusData(stock_code, start_year, end_year, scrape_min_interval)
    else:
        # The stored files are filtered down to this stock so that other
        # companies' splits and bonuses are not applied to these prices.
        path = Path('../Split and bonus data/moneycontrol.com_scrape')
        split_df = _loadStoredActions(path/"stocks_split_data_nsecode.csv", stock_code, 'SplitDate')
        bonus_df = _loadStoredActions(path/"bonus_issue_data_nsecode.csv", stock_code, 'Ex-Bonus')

    # A leftover debugging `return price_df, split_df, bonus_df` used to sit
    # here, which made everything below unreachable.
    price_df = adjustSplit(price_df, split_df)
    price_df = adjustBonus(price_df, bonus_df)

    if store:
        if filename == " ":
            price_df.to_csv(stock_code+".csv", index=False)
        else:
            price_df.to_csv(filename+".csv", index=False)

    return original_df, price_df


In [None]:
# Run the pipeline for JISLJALEQS with live scraping (10 s minimum pause
# between listing requests) and with CSV storage requested.
a, b = adjustStockData(
    "JISLJALEQS",
    store=True,
    real_time_scrape=True,
    scrape_min_interval=10,
)

In [None]:
# Figure 1, two side-by-side panels: Open (red) and Close (blue) of `a` on
# the left, and the same two series of `b` on the right.
plt.figure(num=1, figsize=(20, 10))

left_ax = plt.subplot(1, 2, 1)
left_ax.plot(a['Open'].values, 'r-')
left_ax.plot(a['Close'].values, 'b-')

right_ax = plt.subplot(1, 2, 2)
right_ax.plot(b['Open'].values, 'r-')
right_ax.plot(b['Close'].values, 'b-')

In [None]:
# Display `b`, the second value unpacked from the adjustStockData call above.
b

In [None]:
# NOTE(review): `c` is not assigned by any cell visible in this notebook, so
# this cell raises NameError after Restart & Run All — confirm where `c`
# should come from, or remove the cell.
c

In [None]:
# NOTE(review): adjustSplit takes (price_df, split_df) and reads 'SplitDate',
# 'OldFV' and 'NewFV' from its second argument — confirm that `b` holds split
# data at this point rather than a price frame.
d = adjustSplit(a, b)

In [None]:
# New figure using only the right-hand slot of a 1x2 grid: Open series of `a`
# in red.
plt.figure(figsize=(20, 10))
right_ax = plt.subplot(1, 2, 2)
right_ax.plot(a['Open'].values, 'r-')
#plt.plot(d['Open'].values, 'b-')