<a href="https://colab.research.google.com/github/pikey-msc/RiesgosFinancieros/blob/master/2022-1/Insumos/MertonKMV_python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from datetime import datetime
import lxml
from lxml import html
import requests
import numpy as np
import pandas as pd

def get_page(url, timeout=30):
    """Fetch ``url`` with browser-like request headers.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float or tuple, optional
        Passed to ``requests.get``; seconds to wait for the server before
        giving up. Without a timeout a stalled connection would block the
        notebook indefinitely.

    Returns
    -------
    requests.Response
        The response as returned by ``requests.get``. The HTTP status is not
        checked here; callers inspect the response themselves.
    """
    # Set up the request headers that we're going to use, to simulate
    # a request by the Chrome browser. Simulating a request from a browser
    # is generally good practice when building a scraper
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        # 'br' is intentionally not advertised: requests only decodes Brotli
        # when an optional brotli package is installed, and an undecoded body
        # cannot be parsed as HTML.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'close',
        'DNT': '1', # Do Not Track Request Header
        'Pragma': 'no-cache',
        # The HTTP header is spelled 'Referer' (single 'r' before 'er').
        'Referer': 'https://google.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
    }

    return requests.get(url, headers=headers, timeout=timeout)

def parse_rows(table_rows):
    """Convert scraped table-row elements into a raw DataFrame of cell texts.

    Parameters
    ----------
    table_rows : iterable
        Elements exposing an ``xpath`` method (e.g. lxml elements). Each
        row's direct ``div`` children are treated as its cells.

    Returns
    -------
    pandas.DataFrame
        One row per kept table row and one column per cell. A cell whose
        span-text query does not yield exactly one value is stored as NaN.
        Rows with four or more such missing cells are dropped.
    """
    parsed_rows = []

    for table_row in table_rows:
        parsed_row = []
        missing_cells = 0

        for cell in table_row.xpath("./div"):
            try:
                # Unpacking into a 1-tuple raises ValueError unless the query
                # returned exactly one text node.
                (text,) = cell.xpath('.//span/text()[1]')
                parsed_row.append(text)
            except ValueError:
                # np.nan rather than np.NaN: the capitalised alias was
                # removed in NumPy 2.0.
                parsed_row.append(np.nan)
                missing_cells += 1

        # Keep only rows that are mostly populated.
        if missing_cells < 4:
            parsed_rows.append(parsed_row)

    return pd.DataFrame(parsed_rows)

def clean_data(df):
    """Reshape a raw scraped statement table into one row per reporting date.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table whose first column (label ``0``) holds the row captions and
        whose remaining columns hold the cell texts, one column per period.

    Returns
    -------
    pandas.DataFrame
        The transposed table. Its first column is renamed to ``'Date'`` and
        left untouched; every other column has its thousands separators
        removed and is converted to ``float64``.

    Raises
    ------
    ValueError
        If a non-date cell cannot be converted to a float after the commas
        are removed.
    """
    df = df.set_index(0) # Set the index to the first column: 'Period Ending'.
    df = df.transpose() # Transpose the DataFrame, so that our header contains the account names

    # Rename the "Breakdown" column to "Date"
    cols = list(df.columns)
    cols[0] = 'Date'
    # No ``inplace`` argument: it was removed from set_axis in pandas 2.0 and
    # the call already returns a new frame.
    df = df.set_axis(cols, axis='columns')

    # Rebuild the frame from converted columns instead of assigning through
    # ``iloc``: positional assignment may write into the existing object
    # column and leave its dtype unchanged. Columns are addressed by position
    # so that repeated account names cannot collide.
    converted = [df.iloc[:, 0]]
    for column_index in range(1, df.shape[1]):
        column = df.iloc[:, column_index]
        column = column.str.replace(',', '', regex=False) # Remove the thousands separator
        converted.append(column.astype(np.float64)) # Convert the column to float64

    return pd.concat(converted, axis=1)

def scrape_table(url):
    """Download a page and return its financial table as a cleaned DataFrame.

    The page is fetched with ``get_page``, parsed with lxml, and every ``div``
    whose class contains ``D(tbr)`` is treated as a table row. The rows are
    then passed through ``parse_rows`` and ``clean_data``.

    Parameters
    ----------
    url : str
        Address of the page to scrape.

    Returns
    -------
    pandas.DataFrame
        The result of ``clean_data`` applied to the parsed rows.

    Raises
    ------
    AssertionError
        If no matching row elements are found on the page.
    """
    # Fetch the page that we're going to parse
    page = get_page(url)

    # Parse the page with LXML, so that we can start doing some XPATH queries
    # to extract the data that we want
    tree = html.fromstring(page.content)

    # Fetch all div elements which have class 'D(tbr)'
    table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")

    # Ensure that some table rows are found; if none are found, then it's possible
    # that Yahoo Finance has changed their page layout, or have detected
    # that you're scraping the page.
    # Raised explicitly (instead of a bare ``assert``) so the check survives
    # ``python -O`` and reports what was requested; the exception type is the
    # same one an ``assert`` would raise.
    if len(table_rows) == 0:
        raise AssertionError(
            "No 'D(tbr)' table rows found at {} (HTTP status {}); the page "
            "layout may have changed or the request may have been blocked."
            .format(url, page.status_code)
        )

    df = parse_rows(table_rows)
    df = clean_data(df)

    return df

In [4]:
# Yahoo Finance ticker whose statements are scraped below.
symbol = 'WALMEX.MX'

# Balance-sheet page for the ticker.
df_balance_sheet = scrape_table(f'https://finance.yahoo.com/quote/{symbol}/balance-sheet?p={symbol}')

# Financials page for the same ticker.
df_financials = scrape_table(f'https://finance.yahoo.com/quote/{symbol}/financials?p={symbol}')

In [1]:
!pip install yfinance



In [22]:
import yfinance as yf

# Ticker handle for WALMEX.MX, the same symbol used for the scraped statements.
brk = yf.Ticker("WALMEX.MX")

# Request the longest available price history, with auto-adjustment enabled.
hist = brk.history(auto_adjust=True, period="max")

hist

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2003-01-01,4.328765,4.496691,4.328765,4.446314,0,0.0,0.0
2003-01-02,4.328765,4.496691,4.328765,4.446314,8334330,0.0,0.0
2003-01-03,4.440716,4.468704,4.423924,4.440716,18475096,0.0,0.0
2003-01-06,4.440718,4.625437,4.440718,4.616107,16180223,0.0,0.0
2003-01-07,4.616106,4.616106,4.500423,4.509752,15261074,0.0,0.0
...,...,...,...,...,...,...,...
2022-01-11,72.419998,72.690002,71.849998,72.279999,9874950,0.0,0.0
2022-01-12,72.059998,73.389999,72.059998,72.730003,13043111,0.0,0.0
2022-01-13,72.620003,73.230003,71.510002,71.849998,14271331,0.0,0.0
2022-01-14,71.910004,71.910004,71.279999,71.459999,13923854,0.0,0.0


In [23]:
import pandas as pd
import numpy as np
from statistics import stdev

# Two-column frame built from the price history: 'ds' carries the history's
# index values and 'y' the closing prices, on a fresh default integer index.
df = pd.DataFrame({
    'ds': hist.index,
    'y': hist['Close'].values,
})

df.tail()

Unnamed: 0,ds,y
4815,2022-01-11,72.279999
4816,2022-01-12,72.730003
4817,2022-01-13,71.849998
4818,2022-01-14,71.459999
4819,2022-01-17,71.379997


In [24]:
# Relative change of 'y' from one row to the next (a one-row lookback).
Delta_A = df['y'].pct_change(periods=1)

# Sample standard deviation of the changes, ignoring undefined entries.
vol_A = stdev(Delta_A.dropna())

vol_A

0.016639733626630068