In [11]:
pip install schedule

Collecting schedule
  Downloading schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Downloading schedule-1.2.2-py3-none-any.whl (12 kB)
Installing collected packages: schedule
Successfully installed schedule-1.2.2
Note: you may need to restart the kernel to use updated packages.


In [12]:
pip install lxml

Note: you may need to restart the kernel to use updated packages.


In [13]:
pip install html5lib

Note: you may need to restart the kernel to use updated packages.


In [92]:
import requests
import pandas as pd
import datetime
import pytz
import schedule
import time
from datetime import datetime
from io import StringIO
import os


In [94]:
# Page scraped by get_todays_data() for the daily stock price sheet.
url = "https://www.zse.co.zw/price-sheet/"

# Page scraped by get_rates() for the exchange-rate table.
# NOTE(review): the srsltid query parameter looks like a search-referral tag
# and is probably not required - confirm before removing it.
url2 = (
    "https://zimpricecheck.com/price-updates/official-and-black-market-exchange-rates/"
    "?srsltid=AfmBOoo-30J1RAcbr6OMk7Z-R0rUF_sH7WBp97Qt1O3C4FrP8n7cXhj_"
)

In [96]:
def get_todays_data():
    """Download and tidy the Zimbabwe Stock Exchange daily price sheet.

    Fetches the page at ``url``, reads the first HTML table on it, uses that
    table's first row as the column headers and drops every remaining row
    that contains a missing value.

    Returns:
        pd.DataFrame: the price-sheet rows, with columns named after the
        table's first row.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    # requests has no default timeout; without one a stalled connection
    # would block the daily job indefinitely.
    response = requests.get(url, timeout=30)
    # Fail loudly on an error page instead of parsing whatever table it holds.
    response.raise_for_status()

    sheet = pd.read_html(StringIO(response.text))[0]
    # The first scraped row is treated as the header row.
    sheet.columns = sheet.iloc[0]
    return sheet[1:].dropna()

def get_open_price():
    """Return today's opening prices as a one-row, company-per-column frame.

    Takes the ``Company Name`` and ``Opening Price`` columns of
    ``get_todays_data()``, transposes them so each company becomes a column,
    and labels the single remaining row with the current Africa/Harare time.
    The index is named ``Date``.
    """
    todays_sheet = get_todays_data()
    transposed = todays_sheet[['Company Name', 'Opening Price']].T
    # Row 0 of the transposed frame holds the company names: use it as the
    # header, then keep only the row of prices below it.
    transposed.columns = transposed.iloc[0]
    prices = transposed[1:]
    prices.index.name = 'Date'
    stamp = datetime.now(pytz.timezone('Africa/Harare'))
    return prices.rename(index={prices.index[0]: stamp})

def get_close_price():
    """Return today's closing prices as a one-row, company-per-column frame.

    Takes the ``Company Name`` and ``Closing Price`` columns of
    ``get_todays_data()``, transposes them so each company becomes a column,
    and labels the single remaining row with the current Africa/Harare time.
    The index is named ``Date``.
    """
    todays_sheet = get_todays_data()
    transposed = todays_sheet[['Company Name', 'Closing Price']].T
    # Row 0 of the transposed frame holds the company names: use it as the
    # header, then keep only the row of prices below it.
    transposed.columns = transposed.iloc[0]
    prices = transposed[1:]
    prices.index.name = 'Date'
    stamp = datetime.now(pytz.timezone('Africa/Harare'))
    return prices.rename(index={prices.index[0]: stamp})

def get_vol_traded():
    """Return today's traded volumes as a one-row, company-per-column frame.

    Takes the ``Company Name`` and ``Total Traded Volume`` columns of
    ``get_todays_data()``, transposes them so each company becomes a column,
    and labels the single remaining row with the current Africa/Harare time.
    The index is named ``Date``.
    """
    todays_sheet = get_todays_data()
    transposed = todays_sheet[['Company Name', 'Total Traded Volume']].T
    # Row 0 of the transposed frame holds the company names: use it as the
    # header, then keep only the row of volumes below it.
    transposed.columns = transposed.iloc[0]
    volumes = transposed[1:]
    volumes.index.name = 'Date'
    stamp = datetime.now(pytz.timezone('Africa/Harare'))
    return volumes.rename(index={volumes.index[0]: stamp})



In [98]:
## This function gets the daily exchange rates

def get_rates():
    """Scrape today's USD-to-ZiG exchange rates into a one-row DataFrame.

    Fetches the page at ``url2``, reads the first HTML table on it, extracts
    the first number found in each ``Value`` cell and looks up three rows by
    their ``Rate`` label: the plain USD rate and the lowest and highest
    informal-sector rates.

    Returns:
        pd.DataFrame: a single row with columns ``Date``, ``USA DOLLAR``,
        ``Lowest Informal Sector Rate`` and ``Highest Informal Sector Rate``.
        ``Date`` is today's date in Africa/Harare as a midnight timestamp.
        A rate whose label is missing from the table is ``pd.NA``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    # Label in the scraped ``Rate`` column -> column name in the result.
    wanted = {
        '1 USD to ZiG': 'USA DOLLAR',
        '1 USD to ZiG Lowest Informal Sector Rate': 'Lowest Informal Sector Rate',
        '1 USD to ZiG Highest Informal Sector Rate': 'Highest Informal Sector Rate',
    }

    # requests has no default timeout; without one a stalled connection
    # would block the daily job indefinitely.
    response = requests.get(url2, timeout=30)
    # Fail loudly on an error page instead of parsing whatever table it holds.
    response.raise_for_status()
    table = pd.read_html(StringIO(response.text))[0]

    # Keep only the first number in each cell; cells without one become NaN.
    numbers = table['Value'].str.extract(r'(\d+\.?\d*)', expand=False).astype(float)

    # Use the Harare calendar date, like the stock-price functions do, so the
    # stored date does not depend on the time zone of the machine running this.
    harare_now = datetime.now(pytz.timezone('Africa/Harare'))
    row = {'Date': pd.to_datetime(harare_now.date())}

    for label, column in wanted.items():
        matches = numbers[table['Rate'] == label]
        # First matching row wins; a missing label is recorded as pd.NA.
        row[column] = matches.iloc[0] if len(matches) else pd.NA

    return pd.DataFrame([row])





In [103]:
# Load the previously saved archive frames (written with orient='split') so
# that today's rows can be appended to them.
open_json, close_json, vol_json, rates_json = (
    pd.read_json(archive_path, orient = 'split')
    for archive_path in (
        'archive-single-file/open_price.json',
        'archive-single-file/close_price.json',
        'archive-single-file/vol_traded.json',
        'archive-single-file/rates.json',
    )
)

In [105]:
# This is the code that needs to run every day
def update_data():
    """Append today's scraped rows to the archive frames loaded earlier.

    Reads the module-level ``open_json``, ``close_json``, ``vol_json`` and
    ``rates_json`` frames and concatenates onto each the matching one-row
    frame from ``get_open_price()``, ``get_close_price()``,
    ``get_vol_traded()`` and ``get_rates()``. The loaded frames themselves
    are not modified.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        ``(open_price, close_price, vol_traded, rates)``.
    """
    open_price = pd.concat([open_json, get_open_price()], axis=0)
    close_price = pd.concat([close_json, get_close_price()], axis=0)
    vol_traded = pd.concat([vol_json, get_vol_traded()], axis=0)
    # get_rates() returns a row labelled 0 (default index) with the date held
    # in a column, so renumber here; otherwise every daily append would add
    # another duplicate "0" label to the rates index.
    rates = pd.concat([rates_json, get_rates()], axis=0, ignore_index=True)

    return open_price, close_price, vol_traded, rates

In [108]:
def save_data(open_price: pd.DataFrame, close_price: pd.DataFrame, vol_traded: pd.DataFrame, rates: pd.DataFrame):
    """Write the four updated frames to their JSON files.

    Each frame is serialised with ``orient='split'`` and ISO-formatted dates
    into the ``archive-single-file`` directory, replacing the file that is
    already there.

    Args:
        open_price: frame written to ``open_price.json``.
        close_price: frame written to ``close_price.json``.
        vol_traded: frame written to ``vol_traded.json``.
        rates: frame written to ``rates.json``.
    """
    destinations = (
        (open_price, 'archive-single-file/open_price.json'),
        (close_price, 'archive-single-file/close_price.json'),
        (vol_traded, 'archive-single-file/vol_traded.json'),
        (rates, 'archive-single-file/rates.json'),
    )
    for frame, json_path in destinations:
        frame.to_json(json_path, orient='split', date_format='iso')

# Call update_data() once and unpack its four frames: indexing four separate
# calls would scrape both sites four times over and save frames that come
# from different fetches.
save_data(*update_data())