In [None]:
import os
from pathlib import Path
from urllib.parse import quote

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

In [None]:
# Load settings from a .env file (python-dotenv) before reading the environment.
load_dotenv()

# PostgreSQL connection settings. Only the port has a fallback value; the other
# names are None when the corresponding variable is not set.
PGHOST = os.environ.get("PGHOST")
PGPORT = os.environ.get("PGPORT", "5432")
PGDATABASE = os.environ.get("PGDATABASE")
PGUSER = os.environ.get("PGUSER")
PGPASSWORD = os.environ.get("PGPASSWORD")

In [None]:
# SQLAlchemy URL for the psycopg2 driver.
# The user name and password are percent-encoded because characters such as
# "@", ":", "/" or "%" would otherwise be read as URL delimiters and produce a
# wrong host or credentials. str() keeps the previous rendering ("None") when a
# variable is unset, so plain alphanumeric credentials yield the same string as
# before.
connection_string = (
    f"postgresql+psycopg2://{quote(str(PGUSER), safe='')}"
    f":{quote(str(PGPASSWORD), safe='')}"
    f"@{PGHOST}:{PGPORT}/{PGDATABASE}"
)

In [None]:
# Workspace root is two levels above the current working directory; the data
# folders are resolved relative to it.
DIR_WORKSPACE = Path.cwd().parent.parent
DIR_DATA = DIR_WORKSPACE.joinpath("data")
DIR_OUTPUT = DIR_DATA.joinpath("output_combined_data")

In [None]:
# Read the combined CSV from the output folder and preview the first rows.
combined_csv_path = DIR_OUTPUT / "combined_csv.csv"
df = pd.read_csv(combined_csv_path)
df.head()

In [None]:
# List every distinct value of the counter column; each one needs an entry in
# the `companies` lookup defined below.
df.loc[:, "counter"].unique()

In [None]:
# Lookup from the raw `counter` value to the company's full name.
# Several spellings of the same counter (suffixes such as CD / XD / TS / CA / H
# and different capitalisation) point at the same company.
companies = {
    # Plain tickers
    "AIRTEL": "Airtel Malawi plc",
    "BHL": "Blantyre Hotels Plc",
    "FDHB": "FDH Bank plc",
    "FMBCH": "FMB Capital Holdings plc",
    "ICON": "ICON Properties plc",
    "ILLOVO": "Illovo Sugar Malawi plc",
    "MPICO": "Malawi Property Investment Company plc",
    "NBM": "National Bank of Malawi",
    "NBS": "NBS Bank plc",
    "NICO": "NICO Holdings plc",
    "NITL": "National Investment Trust Plc",
    "OMU": "Old Mutual Limited",
    "PCL": "Press Corporation plc",
    "STANDARD": "Standard Bank Malawi plc",
    "SUNBIRD": "Sunbird Tourism plc",
    "TNM": "Telekom Networks Malawi plc",
    # Alternate and suffixed spellings
    "OML": "Old Mutual Limited",
    "MPICO CD": "Malawi Property Investment Company plc",
    "NBM CD": "National Bank of Malawi",
    "NICO CD": "NICO Holdings plc",
    "PCL CD": "Press Corporation plc",
    "Standard CD": "Standard Bank Malawi plc",
    "Sunbird CD": "Sunbird Tourism plc",
    "TNM CD": "Telekom Networks Malawi plc",
    "MPICO TS": "Malawi Property Investment Company plc",
    "BHL CD": "Blantyre Hotels Plc",
    "BHL XD": "Blantyre Hotels Plc",
    "NITL XD": "National Investment Trust Plc",
    "NITL CD": "National Investment Trust Plc",
    "NBS TS": "NBS Bank plc",
    "NICO TS": "NICO Holdings plc",
    "Sunbird": "Sunbird Tourism plc",
    "PCL TS": "Press Corporation plc",
    "Sunbird H": "Sunbird Tourism plc",
    "SUNBIRD H": "Sunbird Tourism plc",
    "StandardCD": "Standard Bank Malawi plc",
    "SUNBIRDCD": "Sunbird Tourism plc",
    "NITL TS": "National Investment Trust Plc",
    "SUNBIRD TS": "Sunbird Tourism plc",
    "TNM TS": "Telekom Networks Malawi plc",
    "NBM XD": "National Bank of Malawi",
    "StandardTS": "Standard Bank Malawi plc",
    "TNM TS XD": "Telekom Networks Malawi plc",
    "TNM TS CD": "Telekom Networks Malawi plc",
    "FMB CA": "FMB Capital Holdings plc",
    "Sunbird XD": "Sunbird Tourism plc",
    "PCL XD": "Press Corporation plc",
    "MPICO XD": "Malawi Property Investment Company plc",
    "NICO XD": "NICO Holdings plc",
    "Standard XD": "Standard Bank Malawi plc",
    "TNM XD": "Telekom Networks Malawi plc",
    "FMBTS CA XD": "FMB Capital Holdings plc",
    "Sunbird TS": "Sunbird Tourism plc",
    "FMBTS CA CD": "FMB Capital Holdings plc",
    "SunbirdXD TS": "Sunbird Tourism plc",
    "SunbirdCD TS": "Sunbird Tourism plc",
    "NBS-LA*": "NBS Bank plc",
}

In [None]:
def company_names(row):
    """Return the upper-cased company name for one row.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must provide a ``"counter"`` entry that is a key of ``companies``.

    Returns
    -------
    str
        The matching company name, converted to upper case.

    Raises
    ------
    KeyError
        If the row's counter has no entry in ``companies``.

    Kept as a row-wise helper; the column below is derived with a vectorised
    lookup instead of calling this once per row.
    """
    return companies[row["counter"]].upper()


# Fail with a readable message when the data contains a counter spelling that
# has not been added to `companies` yet.
unmapped_counters = df.loc[~df["counter"].isin(list(companies)), "counter"].unique()
if len(unmapped_counters):
    raise KeyError(
        f"No company name mapped for counter value(s): {list(unmapped_counters)}"
    )

# Vectorised equivalent of applying company_names to every row.
df["name"] = df["counter"].map(companies).str.upper()
df = df.rename(columns={"counter": "ticker"})
df.head()

In [None]:
# Snapshot the name-enriched price data to disk before reshaping it for the
# database.
df_prices = df.copy()
df_prices.to_csv(DIR_OUTPUT / "master_dataset.csv")

# Company reference data. It gets its own name so that `df` (the combined price
# data) is not overwritten and this cell can be re-run.
site_data = pd.read_csv(DIR_OUTPUT / "site_data.csv")

# Company name -> ISIN and company name -> listing price.
ids = dict(zip(site_data["Company Name"], site_data["ISIN"]))
prices = dict(zip(site_data["Company Name"], site_data["Listing Price"]))

# NOTE(review): `name` was upper-cased when it was created, so the
# "Company Name" values in site_data.csv must use the same casing to match.
missing_names = (
    df_prices.loc[~df_prices["name"].isin(list(ids)), "name"].unique().tolist()
)
if missing_names:
    raise KeyError(f"Company name(s) not found in site_data.csv: {missing_names}")

df_prices["counter_id"] = df_prices["name"].map(ids)
# NOTE(review): every row's open price is filled with the company's listing
# price, not a per-day opening price - confirm this is intended.
df_prices["open_mwk"] = df_prices["name"].map(prices)

# Keep only the required fields, rename them to the database column names and
# coerce dtypes. Chaining returns a new frame, so no values are assigned onto a
# slice of the original.
df_prices = (
    df_prices[
        [
            "counter_id",
            "trade_date",
            "open_mwk",
            "daily_range_high",
            "daily_range_low",
            "today_closing_price",
            "volume_traded",
        ]
    ]
    .rename(
        columns={
            "daily_range_high": "high_mwk",
            "daily_range_low": "low_mwk",
            "today_closing_price": "close_mwk",
            "volume_traded": "volume",
        }
    )
    .assign(
        volume=lambda frame: frame["volume"].astype(float),
        trade_date=lambda frame: pd.to_datetime(frame["trade_date"]),
        close_mwk=lambda frame: frame["close_mwk"].astype(float),
    )
)
df_prices.head()

In [None]:
# Extract only the required fields under their database column names.
# rename() ignores labels that are absent, so this works whether df_prices still
# carries the source column names or has already been renamed; selecting by the
# old names would raise KeyError once the rename has happened.
df_prices = df_prices.rename(
    columns={
        "daily_range_high": "high_mwk",
        "daily_range_low": "low_mwk",
        "today_closing_price": "close_mwk",
        "volume_traded": "volume",
    }
)[["counter_id", "trade_date", "open_mwk", "high_mwk", "low_mwk", "close_mwk", "volume"]]

In [None]:
# Show rows whose closing price is the literal text '20000(SB)'.
# The result is empty whenever close_mwk has already been converted to float.
sb_close_mask = df_prices["close_mwk"].eq("20000(SB)")
df_prices.loc[sb_close_mask]

In [None]:
# Coerce dtypes: trade_date becomes datetime, volume and close_mwk become float.
df_prices["trade_date"] = pd.to_datetime(df_prices["trade_date"])
for numeric_column in ("volume", "close_mwk"):
    df_prices[numeric_column] = df_prices[numeric_column].astype(float)

# Summarise the resulting column dtypes and non-null counts.
df_prices.info()

In [None]:
# Build the database engine. pool_pre_ping=True asks SQLAlchemy to check that a
# pooled connection is still alive before handing it out.
engine = create_engine(connection_string, pool_pre_ping=True)

# Smoke-test the connection by asking the server for the current user and
# database.
with engine.connect() as connection:
    identity_result = connection.execute(
        text("SELECT current_user, current_database();")
    )
    who = identity_result.fetchone()
    print("Connected as:", who)

In [None]:
# Create the counters table if it is missing.
# This reuses the `engine` built in the connection-test cell above. Creating a
# second engine here only opened another connection pool and orphaned the first.
query = """create table if not exists counters (counter_id text primary key, ticker text not null, name text not null, listing_price real not null, date_listed date not null);"""
# engine.begin() commits on success and rolls back if the statement fails.
with engine.begin() as conn:
    conn.execute(text(query))

In [None]:
# Reload the company reference data and shape it like the counters table.
df = pd.read_csv(DIR_OUTPUT.joinpath("site_data.csv"))

# Columns are relabelled by position, so the CSV must have exactly these five
# columns in this order.
df.columns = ["name", "ticker", "counter_id", "listing_price", "date_listed"]
df = df.loc[:, ["counter_id", "ticker", "name", "listing_price", "date_listed"]]

# Store plain date objects rather than timestamps.
listed_on = pd.to_datetime(df["date_listed"])
df["date_listed"] = listed_on.dt.date
df.head()

In [None]:
# Insert only counters that are not stored yet. counter_id is the table's
# primary key, so appending the full frame again on a re-run would fail with a
# unique-constraint violation.
with engine.connect() as conn:
    existing_counter_ids = [
        row[0] for row in conn.execute(text("select counter_id from counters"))
    ]
new_counters = df[~df["counter_id"].isin(existing_counter_ids)]
new_counters.to_sql("counters", engine, if_exists="append", index=False)

In [None]:
# Create the prices table if it is missing.
# Numeric columns use double precision: `real` is a 4-byte float (about 6
# significant digits), which rounds the float64 prices and loses whole shares on
# volumes above roughly 16.7 million.
# "if not exists" leaves an already-created table untouched, so an existing
# table keeps its current column types.
query="""create table if not exists prices (
    counter_id text REFERENCES counters(counter_id),
    trade_date date,
    open_mwk double precision,
    high_mwk double precision,
    low_mwk double precision,
    close_mwk double precision,
    volume double precision
    );"""
# engine.begin() commits on success and rolls back if the statement fails.
with engine.begin() as conn:
    conn.execute(text(query))

In [None]:
# Append the prepared price rows to the prices table (the frame index is not
# written).
# NOTE(review): rows are appended unconditionally, so running this cell again
# presumably inserts the same rows a second time - confirm before re-running.
df_prices.to_sql(name="prices", con=engine, if_exists="append", index=False)