In [3]:
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
import pandas as pd
from pathlib import Path
import requests
from bs4 import BeautifulSoup
import re

In [4]:
load_dotenv()  

PGHOST = os.getenv("PGHOST")
PGPORT = os.getenv("PGPORT", "5432")
PGDATABASE = os.getenv("PGDATABASE")
PGUSER = os.getenv("PGUSER")
PGPASSWORD = os.getenv("PGPASSWORD")

In [5]:
print("PostgreSQL Connection Settings:")
print(f"Host: {PGHOST}")
print(f"Port: {PGPORT}")
print(f"Database: {PGDATABASE}")
print(f"User: {PGUSER}")
print(f"Password: {'[SET]' if PGPASSWORD else '[NOT SET]'}")

PostgreSQL Connection Settings:
Host: localhost
Port: 5432
Database: mse_database
User: postgres
Password: [NOT SET]


In [6]:
# Create SQLAlchemy engine (no password needed for local connections)
connection_string = f"postgresql+psycopg2://{PGUSER}:{PGPASSWORD}@{PGHOST}:{PGPORT}/{PGDATABASE}"
print("Connection psql string:", connection_string)

Connection psql string: postgresql+psycopg2://postgres:@localhost:5432/mse_database


In [7]:
engine = create_engine(
    connection_string,
    pool_pre_ping=True,
)

# Test the connection
with engine.connect() as conn:
    who = conn.execute(text("SELECT current_user, current_database();")).fetchone()
    print("Connected as:", who)

Connected as: ('postgres', 'mse_database')


In [51]:
dir_data = Path.cwd().parents[0]/"data"/"master_csv"
masterfile = dir_data/"master.csv"

masterfile

WindowsPath('d:/Documents/AIMS_DSCBI_Training/mse-api-assignment/data/master_csv/master.csv')

In [67]:
df = pd.read_csv(masterfile)

In [68]:
companies = {'AIRTEL':'Airtel Malawi plc', 
       'BHL':'Blantyre Hotels Plc', 
       'FDHB':'FDH Bank plc', 
       'FMBCH':'FMB Capital Holdings plc', 
       'ICON':'ICON Properties plc',
       'ILLOVO':'Illovo Sugar Malawi plc', 
       'MPICO':'Malawi Property Investment Company plc',
       'NBM':'National Bank of Malawi',
       'NBS':'NBS Bank plc', 
       'NICO':'NICO Holdings plc',
       'NITL':'National Investment Trust Plc',
       'OMU':'Old Mutual Limited', 
       'PCL':'Press Corporation plc', 
       'STANDARD':'Standard Bank Malawi plc',
       'SUNBIRD':'Sunbird Tourism plc',
       'TNM':'Telekom Networks Malawi plc',
       'OML':'Old Mutual Limited', 
       'MPICO CD':'Malawi Property Investment Company plc',
       'NBM CD':'National Bank of Malawi', 
       'NICO CD':'NICO Holdings plc', 
       'PCL CD':'Press Corporation plc',
       'Standard CD':'Standard Bank Malawi plc',
       'Sunbird CD':'Sunbird Tourism plc',
       'TNM CD':'Telekom Networks Malawi plc', 
       'MPICO TS':'Malawi Property Investment Company plc', 
       'BHL CD':'Blantyre Hotels Plc',
       'BHL XD':'Blantyre Hotels Plc', 
       'NITL XD':'National Investment Trust Plc',
       'NITL CD':'National Investment Trust Plc',
       'NBS TS':'NBS Bank plc', 
       'NICO TS':'NICO Holdings plc', 
       'Sunbird':'Sunbird Tourism plc',
       'PCL TS':'Press Corporation plc',
       'Sunbird H':'Sunbird Tourism plc',
       'SUNBIRD H':'Sunbird Tourism plc',
       'StandardCD':'Standard Bank Malawi plc', 
       'SUNBIRDCD':'Sunbird Tourism plc', 
       'NITL TS':'National Investment Trust Plc',
       'SUNBIRD TS':'Sunbird Tourism plc',
       'TNM TS':'Telekom Networks Malawi plc',
       'NBM XD':'National Bank of Malawi',
       'StandardTS':'Standard Bank Malawi plc',
       'TNM TS XD':'Telekom Networks Malawi plc', 
       'TNM TS CD':'Telekom Networks Malawi plc',
       'FMB CA':'FMB Capital Holdings plc', 
       'Sunbird XD':'Sunbird Tourism plc',
       'PCL XD':'Press Corporation plc',
       'MPICO XD':'Malawi Property Investment Company plc',
       'NICO XD':'NICO Holdings plc',
       'Standard XD':'Standard Bank Malawi plc',
       'TNM XD':'Telekom Networks Malawi plc',
       'FMBTS CA XD':'FMB Capital Holdings plc', 
       'Sunbird TS':'Sunbird Tourism plc',
       'FMBTS CA CD':'FMB Capital Holdings plc', 
       'SunbirdXD TS':'Sunbird Tourism plc',
       'SunbirdCD TS':'Sunbird Tourism plc',
       'NBS-LA*':'NBS Bank plc'}

In [69]:
def company(row):
    return companies[row['counter']].upper()
df['name'] = df.apply(company,axis=1)
df = df.rename(columns={"counter": "ticker"})
df

Unnamed: 0,counter_id,daily_range_high,daily_range_low,ticker,buy_price,sell_price,previous_closing_price,today_closing_price,volume_traded,dividend_mk,dividend_yield_pct,earnings_yield_pct,pe_ratio,pbv_ratio,market_capitalization_mkmn,profit_after_tax_mkmn,num_shares_issue,trade_date,print_time,name
0,1,138.97,137.94,AIRTEL,0,137.96,138.54,137.98,79004.0,2,1.45,2.81,35.53,47.26,1517780,42722.110,1.100000e+10,9/19/2025,14:37:47,AIRTEL MALAWI PLC
1,2,15.02,15.02,BHL,15.02,0,15.02,15.02,7042.0,0,0.00,-1.55,-64.44,1.36,88291.39,-1370.110,5.878255e+09,9/19/2025,14:37:47,BLANTYRE HOTELS PLC
2,3,637.95,637.91,FDHB,635,637.92,637.95,637.93,101411.0,4.73,0.74,1.68,59.44,45.2,4402374.87,74063.000,6.901031e+09,9/19/2025,14:37:47,FDH BANK PLC
3,4,1897.99,1897.99,FMBCH,1897.99,0,1897.99,1897.99,15133.0,3.64,0.19,2.53,39.45,14.18,4665733.92,118254.740,2.458250e+09,9/19/2025,14:37:47,FMB CAPITAL HOLDINGS PLC
4,5,17.94,17.94,ICON,0,17.94,17.94,17.94,18168.0,0.29,1.62,20.38,4.91,0.82,119839.2,24424.490,6.680000e+09,9/19/2025,14:37:47,ICON PROPERTIES PLC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26805,10,,,NITL,3650,,3650.00,,,100,2.74,,,0.75,4927.5,-1071.020,1.350000e+08,6/29/2017,14:00:00,NATIONAL INVESTMENT TRUST PLC
26806,11,,,PCL,,57000,57000.00,,,1250,2.19,25.11,3.98,0.66,68545.82,17214.000,1.202558e+08,6/29/2017,14:00:00,PRESS CORPORATION PLC
26807,12,,,STANDARD,57001,60000,57001.00,,,2557,4.49,14.52,6.89,2.13,133763.2,19425.000,2.346682e+08,6/29/2017,14:00:00,STANDARD BANK MALAWI PLC
26808,13,,,SUNBIRD TS,,6500,6400.00,,,40,0.63,7.98,12.53,1.27,16741.29,1336.457,2.615826e+08,6/29/2017,14:00:00,SUNBIRD TOURISM PLC


In [62]:
df_prices = df.copy()
df =  pd.read_csv(dir_data/ "mse_web.csv")
df

Unnamed: 0,Company Name,Symbol,ISIN,Listing Price,Date Listed
0,AIRTEL MALAWI PLC,AIRTEL,MWAIRT001156,12.69,24-Feb-20
1,BLANTYRE HOTELS PLC,BHL,MWBHL0010029,0.84,25-Mar-97
2,FDH BANK PLC,FDHB,MWFDHB001166,10.0,3-Aug-20
3,FMB CAPITAL HOLDINGS PLC,FMBCH,MWFMB0010138,45.01,18-Sep-17
4,ICON PROPERTIES PLC,ICON,MWICON001146,8.75,21-Jan-19
5,ILLOVO SUGAR MALAWI PLC,ILLOVO,MWILLV010032,2.25,10-Nov-97
6,MALAWI PROPERTY INVESTMENT COMPANY PLC,MPICO,MWMPI0010116,2.25,12-Nov-07
7,NATIONAL BANK OF MALAWI,NBM,MWNBM0010074,4.0,21-Aug-00
8,NBS BANK PLC,NBS,MWNBS0010105,2.6,25-Jun-07
9,NICO HOLDINGS PLC,NICO,MWNICO010014,2.0,11-Nov-96


In [63]:
df =  pd.read_csv(dir_data/ "mse_web.csv")
ids={}
for ticker, id in zip(df['Company Name'].values,df['ISIN'].values):
    ids[ticker] = id
df_prices['counter_id'] = df_prices['name'].apply(lambda x:ids[x])
#df_prices.head()

prices={}
for ticker, price in zip(df['Company Name'].values,df['Listing Price'].values):
    prices[ticker] = price
df_prices['open_mwk'] = df_prices['name'].apply(lambda x:prices[x])
df_prices.tail()
df_prices.head()

Unnamed: 0,counter_id,daily_range_high,daily_range_low,ticker,buy_price,sell_price,previous_closing_price,today_closing_price,volume_traded,dividend_mk,...,earnings_yield_pct,pe_ratio,pbv_ratio,market_capitalization_mkmn,profit_after_tax_mkmn,num_shares_issue,trade_date,print_time,name,open_mwk
0,MWAIRT001156,138.97,137.94,AIRTEL,0.0,137.96,138.54,137.98,79004.0,2.0,...,2.81,35.53,47.26,1517780.0,42722.11,11000000000.0,9/19/2025,14:37:47,AIRTEL MALAWI PLC,12.69
1,MWBHL0010029,15.02,15.02,BHL,15.02,0.0,15.02,15.02,7042.0,0.0,...,-1.55,-64.44,1.36,88291.39,-1370.11,5878255000.0,9/19/2025,14:37:47,BLANTYRE HOTELS PLC,0.84
2,MWFDHB001166,637.95,637.91,FDHB,635.0,637.92,637.95,637.93,101411.0,4.73,...,1.68,59.44,45.2,4402374.87,74063.0,6901031000.0,9/19/2025,14:37:47,FDH BANK PLC,10.0
3,MWFMB0010138,1897.99,1897.99,FMBCH,1897.99,0.0,1897.99,1897.99,15133.0,3.64,...,2.53,39.45,14.18,4665733.92,118254.74,2458250000.0,9/19/2025,14:37:47,FMB CAPITAL HOLDINGS PLC,45.01
4,MWICON001146,17.94,17.94,ICON,0.0,17.94,17.94,17.94,18168.0,0.29,...,20.38,4.91,0.82,119839.2,24424.49,6680000000.0,9/19/2025,14:37:47,ICON PROPERTIES PLC,8.75


In [65]:
#extract only the required fields
df_prices = df_prices[['counter_id','trade_date','open_mwk','daily_range_high', 'daily_range_low','today_closing_price','volume_traded' ]]
df_prices.columns = ['counter_id','trade_date','open_mwk','high_mwk','low_mwk','close_mwk','volume']
df_prices.head()

Unnamed: 0,counter_id,trade_date,open_mwk,high_mwk,low_mwk,close_mwk,volume
0,MWAIRT001156,9/19/2025,12.69,138.97,137.94,137.98,79004.0
1,MWBHL0010029,9/19/2025,0.84,15.02,15.02,15.02,7042.0
2,MWFDHB001166,9/19/2025,10.0,637.95,637.91,637.93,101411.0
3,MWFMB0010138,9/19/2025,45.01,1897.99,1897.99,1897.99,15133.0
4,MWICON001146,9/19/2025,8.75,17.94,17.94,17.94,18168.0


In [None]:
#df_prices.to_csv(DIR_OUTPUT / "master_dataset.csv")
df_prices['volume']= df_prices['volume'].astype(float)
df_prices['trade_date'] = pd.to_datetime(df_prices['trade_date'])
df_prices['close_mwk'] = df_prices['close_mwk'].astype(float)
df_prices.info()

In [None]:
df=pd.read_csv(r"C:\Users\andrew.mushokambere\Documents\DSCBI\MSE_API_ASS\mse-api-assignment\data\site_data.csv")
df.head()
df.columns=['name', 'ticker', 'counter_id', 'listing_price', 'date_listed']
df=df[['counter_id', 'ticker', 'name', 'listing_price', 'date_listed']]
df['date_listed'] = pd.to_datetime(df['date_listed']).dt.date
df['listing_price'] = df['listing_price'].astype(float)
df.head()

In [8]:
df.to_sql('counters', engine, if_exists='append', index=False)

16

In [9]:
# Create prices_daily table
query="""create table if not exists prices (
    counter_id text REFERENCES counters(counter_id),
    trade_date date,
    open_mwk real,
    high_mwk real,
    low_mwk real,
    close_mwk real,
    volume real
    );"""
with engine.begin() as conn:
    conn.execute(text(query))

ProgrammingError: (psycopg2.errors.InvalidForeignKey) there is no unique constraint matching given keys for referenced table "counters"

[SQL: create table if not exists prices (
    counter_id text REFERENCES counters(counter_id),
    trade_date date,
    open_mwk real,
    high_mwk real,
    low_mwk real,
    close_mwk real,
    volume real
    );]
(Background on this error at: https://sqlalche.me/e/20/f405)