In [1]:
!pip install pandas numpy pdfplumber
import os
from pathlib import Path

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL



In [12]:
# Project layout, resolved from the directory one level above the current
# working directory.
DIR_WORKSPACE = Path.cwd().parents[0]
DIR_DATA = DIR_WORKSPACE.joinpath("data")
DIR_REPORTS_CSV = DIR_DATA.joinpath("mse-daily-data")  # daily report CSVs read below
DIR_OUTPUT = DIR_DATA.joinpath("combined_output_data")  # where the combined CSV is written

In [13]:
# Collect and combine all CSV files.
# glob() yields files in filesystem order, so the list is sorted to make the
# concatenation order (and the counter_id values assigned below) reproducible.
all_csv_files = sorted(DIR_REPORTS_CSV.glob("*.csv"))  # all CSVs in that directory
df_list = [pd.read_csv(f) for f in all_csv_files]

if df_list:  # make sure it's not empty
    combined_df = pd.concat(df_list, ignore_index=True)
    # Replace counter_id with a running 1..N row number over the combined frame.
    combined_df['counter_id'] = range(1, len(combined_df) + 1)
    print("Combined shape:", combined_df.shape)

    # Save to a single CSV (create the output directory on first run)
    DIR_OUTPUT.mkdir(parents=True, exist_ok=True)
    output_file = DIR_OUTPUT / "combined_reports.csv"
    combined_df.to_csv(output_file, index=False)
    print("Combined CSV saved at:", output_file)
else:
    # Nothing to combine: combined_df / output_file stay undefined.
    print("No CSV files found in", DIR_REPORTS_CSV)



Combined shape: (17872, 19)
Combined CSV saved at: d:\Documents\AIMS_DSCBI_Training\mse-api-assignment\data\combined_output_data\combined_reports.csv


In [24]:
# Structural overview of the combined frame.
# A cell only renders its last expression, so every intermediate view is
# passed to display() explicitly; previously head/describe/value_counts were
# computed and thrown away. head() is limited to the default few rows to keep
# the output readable.
display(combined_df.head())
combined_df.info()  # prints directly; returns None
display(combined_df.describe())
display(combined_df['counter'].value_counts())
combined_df.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17872 entries, 0 to 17871
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   counter_id                  17872 non-null  int64  
 1   counter                     17872 non-null  object 
 2   daily_range_high            8595 non-null   float64
 3   daily_range_low             8595 non-null   float64
 4   buy_price                   17483 non-null  float64
 5   sell_price                  17483 non-null  float64
 6   previous_closing_price      17872 non-null  float64
 7   today_closing_price         17872 non-null  float64
 8   volume_traded               17872 non-null  float64
 9   dividend_mk                 17872 non-null  float64
 10  dividend_yield_pct          17872 non-null  float64
 11  earnings_yield_pct          17872 non-null  float64
 12  pe_ratio                    17872 non-null  float64
 13  pbv_ratio                   178

(17872, 19)

In [25]:
# Load the combined CSV file into a DataFrame
combined_stock_df = pd.read_csv(output_file)
print(combined_stock_df.head())
print(combined_stock_df["counter"].unique())

# Ticker symbol -> full company name
company_map = {
    "AIRTEL": "AIRTEL MALAWI PLC",
    "BHL": "BLANTYRE HOTELS PLC",
    "FDHB": "FDH BANK PLC",
    "FMBCH": "FMB CAPITAL HOLDINGS PLC",
    "ICON": "ICON PROPERTIES PLC",
    "ILLOVO": "ILLOVO SUGAR MALAWI PLC",
    "MPICO": "MPICO PLC",
    "NBM": "NATIONAL BANK OF MALAWI",
    "NBS": "NBS BANK PLC",
    "NICO": "NICO HOLDINGS PLC",
    "NITL": "NATIONAL INVESTMENT TRUST PLC",
    "OMU": "OLD MUTUAL LIMITED",
    "PCL": "PRESS CORPORATION PLC",
    "STANDARD": "STANDARD BANK MALAWI PLC",
    "SUNBIRD": "SUNBIRD TOURISM PLC",
    "TNM": "TELEKOM NETWORKS MALAWI PLC"
}
# Add a new column with the full name
combined_stock_df["name"] = combined_stock_df["counter"].map(company_map)

# .map() leaves NaN for any ticker missing from company_map; surface those
# instead of letting empty names flow silently into later steps.
unmapped_counters = sorted(
    combined_stock_df.loc[combined_stock_df["name"].isna(), "counter"]
    .dropna()
    .astype(str)
    .unique()
)
if unmapped_counters:
    print("WARNING: no company name mapped for counters:", unmapped_counters)

# NOTE(review): trade_date/buy_price are renamed to date_listed/listing_price
# here; confirm the daily values are really meant to stand in for listing data.
combined_stock_df = combined_stock_df.rename(columns={
    "counter": "ticker",
    "trade_date": "date_listed",
    "buy_price": "listing_price"
})

   counter_id counter  daily_range_high  daily_range_low  buy_price  \
0           1  AIRTEL               NaN              NaN      22.05   
1           2     BHL               NaN              NaN       0.00   
2           3    FDHB              15.0             12.5      12.50   
3           4   FMBCH              25.5             25.5      22.00   
4           5    ICON              12.7             12.7      12.70   

   sell_price  previous_closing_price  today_closing_price  volume_traded  \
0        0.00                   22.00                22.00            0.0   
1       12.94                   12.94                12.94            0.0   
2       15.00                   10.00                14.99      1118097.0   
3       25.50                   25.50                25.50        20000.0   
4       13.00                   12.70                12.70       194000.0   

   dividend_mk  dividend_yield_pct  earnings_yield_pct  pe_ratio  pbv_ratio  \
0         1.25                5

In [7]:
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment.
load_dotenv()

# PostgreSQL connection settings; missing variables become empty strings.
PGHOST = os.getenv("PGHOST", "").strip()
# Keep only the digits of PGPORT. Fall back to 5432 when the variable is
# unset/blank OR contains no digits at all (int('') would raise ValueError).
_pgport_digits = ''.join(filter(str.isdigit, os.getenv("PGPORT", "")))
PGPORT = int(_pgport_digits) if _pgport_digits else 5432
PGDATABASE = os.getenv("PGDATABASE", "").strip()
PGUSER = os.getenv("PGUSER", "").strip()

# Optional: print to confirm
print(PGHOST, PGPORT, PGDATABASE, PGUSER)


localhost 5432 mse_database postgres


# SQL / psql commands used to create the database and its tables and to load data into them
D:\Documents\AIMS_DSCBI_Training\mse-api-assignment>psql -U postgres
CREATE DATABASE mse_database; # Create a database
postgres=# \c mse_database # connect to database or postgres=# \connect mse_database
mse_database=# \dt # check content(tables) of the db
## Create tables under database (mse_database)
-- counters: keyed by counter_id; ticker and name are mandatory.
CREATE TABLE IF NOT EXISTS counters (
    counter_id    TEXT,
    ticker        TEXT NOT NULL,
    name          TEXT NOT NULL,
    date_listed   DATE,
    listing_price NUMERIC(10,2),
    PRIMARY KEY (counter_id)
);

-- prices_daily: at most one row per (counter_id, trade_date);
-- counter_id must exist in counters.
CREATE TABLE IF NOT EXISTS prices_daily (
    counter_id TEXT,
    trade_date DATE,
    open_mwk   NUMERIC(10,2),
    high_mwk   NUMERIC(10,2),
    low_mwk    NUMERIC(10,2),
    close_mwk  NUMERIC(10,2),
    volume     BIGINT,
    PRIMARY KEY (counter_id, trade_date),
    FOREIGN KEY (counter_id) REFERENCES counters (counter_id)
);
# populate the tables(counters) of database 
-- Seed the counters table. counter_id is a TEXT column, so the ids are
-- written as string literals; OVERRIDING SYSTEM VALUE was dropped because it
-- only applies to identity columns and counters has none.
INSERT INTO counters (counter_id, ticker, name, date_listed, listing_price)
VALUES
  ('1',  'AIRTEL',   'Airtel Malawi plc',                          '2020-02-24', 12.69),
  ('2',  'BHL',      'Blantyre Hotels plc',                        '1997-03-25',  0.84),
  ('3',  'FDHB',     'FDH Bank plc',                               '2020-08-03', 10.00),
  ('4',  'FMBCH',    'FMB Capital Holdings plc',                   '2017-09-18', 45.01),
  ('5',  'ICON',     'Icon Properties plc',                        '2019-01-21',  8.75),
  ('6',  'ILLOVO',   'Illovo Sugar Malawi plc',                    '1997-11-10',  2.25),
  ('7',  'MPICO',    'Malawi Property Investment Company plc',     '2007-08-28',  1.00),
  ('8',  'NBM',      'National Bank of Malawi plc',                '2000-08-21',  4.00),
  ('9',  'NBS',      'NBS Bank plc',                               '2007-06-25',  2.60),
  ('10', 'NICO',     'NICO Holdings plc',                          '1996-11-11',  2.00),
  ('11', 'NITL',     'National Investment Trust plc',              '2005-03-21',  2.65),
  ('12', 'OMU',      'Old Mutual Limited',                         '2018-06-26', 1580.22),
  ('13', 'PCL',      'Press Corporation plc',                      '1998-09-09', 14.89),
  ('14', 'STANDARD', 'Standard Bank Malawi plc',                   '1998-06-29',  3.25),
  ('15', 'SUNBIRD',  'Sunbird Tourism plc',                        '2002-08-21',  2.60),
  ('16', 'TNM',      'Telekom Networks Malawi plc',                '2008-11-25',  5.00);
## check content of counter table
  mse_database=# \d counters
## drop tables from database
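mse_database=# DROP TABLE counters CASCADE; # drop the table together with objects that depend on it (e.g. foreign keys)
mse_database=# DROP TABLE counters; # plain drop; fails while other objects still depend on the table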
  
## To browse the data from table counters
mse_database=# SELECT * FROM counters
mse_database-# LIMIT 10;
mse_database=# SELECT counter_id, trade_date FROM prices_daily;
mse_database=# \d+ prices_daily #check general information of table


## Update existing counter_id values in counters table
-- Replace each counter_id with the security's identifier code, matched by ticker.
-- The WHERE values must equal the tickers stored in counters (FDHB, FMBCH,
-- ILLOVO, MPICO ...); a ticker that does not match updates zero rows.
UPDATE counters SET counter_id = 'MWAIRT001156' WHERE ticker = 'AIRTEL';
UPDATE counters SET counter_id = 'MWBHL0010029' WHERE ticker = 'BHL';
UPDATE counters SET counter_id = 'MWFDHB001166' WHERE ticker = 'FDHB';
UPDATE counters SET counter_id = 'MWFMB0010138' WHERE ticker = 'FMBCH';
UPDATE counters SET counter_id = 'MWICON001146' WHERE ticker = 'ICON';
UPDATE counters SET counter_id = 'MWILLV010032' WHERE ticker = 'ILLOVO';
UPDATE counters SET counter_id = 'MWMPI0010116' WHERE ticker = 'MPICO';
UPDATE counters SET counter_id = 'MWNBM0010074' WHERE ticker = 'NBM';
UPDATE counters SET counter_id = 'MWNBS0010105' WHERE ticker = 'NBS';
UPDATE counters SET counter_id = 'MWNICO010014' WHERE ticker = 'NICO';
UPDATE counters SET counter_id = 'MWNITL010091' WHERE ticker = 'NITL';
UPDATE counters SET counter_id = 'ZAE000255360' WHERE ticker = 'OMU';
UPDATE counters SET counter_id = 'MWPCL0010053' WHERE ticker = 'PCL';
UPDATE counters SET counter_id = 'MWSTD0010041' WHERE ticker = 'STANDARD';
UPDATE counters SET counter_id = 'MWSTL0010085' WHERE ticker = 'SUNBIRD';
UPDATE counters SET counter_id = 'MWTNM0010126' WHERE ticker = 'TNM';


Step 1: Drop existing table (optional)
-- Remove any earlier prices_daily definition; without this the
-- CREATE TABLE IF NOT EXISTS below would silently keep the old table.
DROP TABLE IF EXISTS prices_daily;

-- Wide layout with one column per field of the combined daily report.
-- The column order matters: a COPY without a column list fills columns in
-- this order. Unlike the earlier definition, counter_id is BIGINT here and
-- carries no foreign key to counters.
CREATE TABLE IF NOT EXISTS prices_daily (
    counter_id BIGINT,
    counter TEXT,
    daily_range_high NUMERIC(15,2),
    daily_range_low NUMERIC(15,2),
    buy_price NUMERIC(15,2),
    sell_price NUMERIC(15,2),
    previous_closing_price NUMERIC(15,2),
    today_closing_price NUMERIC(15,2),
    volume_traded NUMERIC(15,2),
    dividend_mk NUMERIC(15,2),
    dividend_yield_pct NUMERIC(15,2),
    earnings_yield_pct NUMERIC(15,2),
    pe_ratio NUMERIC(15,2),
    pbv_ratio NUMERIC(15,2),
    market_capitalization_mkmn NUMERIC(20,2),
    profit_after_tax_mkmn NUMERIC(20,2),
    num_shares_issue NUMERIC(20,2),
    trade_date DATE,
    -- stored as free text, not parsed as a time value
    print_time TEXT,
    -- at most one row per (counter_id, trade_date)
    PRIMARY KEY (counter_id, trade_date)
);
## add variable column into table
-- IF NOT EXISTS makes this safe to run when the column is already present
-- (a plain ADD COLUMN errors on an existing column). Precision matches the
-- NUMERIC(15,2) used for the other price columns of the wide prices_daily table.
ALTER TABLE prices_daily
ADD COLUMN IF NOT EXISTS sell_price NUMERIC(15,2);

# load the whole csv file into the table directly (server-side COPY: the file is read by the database server, columns are filled in table order)
COPY prices_daily FROM 'D:/Documents/AIMS_DSCBI_Training/mse-api-assignment/data/combined_output_data/combined_reports.csv' DELIMITER ',' CSV HEADER;

# load the csv file with an explicit column list (client-side \copy: the file is read by psql; the command must stay on a single line)
\copy prices_daily(counter_id, counter, daily_range_high, daily_range_low, buy_price, sell_price, previous_closing_price, today_closing_price, volume_traded, dividend_mk, dividend_yield_pct, earnings_yield_pct, pe_ratio, pbv_ratio, market_capitalization_mkmn, profit_after_tax_mkmn, num_shares_issue, trade_date, print_time) FROM 'D:/Documents/AIMS_DSCBI_Training/mse-api-assignment/data/combined_output_data/combined_reports.csv' CSV HEADER;


### change the decimal formats of figures
-- Change the stored type of volume_traded (the keyword COLUMN appears once;
-- the duplicated keyword was a syntax error).
ALTER TABLE prices_daily
ALTER COLUMN volume_traded TYPE NUMERIC(15,2);


In [6]:
import pandas as pd
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv

In [None]:
import pandas as pd
from sqlalchemy import create_engine, text

# Read the listing CSV from the project data directory instead of a
# machine-specific absolute path.
df = pd.read_csv(DIR_DATA / "misc" / "listing.csv")

# Create PostgreSQL connection
## Build the URL from the PG* settings loaded from .env. The password is taken
## from the PGPASSWORD environment variable so no credential lives in the notebook.
engine = create_engine(
    URL.create(
        "postgresql+psycopg2",
        username=PGUSER,
        password=os.getenv("PGPASSWORD"),
        host=PGHOST,
        port=PGPORT,
        database=PGDATABASE,
    )
)

# Write to SQL. if_exists="replace" drops an existing "counters" table and
# recreates it from the DataFrame, so a hand-written table definition is lost.
mse_counters = df.to_sql("counters", engine, if_exists="replace", index=False)

print("✅ Data successfully written to SQL table 'counters'")
print(mse_counters)

# Verify by querying the table that was just written
with engine.connect() as conn:
    result = conn.execute(text("SELECT * FROM counters LIMIT 5;"))
    for row in result:
        print(row)

✅ Data successfully written to SQL table 'listing'


In [26]:
# Read the combined CSV from the project output directory instead of a
# machine-specific absolute path.
df = pd.read_csv(DIR_OUTPUT / "combined_reports.csv")

# Columns to load, in the order they should appear in the SQL table
headers_vars = [
    'counter_id', 'counter', 'daily_range_high', 'daily_range_low', 'buy_price',
    'sell_price', 'previous_closing_price', 'today_closing_price', 'volume_traded', 'dividend_mk',
    'dividend_yield_pct', 'earnings_yield_pct', 'pe_ratio', 'pbv_ratio',
    'market_capitalization_mkmn', 'profit_after_tax_mkmn', 'num_shares_issue',
    'trade_date', 'print_time']
df = df[headers_vars]  # raises KeyError if any listed column is missing
# Create PostgreSQL connection
## Build the URL from the PG* settings loaded from .env. The password is taken
## from the PGPASSWORD environment variable so no credential lives in the notebook.
engine = create_engine(
    URL.create(
        "postgresql+psycopg2",
        username=PGUSER,
        password=os.getenv("PGPASSWORD"),
        host=PGHOST,
        port=PGPORT,
        database=PGDATABASE,
    )
)

# Write to SQL (replaces any existing "daily_prices" table).
# NOTE(review): the SQL notes in this notebook create "prices_daily"; confirm
# which of the two table names is the intended one.
mse_dailyPrices = df.to_sql("daily_prices", engine, if_exists="replace", index=False)
print(mse_dailyPrices)
print("✅ Data successfully written to SQL table 'daily_prices'")


872
✅ Data successfully written to SQL table 'daily_prices'
