## Equity Ranking and Scoring of top 100 Market cap companies listed in CSE

In [1]:
#Importing necessary libraries
import os
import glob
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [2]:
#Directory of trading data
DATA_DIR = "SLTOP100/"

#Feature columns to compute
FEATURES = [
    'mean_return',          # Average daily return
    'volatility',           # Standard deviation of daily returns
    'sharpe_ratio',         # Risk-adjusted return (mean / volatility)
    'max_drawdown',         # Maximum drawdown
    'momentum_20d',         # 20-day momentum
    'volume_change',        # Recent volume change
    'turnover_change'       # Recent turnover change
]

In [3]:
#Data preprocessing & feature computing
#Removing commas and convert numeric strings into floats
def clean_numeric_series(s: pd.Series) -> pd.Series:
    return s.astype(str).str.replace(',', '').astype(float)

#Load a single company's data
def load_and_process(file_path: str) -> pd.Series:
    df = pd.read_csv(
        file_path,
        parse_dates=['Trade Date'],
        dayfirst=True,
        dtype=str
    )

    #Clean numeric columns
    for col in ['Open (Rs.)', 'High (Rs.)', 'Low (Rs.)', 'Close (Rs.)',
                'TradeVolume', 'ShareVolume', 'Turnover (Rs.)']:
        df[col] = clean_numeric_series(df[col])

    #Sort by date
    df = df.sort_values('Trade Date').reset_index(drop=True)

    #Compute daily returns
    df['return'] = df['Close (Rs.)'].pct_change()

    #Feature: mean daily return
    mean_return = df['return'].mean()

    #Feature: volatility (std of returns)
    volatility = df['return'].std()

    #Feature: sharpe ratio (annualized)
    sharpe_ratio = (mean_return / volatility) * np.sqrt(252) if volatility != 0 else 0

    #Feature: max drawdown
    cum_returns = (1 + df['return']).cumprod()
    rolling_max = cum_returns.cummax()
    drawdown = cum_returns / rolling_max - 1
    max_drawdown = drawdown.min()

    #Feature: momentum (change over 20 trading days)
    if len(df) >= 21:
        momentum_20d = df['Close (Rs.)'].iloc[-1] / df['Close (Rs.)'].iloc[-21] - 1
    else:
        momentum_20d = np.nan

    #Feature: recent volume change (last 20 days vs prior 20 days)
    if len(df) >= 40:
        vol_recent = df['ShareVolume'].iloc[-20:].mean()
        vol_prior = df['ShareVolume'].iloc[-40:-20].mean()
        volume_change = (vol_recent / vol_prior - 1) if vol_prior != 0 else np.nan
    else:
        volume_change = np.nan

    #Feature: recent turnover change (20-day)
    if len(df) >= 40:
        turn_recent = df['Turnover (Rs.)'].iloc[-20:].mean()
        turn_prior = df['Turnover (Rs.)'].iloc[-40:-20].mean()
        turnover_change = (turn_recent / turn_prior - 1) if turn_prior != 0 else np.nan
    else:
        turnover_change = np.nan

    return pd.Series([
        mean_return,
        volatility,
        sharpe_ratio,
        max_drawdown,
        momentum_20d,
        volume_change,
        turnover_change
    ], index=FEATURES)


In [4]:
#Building the feature DataFrame
def build_feature_matrix(data_dir: str) -> pd.DataFrame:
    all_files = glob.glob(os.path.join(data_dir, '*.csv'))
    feature_list = []
    tickers = []

    for file_path in all_files:
        try:
            features = load_and_process(file_path)
            ticker = os.path.splitext(os.path.basename(file_path))[0]
            feature_list.append(features)
            tickers.append(ticker)
        except Exception as e:
            print(f"Error processing {file_path}: {e}")

    feature_df = pd.DataFrame(feature_list, index=tickers)
    return feature_df

In [5]:
#Computing equity scores
def compute_scores(feature_df: pd.DataFrame) -> pd.DataFrame:
    #Handling missing values
    feature_df = feature_df.fillna(feature_df.mean())

    #Standardizing
    scaler = StandardScaler()
    scaled = scaler.fit_transform(feature_df)

    #PCA: first principal component as score
    pca = PCA(n_components=1)
    scores = pca.fit_transform(scaled)

    #Higher scores = better performance
    score_df = pd.DataFrame(scores, index=feature_df.index, columns=['score'])
    score_df['score'] = score_df['score'].rank(pct=True) 

    return score_df

In [6]:
def main():
    #Building features
    features = build_feature_matrix(DATA_DIR)
    print("Feature matrix built with shape:", features.shape)

    #Computing scores
    scores = compute_scores(features)
    print(scores.head())

    #Save scores to CSV
    scores.to_csv('SLTOP100 company_scores.csv')
    print("Scores of SLTOP100 saved to SLTOP100 company_scores.csv")


if __name__ == '__main__':
    main()

Feature matrix built with shape: (100, 7)
             score
trades_AAIC   0.88
trades_ABL    0.97
trades_ACL    0.73
trades_AEL    0.83
trades_AHPL   0.21
Scores of SLTOP100 saved to SLTOP100 company_scores.csv
