In [3]:
#import libraries
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU, Dropout,Input
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

In [4]:
fundamental_folder = "data//fundamental"
fund_files = [file for file in os.listdir(fundamental_folder) if file.endswith("xlsx")]
fund_path = os.path.join(fundamental_folder, fund_files[0])
df1 = pd.read_excel(fund_path, index_col="Symbol")
print(df1.head())

                  Sector  Sharesoutstanding  MarketPrice    EPS  P/E_ratio  \
Symbol                                                                       
aclbsl      microfinance       3.671434e+06       1035.0  22.27      46.48   
adbl    commercial banks       1.385522e+08        285.0  14.61      19.51   
ahl           hydropower       3.400000e+06        627.5 -12.02     -52.20   
ahpc          hydropower       3.848003e+07        294.1  11.85      24.82   
akjcl         hydropower       2.000000e+07        205.0   2.25      91.11   

        bookvalue   PBV  
Symbol                   
aclbsl     147.28  7.03  
adbl       225.64  1.26  
ahl         86.11  7.29  
ahpc       109.56  2.68  
akjcl       90.04  2.28  


In [5]:
import os

data_folder = "data/historical"  # Cleaned path

all_stocks = {}

# Load each stock's data
for file in os.listdir(data_folder):
    if file.endswith(".xlsx") and not file.startswith("~$"):
        symbol = os.path.splitext(file)[0]
        df = pd.read_excel(os.path.join(data_folder, file))
        all_stocks[symbol] = df

# Display sample of each stock
for symbol, data in all_stocks.items():
    print(f"\n--- {symbol} ---")
    print(data.head(2))  



--- aclbsl ---
   #   Date      LTP  % Change    High     Low    Open  Qty.   Turnover
0  1  45805  1075.19     -0.34  1080.0  1040.2  1057.4  1787  1895715.7
1  2  45804  1078.91     -1.92  1100.0  1078.0  1100.0  2166  2337817.3

--- adbl ---
   #   Date     LTP  % Change   High    Low   Open   Qty.    Turnover
0  1  45805  299.60      1.75  300.0  291.8  293.0  49484  14693755.4
1  2  45804  294.44     -0.88  296.0  290.0  295.0  37980  11143808.2

--- ahl ---
   #   Date     LTP  % Change   High    Low   Open   Qty.   Turnover
0  1  45805  652.42     -1.76  690.0  645.0  677.0  11680  7856571.9
1  2  45804  664.11     -0.95  669.9  646.9  657.2   2357  1540853.2

--- ahpc ---
   #   Date     LTP  % Change   High    Low   Open    Qty.     Turnover
0  1  45805  290.73      4.23  292.7  279.5  280.0  444839  128011890.3
1  2  45804  278.94     -0.30  284.0  276.2  280.0  182823   51167226.5

--- akjcl ---
   #   Date     LTP  % Change   High    Low   Open    Qty.    Turnover
0  1  45

In [6]:
import requests
from bs4 import BeautifulSoup

def fetch_nepse_headlines(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    headlines = []
    for a in soup.find_all('a'):
        text = a.get_text(strip=True)
        if text and ('shares' in text.lower() or 'dividend' in text.lower() or 'bank' in text.lower()):
            headlines.append(text)
    
    return headlines[:10]  # limit to 10 for example

# Example: replace with a real NEPSE company news URL
url = 'https://www.sharesansar.com'
headlines = fetch_nepse_headlines(url)
print(headlines)


['Proposed Dividend', 'Merchant Bankers', 'Commercial Banks', 'Dividend Calculator', 'Kamana Sewa Bikas Bank Provides Financial Literacy to the Visually Impaired in Kathmandu', 'Mahalaxmi Bikas Bank Celebrates 31st Anniversary with Employee Honors and Toastmasters Launch', 'Guheswori Merchant Banking & Finance Enters 24th Year with Continued Commitment to Social Responsibility', 'Systematic Investment in Shares (SIP in Shares): A Concept for the Nepali Market', 'Steering Nepal Rastra Bank to Economic Resilience and Market Stability with Newly Elected Governor D...', '2025-06-16The existing promoter of Kumari Bank Limited is selling 7,881 shares to the general public.']


In [7]:

# !pip install torch transformers --quiet

In [12]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# -------------------------------
# Load FinBERT Model
# -------------------------------
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")

# -------------------------------
# Define sentiment function
# -------------------------------
def get_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    sentiment = torch.argmax(probs).item()
    label_map = {0: "negative", 1: "neutral", 2: "positive"}
    return label_map[sentiment]

# -------------------------------
# Web scraping function
# -------------------------------
def fetch_nepse_headlines(url):
    headers = {'User-Agent': 'Mozilla/5.0'}  # prevent blocking
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    headlines = []
    for a in soup.find_all('a'):
        text = a.get_text(strip=True)
        if text and any(keyword in text.lower() for keyword in ['shares', 'dividend', 'bank', 'ipo', 'capital', 'right issue']):
            headlines.append(text)
    
    return headlines[:10]  # Limit to 10 for performance

# -------------------------------
# Fetch and analyze headlines
# -------------------------------
url = 'https://www.sharesansar.com'
headlines = fetch_nepse_headlines(url)

if not headlines:
    print("⚠️ No relevant headlines found.")
    sentiment_score = 0
else:
    news_df = pd.DataFrame({"headline": headlines})
    news_df["sentiment"] = news_df["headline"].map(get_sentiment)

    sentiment_score = (news_df["sentiment"] == "positive").sum() - (news_df["sentiment"] == "negative").sum()

    print(news_df)
    print("🧠 Sentiment score for NEPSE news:", sentiment_score)





Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


                                            headline sentiment
0                                Capital Expenditure  negative
1                                  Proposed Dividend  negative
2                                            IPO/FPO  negative
3                               Check IPO/FPO Result  negative
4                                       IPO/FPO News  negative
5                             IPO/FPO Allotment News  negative
6                                   Merchant Bankers  negative
7                                   Commercial Banks  negative
8                                Dividend Calculator  negative
9  Kamana Sewa Bikas Bank Provides Financial Lite...  negative
🧠 Sentiment score for NEPSE news: -10


In [None]:
    # print(df[features].dtypes)
    # print(df[features].head())


In [18]:
import joblib
import os

sentiment_score = -10

# Create model output directory
model_dir = "allmodels/gru_fundamental"
os.makedirs(model_dir, exist_ok=True)

# ✅ Initialize and fit LabelEncoder for 'Sector'
sector_encoder = LabelEncoder()
if isinstance(df1, pd.DataFrame) and 'Sector' in df1.columns:
    sector_encoder.fit(df1['Sector'].dropna().astype(str))
else:
    raise ValueError("df1 must be a DataFrame with a 'Sector' column.")

# ✅ Loop through each stock
for symbol, data in all_stocks.items():
    if symbol not in df1.index:
        print(f"⚠️ Skipping {symbol.upper()}: No fundamental data.")
        continue

    # Check if model already exists
    model_path = os.path.join(model_dir, f"{symbol.upper()}_GRU.keras")
    scaler_path = os.path.join(model_dir, f"{symbol.upper()}_scaler.save")

    if os.path.exists(model_path) and os.path.exists(scaler_path):
        print(f"⏩ Skipping {symbol.upper()}: Model already exists.")
        continue

    df = data.copy()
    df.columns = df.columns.str.strip()  # Clean column names

    #  Add fundamental data to each row
    fundamentals = df1.loc[symbol]
    for col in ['Sector', 'Sharesoutstanding', 'EPS', 'P/E_ratio', 'bookvalue', 'PBV']:
        df[col.upper()] = fundamentals[col]

    #  Encode sector for model input
    df['SECTOR'] = sector_encoder.transform([fundamentals['Sector']])[0]

    # Add sentiment score
    df['SENTIMENT_SCORE'] = sentiment_score

    # ✅ Drop rows with missing values in relevant columns
    required_cols = [
        'LTP', '% Change', 'High', 'Low', 'Open', 'Qty.', 'Turnover',
        'SECTOR', 'EPS', 'P/E_RATIO', 'BOOKVALUE', 'PBV', 'SHARESOUTSTANDING'
    ]
    df = df.dropna(subset=required_cols)

    # Add target variable: next day's LTP
    df['Target_LTP'] = df['LTP'].shift(-1)
    df = df.dropna(subset=['Target_LTP'])

    # Define features
    features = [
        '% Change', 'High', 'Low', 'Open', 'Qty.', 'Turnover',
        'SECTOR', 'EPS', 'P/E_RATIO', 'BOOKVALUE', 'PBV',
        'SHARESOUTSTANDING', 'SENTIMENT_SCORE'
    ]

    # Extract features and target
    X = df[features].astype(float).values  # ensure all values are float
    y = df['Target_LTP'].astype(float).values

    #Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Normalize features
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    #Reshape for GRU input: [samples, timesteps, features]
    X_train_scaled = np.expand_dims(X_train_scaled, axis=1)
    X_test_scaled = np.expand_dims(X_test_scaled, axis=1)

    #Define GRU model
    model = Sequential([
    Input(shape=(1, X_train_scaled.shape[2])),
    GRU(64, return_sequences=False),
    Dropout(0.2),
    Dense(1)
    ])

    model.compile(optimizer='adam', loss='mse')

    #Use early stopping
    early_stop = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)


    # Train model
    history = model.fit(
        X_train_scaled, y_train,
        validation_split=0.2,
        epochs=100,
        batch_size=32,
        verbose=0,
        callbacks=[early_stop]
    )
    
    # Evaluate on test set
    test_loss = model.evaluate(X_test_scaled, y_test, verbose=0)
    print(f"📊 {symbol.upper()} Test MSE: {test_loss:.4f}")
    
    # Plot training vs validation loss
    plt.figure(figsize=(6, 4))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'{symbol.upper()} Loss Curve')
    plt.xlabel('Epochs')s
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()
    
    # Save model and scaler
    model.save(os.path.join(model_dir, f"{symbol.upper()}_GRU.keras"))
    joblib.dump(scaler, os.path.join(model_dir, f"{symbol.upper()}_scaler.save"))
    
    print(f"✅ {symbol.upper()} GRU model saved.")

    


⏩ Skipping ACLBSL: Model already exists.
⏩ Skipping ADBL: Model already exists.
⏩ Skipping AHL: Model already exists.
⏩ Skipping AHPC: Model already exists.
⏩ Skipping AKJCL: Model already exists.
⏩ Skipping AKPL: Model already exists.
⏩ Skipping ALBSL: Model already exists.
⏩ Skipping ALICL: Model already exists.
⏩ Skipping ANLB: Model already exists.
⏩ Skipping API: Model already exists.
⏩ Skipping AVYAN: Model already exists.
⏩ Skipping BARUN: Model already exists.
⏩ Skipping BBC: Model already exists.
⏩ Skipping BEDC: Model already exists.
⏩ Skipping BFC: Model already exists.
⏩ Skipping BGWT: Model already exists.
⏩ Skipping BHDC: Model already exists.
⏩ Skipping BHL: Model already exists.
⏩ Skipping BHPL: Model already exists.
⏩ Skipping BNHC: Model already exists.
⏩ Skipping BNL: Model already exists.
⏩ Skipping BPCL: Model already exists.
⏩ Skipping CBBL: Model already exists.
⏩ Skipping CFCL: Model already exists.
⏩ Skipping CGH: Model already exists.
⏩ Skipping CHCL: Model al