In [2]:
import re
import os
import bs4
import requests
import nepali_datetime as nep_dt
import pandas as pd
from langchain.document_loaders import WebBaseLoader

In [3]:
# CSV file that later cells read and write.
file_path = "../csv_files/real_time_data.csv"

# Load the NEA home page through the web loader; only the first returned
# document's text is kept for the regex extraction further down.
loader = WebBaseLoader("https://www.nea.org.np/")
docs = loader.load()
first_doc = docs[0]
page_content = first_doc.page_content

In [5]:
# Every pattern has the shape "<label> <dash> <digits> <unit>": optional
# whitespace, an en dash or hyphen, optional whitespace, then the digits,
# which form the single capture group.
_value_part = r'\s*[–-]\s*(\d+)\s*'

# (result key, label text the regex looks for, unit suffix)
_fields = [
    ('nea', 'NEA', 'MWh'),
    ('subsidiary', 'NEA Subsidiary Companies', 'MWh'),
    ('ipp', 'IPP', 'MWh'),
    ('imports', 'Import', 'MWh'),
    ('interruption', 'Interruption', 'MWh'),
    ('demand', 'National Energy Demand', 'MWh'),
    ('peak', 'National Peak Demand', 'MW'),  # the only reading in MW rather than MWh
]

# Maps each short variable name to its compiled-on-use regex source string.
patterns = {key: label + _value_part + unit for key, label, unit in _fields}

# Run each regex exactly once against the scraped text and keep the match
# objects, so the result can be inspected without searching a second time.
matches = {
    var_name: re.search(pattern, page_content)
    for var_name, pattern in patterns.items()
}

# Each value is the captured digit string; a reading whose pattern is absent
# from the page falls back to the string '0'.
values = {
    var_name: match.group(1) if match else '0'
    for var_name, match in matches.items()
}

# A '0' fallback is indistinguishable from a genuine zero reading once it is
# written out, so name the fields that were not found on the page.
missing = [var_name for var_name, match in matches.items() if match is None]
if missing:
    print(f"Warning: no match for {', '.join(missing)}; defaulting to '0'")

# Build the row to append, keyed by column name.
# The regex captures are digit strings (or the '0' fallback); convert them to
# int so the numeric columns of the DataFrame are not degraded to object dtype
# when this row is added.
# NOTE(review): 'nea_subsidiary_cmpanies' looks misspelled, but the key
# presumably mirrors the existing CSV header — confirm before renaming.
new_row = {
    'date': nep_dt.date.today(),
    'nea': int(values['nea']),
    'nea_subsidiary_cmpanies': int(values['subsidiary']),
    'ipp': int(values['ipp']),
    'import': int(values['imports']),
    'interruption': int(values['interruption']),
    'national_energy_demand': int(values['demand']),
    'national_peak_demand': int(values['peak'])
}

# Append the new row to the CSV, creating the file (and its folder) when there
# is no usable file yet.
# NOTE(review): the row is appended unconditionally, so re-running this cell on
# the same day writes a second row for that date — confirm that is acceptable.
try:
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        df = pd.read_csv(file_path)
        # read_csv yields a default 0..n-1 index, so label len(df) is the next free row.
        df.loc[len(df)] = new_row
    else:
        # Missing or empty file: start it from this single row so the header
        # is taken from the row's keys.
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        df = pd.DataFrame([new_row])
    df.to_csv(file_path, index=False)
    # Report the date that was actually stored rather than calling today()
    # again, which could differ if the run crosses midnight.
    print(f"Successfully appended data for {new_row['date']}")
except Exception as e:
    # Failures are reported rather than raised, so a failed write does not
    # stop the rest of the notebook from running.
    print(f"Error appending to CSV: {str(e)}")


Successfully appended data for 2082-01-07
