<a href="https://colab.research.google.com/github/Dumi-coder/CeylonPulse/blob/apiDum/Copy_of_Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from bs4 import BeautifulSoup
import yfinance as yf
import pandas as pd

# --- 1. ASPI: Scraping TradingEconomics (Best effort) ---
def fetch_aspi_data():
    """Fetch the ASPI quote (value, change, % change) from TradingEconomics.

    Scrapes the first ``table-hover`` table of the Sri Lanka stock-market page
    and reads the first row whose leading cell mentions "ASPI" or "All Share".

    Returns:
        dict: Always has the keys ``"Value"``, ``"Change"`` and ``"Pct"`` as
        display strings; a field keeps its ``"N/A"`` default when the row has
        no text for it. If the request or the parsing fails, an ``"Error"``
        key describing the failure is added instead of raising, because this
        scraper is best effort.
    """
    url = "https://tradingeconomics.com/sri-lanka/stock-market"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    aspi_data = {"Value": "N/A", "Change": "N/A", "Pct": "N/A"}

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find the main table containing the indices
        index_table = soup.find('table', {'class': 'table-hover'})
        if not index_table:
            raise Exception("Index table not found.")

        for row in index_table.find_all("tr"):
            cols = row.find_all("td")
            if not cols:
                # Header rows (or empty rows) have no <td> cells.
                continue

            row_name = cols[0].get_text()
            if "ASPI" not in row_name and "All Share" not in row_name:
                continue

            # The row can contain cells without any text. Reading fixed
            # positions 1, 2, 3 then yields a blank "Change" and reports the
            # change amount under "Pct", so only the non-empty cells are
            # mapped, in order, onto Value / Change / Pct.
            figures = [
                text
                for text in (col.get_text().strip() for col in cols[1:])
                if text
            ]
            for key, text in zip(("Value", "Change", "Pct"), figures):
                aspi_data[key] = text

            break
    except Exception as e:
        # Best-effort boundary: report the failure in the result, never raise.
        aspi_data["Error"] = f"ASPI Scraper failed: {e}"

    return aspi_data

# --- 2. S&P SL20: Using the yfinance API (best effort; failures are reported in the result) ---
def fetch_sl20_data():
    """Fetch the S&P SL20 quote (value, change, % change) via yfinance.

    Reads ``regularMarketPrice``, ``regularMarketChange`` and
    ``regularMarketChangePercent`` from the ``info`` mapping of the
    ``^SPLK20LP`` ticker.

    Returns:
        dict: Always has the keys ``"Value"``, ``"Change"`` and ``"Pct"`` as
        display strings. Each field is formatted independently and keeps its
        ``"N/A"`` default when the corresponding quote field is missing. If
        the lookup itself fails, an ``"Error"`` key describing the failure is
        added instead of raising.
    """
    ticker_symbol = '^SPLK20LP'
    sl20_data = {"Value": "N/A", "Change": "N/A", "Pct": "N/A"}

    try:
        data = yf.Ticker(ticker_symbol).info

        current_value = data.get('regularMarketPrice')
        change_amount = data.get('regularMarketChange')
        change_pct = data.get('regularMarketChangePercent')

        if current_value is not None:
            sl20_data["Value"] = f"{current_value:,.2f}"

        # Each figure carries its own explicit sign. A missing percentage no
        # longer raises while formatting (which used to turn a partially
        # available quote into a reported failure).
        if change_amount is not None:
            sl20_data["Change"] = f"{change_amount:+,.2f}"
        if change_pct is not None:
            sl20_data["Pct"] = f"{change_pct:+.2f}%"

    except Exception as e:
        # Best-effort boundary: report the failure in the result, never raise.
        sl20_data["Error"] = f"SL20 API failed: {e}"

    return sl20_data

# --- Main Execution and Output ---

# Fetch both quotes first; each result is a dict with "Value", "Change",
# "Pct" and, on failure, an additional "Error" entry.
aspi_result = fetch_aspi_data()
sl20_result = fetch_sl20_data()

# Report banner
print("\n" + "=" * 20, " CSE Market Indices", "=" * 20, sep="\n")

# ASPI section: label, value, change and percent change, one per line
print(
    " ASPI ",
    f" {aspi_result['Value']} ",
    f" {aspi_result['Change']} ",
    f" {aspi_result['Pct']} ",
    sep="\n",
)
if 'Error' in aspi_result:
    print(f">>> ASPI Note: {aspi_result['Error']}")

# Divider between the two indices
print("-" * 20)

# S&P SL20 section, same layout as above
print(
    " S&P SL20 ",
    f" {sl20_result['Value']} ",
    f" {sl20_result['Change']} ",
    f" {sl20_result['Pct']} ",
    sep="\n",
)
if 'Error' in sl20_result:
    print(f">>> SL20 Note: {sl20_result['Error']}")

# Closing rule
print("=" * 20)


 CSE Market Indices
 ASPI 
 21,497.08 
  
 -329.51 
--------------------
 S&P SL20 
 5,925.63 
 -105.03 
 -1.74% 


In [None]:
import pandas as pd
from google.colab import files
import io

# --- 1. Define File Names and Parameters ---
# IMPORTANT: Adjust these names to match the CSV files you download.
# The two input names are used as exact keys into the mapping returned by
# files.upload(), so an uploaded file must carry precisely this name to be
# picked up.
ASPI_FILE_NAME = 'ASPI_Historical_Data.csv'
SL20_FILE_NAME = 'SL20_Historical_Data.csv'
# Name of the consolidated CSV that main() writes and then offers for download.
OUTPUT_CSV = 'CSE_Indices_Consolidated_Data_Daily.csv'

# --- 2. Function to Upload Files to Colab ---
def upload_data_files():
    """Prompt for the two historical CSV files and return them as byte buffers.

    Opens the Colab upload dialog and looks the files up by the exact names in
    ``ASPI_FILE_NAME`` and ``SL20_FILE_NAME``.

    Returns:
        tuple: ``(aspi_content, sl20_content)`` as ``io.BytesIO`` objects, or
        ``(None, None)`` when either expected file name is missing from the
        upload (an explanatory message is printed in that case).
    """
    print("Please upload your two historical data files:")
    print(f"1. The ASPI data file (e.g., named: {ASPI_FILE_NAME})")
    print(f"2. The S&P SL20 data file (e.g., named: {SL20_FILE_NAME})")

    uploaded = files.upload()

    if ASPI_FILE_NAME not in uploaded or SL20_FILE_NAME not in uploaded:
        # The status icons are written as escapes (U+1F6D1 stop sign) so they
        # cannot be garbled again when the file is re-encoded.
        print("\n\U0001F6D1 Error: Both required files were not found among the uploaded files.")
        print("Please ensure the file names match exactly.")
        return None, None

    aspi_content = io.BytesIO(uploaded[ASPI_FILE_NAME])
    sl20_content = io.BytesIO(uploaded[SL20_FILE_NAME])

    # U+2705 is the check-mark icon.
    print("\n\u2705 Files uploaded successfully.")
    return aspi_content, sl20_content

# --- 3. Function to Process and Merge Data ---
def _load_close_frame(content, close_name):
    """Read one index CSV and return a ``Date`` / ``close_name`` frame."""
    # Common separators are ',' or ';' (specify 'sep' here if needed)
    frame = pd.read_csv(content, parse_dates=['Date'])

    # Pick exactly one price column. Renaming both 'Close' and 'Price' to the
    # same target would create duplicate column names when a file has both.
    source_col = next(
        (col for col in (close_name, 'Close', 'Price') if col in frame.columns),
        None,
    )
    if source_col is None:
        raise KeyError(
            f"No closing-price column found for {close_name}: expected "
            f"'Close' or 'Price', got {list(frame.columns)}"
        )

    frame = frame[['Date', source_col]].rename(columns={source_col: close_name})

    # Prices exported as text with thousands separators (e.g. "5,925.63")
    # are read as strings; convert them so the column is numeric. Values that
    # still cannot be parsed become NaN.
    if not pd.api.types.is_numeric_dtype(frame[close_name]):
        cleaned = frame[close_name].astype(str).str.replace(',', '', regex=False)
        frame[close_name] = pd.to_numeric(cleaned, errors='coerce')

    return frame


def process_and_merge_data(aspi_content, sl20_content):
    """Load both index CSVs and merge them into one date-sorted frame.

    Args:
        aspi_content: Path or file-like object with the ASPI CSV. It must have
            a ``Date`` column and a ``Close`` or ``Price`` column.
        sl20_content: Same, for the S&P SL20 CSV.

    Returns:
        pandas.DataFrame: Columns ``Date``, ``ASPI_Close`` and ``SL20_Close``,
        sorted by ``Date`` with a fresh index. The merge is an outer join, so
        a date present in only one file has NaN for the other index.

    Raises:
        KeyError: If a file has neither a ``Close`` nor a ``Price`` column.
    """
    # --- A. Load and Clean ASPI Data ---
    aspi_df = _load_close_frame(aspi_content, 'ASPI_Close')
    print(f"ASPI Data loaded. Total records: {len(aspi_df)}")

    # --- B. Load and Clean S&P SL20 Data ---
    sl20_df = _load_close_frame(sl20_content, 'SL20_Close')
    print(f"S&P SL20 Data loaded. Total records: {len(sl20_df)}")

    # --- C. Merge DataFrames ---
    # Use an 'outer' merge to ensure we capture all dates from BOTH indices.
    merged_df = pd.merge(aspi_df, sl20_df, on='Date', how='outer')

    # Sort by date
    merged_df = merged_df.sort_values(by='Date').reset_index(drop=True)

    return merged_df

# --- 4. Function to Extract Features and Finalize ---
def extract_features(df):
    """Add index and calendar columns to ``df`` and return the final view.

    The columns ``ASPI``, ``SL20``, ``Year``, ``Month`` and ``Daily`` are
    written onto the passed frame itself. The returned frame contains those
    columns followed by ``Date``.
    """
    print("\nExtracting time features (Year, Month, Daily)...")

    # A missing close (a date where only one index has data) is shown as 0.
    for target, source in (('ASPI', 'ASPI_Close'), ('SL20', 'SL20_Close')):
        df[target] = df[source].fillna(0)

    # Calendar parts of the trading date; 'Daily' is the day of the month.
    stamps = df['Date'].dt
    df['Year'] = stamps.year
    df['Month'] = stamps.month
    df['Daily'] = stamps.day

    output_columns = ['ASPI', 'SL20', 'Year', 'Month', 'Daily', 'Date']
    return df[output_columns]

# --- 5. Main Execution Block ---
def main():
    """Run the upload, merge, feature-extraction and download pipeline.

    Does nothing further when the upload step did not return both files.
    Otherwise prints a preview of the final dataset, writes it to
    ``OUTPUT_CSV`` and triggers a browser download of that file.
    """
    aspi_content, sl20_content = upload_data_files()

    if not (aspi_content and sl20_content):
        # Nothing to process without both input files.
        return

    merged_df = process_and_merge_data(aspi_content, sl20_content)
    final_dataset = extract_features(merged_df)

    # Display the first few rows
    print("\n--- Final Dataset Preview ---")
    print(final_dataset.head())
    print(f"\nTotal Merged Records: {len(final_dataset)}")

    # Save to a new CSV file
    final_dataset.to_csv(OUTPUT_CSV, index=False)

    # Download the final CSV file
    files.download(OUTPUT_CSV)
    # The check-mark icon (U+2705) is written as an escape so it cannot be
    # garbled again when the file is re-encoded.
    print(f"\n\u2705 Data processing complete. The file '{OUTPUT_CSV}' has been downloaded to your computer.")

# Run the script
if __name__ == '__main__':
    main()

Please upload your two historical data files:
1. The ASPI data file (e.g., named: ASPI_Historical_Data.csv)
2. The S&P SL20 data file (e.g., named: SL20_Historical_Data.csv)
