Data

In [1]:
import pandas as pd
import numpy as np

In [3]:
import re
import datetime

def convert_js_data_from_file(filepath, variable):
    """Load a JavaScript array literal assigned to ``variable`` as Python data.

    The file is searched for ``<variable> = [ ... ];`` and the array text is
    translated into Python source before being evaluated:

    * ``new Date(y, m, d)`` becomes ``datetime.date(y, m + 1, d)`` because
      JavaScript months are 0-based,
    * bare ``null`` / ``true`` / ``false`` become ``None`` / ``True`` /
      ``False`` (text inside quoted strings is left untouched),
    * object literals that carry a ``label`` property, e.g.
      ``{type: 'number', label: 'ASN'}``, collapse to the label string.

    Args:
        filepath: Path of the UTF-8 text file containing the JavaScript code.
        variable: Name of the JavaScript variable (may contain dots) whose
            array literal should be extracted.

    Returns:
        The evaluated Python object (normally a list of lists), or ``None``
        when the translated text cannot be evaluated; in that case the error
        is printed.

    Raises:
        ValueError: If no ``<variable> = [...];`` assignment is found.
    """
    # Step 1: Read content from the file
    with open(filepath, "r", encoding="utf-8") as f:
        js_code = f.read()

    # Step 2: Extract the JS array using regex. The match is non-greedy, so it
    # ends at the first "];" that follows the assignment.
    pattern = rf"{re.escape(variable)}\s*=\s*(\[.*?\]);"
    match = re.search(pattern, js_code, re.DOTALL)
    if not match:
        raise ValueError(f"Variable '{variable}' not found in file.")

    array_str = match.group(1)

    # Steps 3 + 4: Translate dates and bare literals in a single pass.
    # String literals are matched first and returned unchanged so that text
    # such as 'nullable' is never rewritten.
    js_literals = {"null": "None", "true": "True", "false": "False"}
    token_pattern = re.compile(
        r"(?P<string>'(?:\\.|[^'\\])*'" r'|"(?:\\.|[^"\\])*")'
        r"|new\s+Date\(\s*(?P<y>\d+)\s*,\s*(?P<m>\d+)\s*,\s*(?P<d>\d+)\s*\)"
        r"|(?<![\w.$])(?P<literal>null|true|false)(?![\w$])",
        re.DOTALL,
    )

    def translate_token(m):
        if m.group("string") is not None:
            return m.group("string")
        if m.group("literal") is not None:
            return js_literals[m.group("literal")]
        # int() also drops leading zeros, which are invalid in Python literals.
        y, mth, d = int(m.group("y")), int(m.group("m")), int(m.group("d"))
        return f"datetime.date({y}, {mth + 1}, {d})"  # JS months are 0-based

    array_str = token_pattern.sub(translate_token, array_str)

    # Step 5: Replace JS objects like {type: 'number', label: 'ASN'} with just
    # the label. The key may be bare or quoted, the value may use either quote
    # style, and other properties may follow the label.
    label_pattern = (
        r"""\{[^{}]*(?:'label'|"label"|(?<![\w'"])label)\s*:\s*"""
        r"""(?P<lit>'[^']+'|"[^"]+")[^{}]*\}"""
    )
    array_str = re.sub(label_pattern, r"\g<lit>", array_str)

    # Step 6: Evaluate.
    # NOTE(security): eval() executes whatever the file contains. Builtins are
    # removed to narrow the exposed surface, but this is NOT a sandbox -- only
    # use this function on files you trust.
    try:
        data = eval(array_str, {"__builtins__": {}, "datetime": datetime})
        return data
    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print("Error evaluating JS array:", e)
        return None

# 🧪 Usage: parse the array assigned to `et.rsStat.tooltipData` in data.txt.
# NOTE: convert_js_data_from_file returns None (after printing the error) when
# the array cannot be evaluated, so check the displayed value before using it.
data = convert_js_data_from_file("data.txt", "et.rsStat.tooltipData")
data


[['Date',
  'ASN',
  'BURN',
  'MDKD',
  'MDF',
  'ADRA',
  'CJN',
  'BQA',
  'ODM',
  'VSU',
  'GBA',
  'CDGR',
  'SLB',
  'MDN',
  'KGP',
  'PKU',
  'TMZ',
  'DZKT',
  'HLZ'],
 [datetime.date(2024, 4, 29),
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None],
 [datetime.date(2024, 4, 30),
  0,
  4,
  10,
  9,
  8,
  13,
  9,
  9,
  5,
  4,
  6,
  9,
  2,
  9,
  0,
  0,
  0,
  14],
 [datetime.date(2024, 5, 1),
  0,
  5,
  2,
  2,
  4,
  20,
  20,
  19,
  16,
  11,
  6,
  5,
  2,
  5,
  12,
  10,
  6,
  10],
 [datetime.date(2024, 5, 2),
  0,
  5,
  2,
  1,
  4,
  10,
  6,
  6,
  5,
  4,
  3,
  2,
  45,
  36,
  22,
  21,
  17,
  10],
 [datetime.date(2024, 5, 3),
  0,
  6,
  3,
  2,
  1,
  9,
  6,
  5,
  1,
  1,
  2,
  2,
  1,
  7,
  43,
  5,
  1,
  14],
 [datetime.date(2024, 5, 4),
  0,
  4,
  0,
  1,
  6,
  22,
  18,
  17,
  13,
  6,
  3,
  1,
  2,
  3,
  0,
  7,
  3,
  14],
 [datetime.date(2024,

In [7]:
# The first record of `data` supplies the column labels; the rest are rows.
df = pd.DataFrame(data=data[1:], columns=data[0])
# Add the weekday name of each date as a "Day" column right after "Date".
df.insert(
    loc=1,
    column="Day",
    value=df["Date"].map(lambda day: day.strftime('%A')),
)
df.head()

Unnamed: 0,Date,Day,ASN,BURN,MDKD,MDF,ADRA,CJN,BQA,ODM,VSU,GBA,CDGR,SLB,MDN,KGP,PKU,TMZ,DZKT,HLZ
0,2024-04-29,Monday,,,,,,,,,,,,,,,,,,
1,2024-04-30,Tuesday,0.0,4.0,10.0,9.0,8.0,13.0,9.0,9.0,5.0,4.0,6.0,9.0,2.0,9.0,0.0,0.0,0.0,14.0
2,2024-05-01,Wednesday,0.0,5.0,2.0,2.0,4.0,20.0,20.0,19.0,16.0,11.0,6.0,5.0,2.0,5.0,12.0,10.0,6.0,10.0
3,2024-05-02,Thursday,0.0,5.0,2.0,1.0,4.0,10.0,6.0,6.0,5.0,4.0,3.0,2.0,45.0,36.0,22.0,21.0,17.0,10.0
4,2024-05-03,Friday,0.0,6.0,3.0,2.0,1.0,9.0,6.0,5.0,1.0,1.0,2.0,2.0,1.0,7.0,43.0,5.0,1.0,14.0
