# Phase 1 — NL → JSON Parser

This phase converts simple natural language strategy rules into a structured JSON representation.

Patterns covered such as:
- “close price above 20-day moving average”
- “volume above 1 million”
- “RSI(14) below 30”
- “price crosses above yesterday’s high”


In [None]:
import re # regular expressions
from typing import Dict, Any # type hints to describe input/output more clearly

### Parsing volumes like: '1M', '1 million', '500k'
It tries to find something like "1M", "1 million", "500k", "1.2M", etc.

In [None]:
def parse_volume(text:str):
    m=re.search(r'(\d+(?:\.\d+)?)\s*(m|M|million|k|K|thousand)?',text)
    # (\d+(?:\.\d+)?) → captures a number like 1, 1.2, 100, etc.
    # \s* → optional spaces
    # (m|M|million|k|K|thousand)? → million, thousand, k, M
    if not m:
        return None # if no number is found
    num=float(m.group(1)) # "1.2M" → 1.2
    unit=(m.group(2) or "").lower()  # Extract the unit part


    if unit in ("m", "million"): # 1M → 1,000,000
        return int(num * 1_000_000)
    if unit in ("k", "thousand"):
        return int(num * 1_000) # 500k → 500,000

    return int(num)
# It tries to find something like "1M", "1 million", "500k", "1.2M", etc.

### Normalizing indicators
This converts English phrases like:
- "20-day moving average" → "sma(close,20)"
- "RSI(14)" → "rsi(close,14)"

In [None]:
def normalize_indicator(text:str):
    # Example:"20-day moving average"
    m = re.search(r"(\d+)-day.*moving average", text, re.I)
    # Checks for pattern:
    # a number ((\d+))
    # followed by -day
    # followed later by moving average
    if m:
        n = int(m.group(1)) # Extract the number (here 20)
        return f"sma(close,{n})"

    # Example: "RSI(14)"
    m = re.search(r"RSI\s*\(?\s*(\d+)\s*\)?", text, re.I)
    if m:
        n = int(m.group(1))
        return f"rsi(close,{n})" # rsi(close,14)

    return None

### Extracting comparisons from a single sentence
comps will store rules like:
("close", ">", "sma(close,20)")

In [None]:
def extract_comparisons(text: str):
    comps = []

    # close above 20-day MA
    m = re.search(r"(close|price).*above.*(\d+-day .*moving average)", text, re.I)
    # (close|price) → match either "close" or "price"
    # .*above.* → anything followed by the word "above"
    # (\d+-day .*moving average) → capture "20-day ... moving average"
    if m:
        comps.append(("close", ">", normalize_indicator(m.group(2)))) # ("close", ">", "sma(close,20)")


    # volume above 1M
    m = re.search(r"volume.*above\s+([^\.,;]+)", text, re.I)
    if m:
        vol = parse_volume(m.group(1))
        if vol:
            comps.append(("volume", ">", vol))

    # crosses above yesterday high
    m = re.search(
        r"(close|price).*cross(?:es)?\s+above\s+(yesterday'?s high)",text,re.I,)
    if m:
        comps.append(("close", "crosses_above", "yesterday_high"))

    # RSI(14) below 30
    m = re.search(r"RSI\s*\(?\s*(\d+)\s*\)?.*below\s*(\d+)", text, re.I)
    if m:
        rsi_len = int(m.group(1))
        threshold = int(m.group(2))
        comps.append((f"rsi(close,{rsi_len})", "<", threshold))

    return comps

### MAIN FUNCTION

In [None]:
def nl_to_json(text: str) -> Dict[str, Any]:
    text = text.strip() # removing leading/trailing spaces
    sentences = re.split(r"[.\n]", text)

    entry_rules = []
    exit_rules = []

    for s in sentences:
        sl = s.lower().strip()
        if not sl:
            continue

        # classify whether it's entry or exit
        if any(k in sl for k in ["buy", "enter", "trigger entry"]):
            entry_rules += extract_comparisons(s)
        # If sentence contains words like:buy,enter,entry take as entry rule.
        elif any(k in sl for k in ["exit", "sell", "close position"]):
            exit_rules += extract_comparisons(s)
         # If sentence contains words like:exit,sell,close position take as exit rule.
        else:
            # fallback classification
            if "rsi" in sl or "exit" in sl:
                exit_rules += extract_comparisons(s)
            else:
                entry_rules += extract_comparisons(s)

    # Convert tuples → JSON objects
    def conv(lst):
        result = []
        for left, op, right in lst:
            result.append({
                "left": left,
                "operator": op,
                "right": right
            })
        return result

    return {
        "entry": conv(entry_rules),
        "exit": conv(exit_rules)
    }

In [9]:
examples = [
    "Buy when the close price is above the 20-day moving average and volume is above 1 million.",
    "Enter when price crosses above yesterday's high.",
    "Exit when RSI(14) is below 30.",
]

for ex in examples:
    print("NL:", ex)
    print("JSON:", nl_to_json(ex))
    print("-" * 60)


NL: Buy when the close price is above the 20-day moving average and volume is above 1 million.
JSON: {'entry': [{'left': 'close', 'operator': '>', 'right': 'sma(close,0)'}, {'left': 'volume', 'operator': '>', 'right': 1000000}], 'exit': []}
------------------------------------------------------------
NL: Enter when price crosses above yesterday's high.
JSON: {'entry': [{'left': 'close', 'operator': 'crosses_above', 'right': 'yesterday_high'}], 'exit': []}
------------------------------------------------------------
NL: Exit when RSI(14) is below 30.
JSON: {'entry': [], 'exit': [{'left': 'rsi(close,14)', 'operator': '<', 'right': 30}]}
------------------------------------------------------------
