# **Скачивание статей**

In [2]:
import pandas as pd
import json
import re
import os
from pathlib import Path


index_path = Path(os.getcwd()) / "index_data.json"
satellite_path = Path(os.getcwd()).parent.parent / "data" / "sat_data" / "satellite_spectrums.json"

with open(index_path, 'r', encoding='utf-8') as file:
    index_data = json.load(file)

with open(satellite_path, 'r', encoding='utf8') as file:
    bands_data = json.load(file)

In [3]:
formulas = [d['formula'] for d in index_data]

In [None]:
ALIASES = {
    "BLUE": (450, 520),
    "B":    (450, 520),
    "GREEN": (520, 600),
    "G":     (520, 600),
    "RED": (630, 690),
    "R":   (630, 690),
    "NIR": (760, 1400),
    "SWIR": (1400, 3000),
    "SWIR1": (1550, 1750),
    "SWIR2": (2080, 2350),
    "MIR":
    "rededge":
    "RVI": 
}


def alias_to_range(name: str):
    """Return alias numeric center value or (lo,hi)."""
    if name not in ALIASES:
        return None
    lo, hi = ALIASES[name]
    return (lo + hi) / 2   # можно вернуть диапазон полностью, если хочешь


def preprocess_aliases(formula: str) -> str:
    """Replace spectral alias names (RED, NIR, etc.) with numeric values."""
    for name in ALIASES:
        pattern = r"\b" + name + r"\b"
        value = alias_to_range(name)
        formula = re.sub(pattern, str(value), formula)
    return formula

In [5]:
for index in index_data:
    index["formula"] = preprocess_aliases(index["formula"])



In [10]:
import re
import sympy as sp
from sympy import sympify, simplify

def wavelength_to_band(wl_min, wl_max, bands):
    for bname, (bmin, bmax) in bands.items():
        # intersection check
        if wl_min >= bmin and wl_max <= bmax:
            return bname
    return "nan"


def convert_formula(formula, sat="landsat8"):
    f = formula

    # --- Replace ranges [700:710] ---
    def repl_range(m):
        lo = int(m.group(1))
        hi = int(m.group(2))
        b = wavelength_to_band(lo, hi, sat)
        return b

    f = re.sub(r"\[(\d+):(\d+)\]", repl_range, f)

    # --- Replace single [700] ---
    def repl_single(m):
        wl = int(m.group(1))
        b = wavelength_to_band(wl, wl, sat)
        return b

    f = re.sub(r"\[(\d+)\]", repl_single, f)

    # --- Replace "700nm" ---
    def repl_nm(m):
        wl = int(m.group(1))
        b = wavelength_to_band(wl, wl, sat)
        return b

    f = re.sub(r"(\d+)nm", repl_nm, f)

    # If variables like RED, BLUE appear — leave as is.

    # --- Simplify with sympy ---
    try:
        expr = sympify(f)
        f = str(simplify(expr))
    except Exception:
        pass

    return f

for formula in formulas:
    print(formula, " → ", convert_formula(formula, bands_data['landsat7']))

(734nm - 747nm)/(715nm - 720nm)  →  nan
735nm/[700:710]  →  1
[8475:8825]**2/([8125:8475]*[8925:9275])  →  nan
[700]*([670]*(([700]-[550])/[150]) + [670] + [550] - (([700]-[550])/[150]*[550]) )/([670]*(((([700]-[550])/[150]))**2 + 1)**0.5)  →  nan
(1094nm - 1205nm)/(1094nm + 1205nm)  →  0
900nm*(680nm + 800nm)/(970nm*(-680nm + 800nm))  →  nan
0.666666666666667*700nm*(0.2*550nm - 670nm + 0.8*700nm)*sqrt(5*sqrt(670nm) - 6*800nm + (2*800nm + 1)**2 + 0.5)/(670nm*(1.3*550nm - 2.5*670nm + 1.2*800nm))  →  B8*(0.133333333333333*B2 - 0.666666666666667*B3 + 0.533333333333333*B8)*sqrt(5*sqrt(B3) + 4*B4**2 - 2*B4 + 1.5)/(B3*(1.3*B2 - 2.5*B3 + 1.2*B4))
(-BLUE - GREEN + NIR - RED)/(BLUE + GREEN + NIR + RED)  →  (-BLUE - GREEN + NIR - RED)/(BLUE + GREEN + NIR + RED)
[2145:2185]/[2185:2225]  →  1
(531nm - 570nm)/(531nm + 570nm)  →  0
Averagereflectancebetween750nmand850nm  →  AveragereflectancebetweenB8andB4
2250nm  →  B6
(-550nm + 800nm)/(550nm + 800nm)  →  (-B2 + B4)/(B2 + B4)
0  →  0
NIR - RED*a  →

In [None]:
for index in index_data:
    sat_formulas = []
    for sat in bands_data:
        sat_formulas.append({sat: convert_formula(index['formula'], bands_data[sat])})
    index['satellite_formulas'] = sat_formulas

In [16]:
import pandas as pd

df = pd.DataFrame(index_data)
def merge_sat_formulas(lst):
    merged = {}
    for d in lst:
        merged.update(d)
    return merged

sat_df = df['satellite_formulas'].apply(merge_sat_formulas).apply(pd.Series)

# --- Соединяем с остальными колонками ---
df = df.drop(columns=['satellite_formulas']).join(sat_df)

# --- Результат ---
df.to_excel("index_table_v1.xlsx")

In [13]:
index_data

[{'idb_link': 'https://www.indexdatabase.de//db/i-single.php?id=365',
  'name': 'Modified Normalized Difference 734/747/715/720\r',
  'abbreviation': 'MD734/747/715/72\r',
  'formula': '(734nm - 747nm)/(715nm - 720nm)',
  'references': [{'article_name': 'Gitelson, Anatoly A.; Merzlyak, Mark N.; Lichtenthaler, Hartmut K. - Detection of Red Edge Position and Chlorophyll Content by Reflectance Measurements Near 700 nm\n',
    'article_link': '10.1016/s0176-1617(96)80285-9',
    'article_doi': 'http://www.sciencedirect.com/science/article/pii/S0176161796802859'},
   {'article_name': 'le Maire, G.; Francois, C.; Dufrene, E. - Towards universal broad leaf chlorophyll indices using PROSPECT simulated database and hyperspectral reflectance measurements\n',
    'article_link': '10.1016/j.rse.2003.09.004',
    'article_doi': ''},
   {'article_name': 'Moss, D.M.; Rock, B.N. - Analysis of red edge spectral characteristics and total chlorophyll values for red spruce (Picea rubens) branch segments f

# Извлечение алиасов

In [None]:
import pandas as pd

sheets = ["LANDSAT-89 OLITIRS", 
          "Landsat-7 ETM+", 
          "Landsat-45 TM", 
          "Landsat-12345 MSS", 
          "Landsat-123 RBV", 
          "Sentinel-2A MSI", 
          "EMIT",
          "EnMAP"]


df = pd.read_excel("Спутники.xlsx",sheet_name=sheets[0])

for alias, diap in zip(df['Алиас'], df['Диапазон, нм']):
    if str(alias) is not 'nan':
        print(alias)

SR_B1
SR_B2
SR_B3
SR_B4
SR_B5
SR_B6
SR_B7
SR_B8
SR_B9
ST_B10
ST_B11
nan
nan
nan
nan
nan
nan
nan


  if str(alias) is not 'nan':


: 