In [1]:
import pandas as pd
from pathlib import Path

# All paths are resolved relative to the parent of the current working directory.
BASE_DIR = Path("..")
PROCESSED_DIR = BASE_DIR.joinpath("data", "processed")

# District feature table (presumably with model predictions attached, per its file name).
features_path = PROCESSED_DIR.joinpath("districts_features_v3_predictions.csv")
df = pd.read_csv(features_path)

# Report the frame's dimensions, then preview the first rows as the cell output.
n_rows, n_cols = df.shape
print("rows:", n_rows, "cols:", n_cols)
df.head()


rows: 929 cols: 13


Unnamed: 0,province_name,district_name,lat,lon,avg_temp,avg_rain,treecover_pct,potential_treecover_pct,missing_treecover_pct,model_potential_treecover_pct,model_missing_treecover_pct,pred_treecover_pct,treecover_gap_pct
0,Adana,Aladağ,37.666642,35.387781,16.739615,0.0,54.58,10,0.0,44.543306,0.0,45.610237,0.0
1,Adana,Ceyhan,37.011888,35.768198,19.804077,0.0,0.05,10,9.95,27.848537,27.798537,37.492598,37.442598
2,Adana,Feke,37.871495,35.821754,10.716615,0.0,15.07,10,0.0,30.32221,15.25221,27.646413,12.576413
3,Adana,Karaisali,37.259147,35.142888,12.239538,0.0,8.33,10,1.67,16.481983,8.151983,18.05402,9.72402
4,Adana,Karataş,36.675979,35.229132,21.450077,0.0,0.0,5,5.0,3.265905,3.265905,3.663069,3.663069


In [2]:
import numpy as np

# Work on a copy so the frame loaded from disk stays untouched.
df2 = df.copy()

# A treecover_pct of exactly 0 is treated as "no data" and replaced with NaN.
is_zero_cover = df2["treecover_pct"] == 0
df2["treecover_pct_out"] = df2["treecover_pct"].where(~is_zero_cover, np.nan)

# Flag the rows that still carry a tree-cover value after masking.
df2["has_treecover_data"] = ~df2["treecover_pct_out"].isna()

# Quick check: share of zero rows, count of rows flagged as missing, and a preview.
print("treecover_pct==0 oranı:", is_zero_cover.mean())
print("has_treecover_data false sayısı:", (~df2["has_treecover_data"]).sum())
preview_cols = [
    "province_name",
    "district_name",
    "treecover_pct",
    "treecover_pct_out",
    "has_treecover_data",
]
df2[preview_cols].head(10)


treecover_pct==0 oranı: 0.5694294940796556
has_treecover_data false sayısı: 529


Unnamed: 0,province_name,district_name,treecover_pct,treecover_pct_out,has_treecover_data
0,Adana,Aladağ,54.58,54.58,True
1,Adana,Ceyhan,0.05,0.05,True
2,Adana,Feke,15.07,15.07,True
3,Adana,Karaisali,8.33,8.33,True
4,Adana,Karataş,0.0,,False
5,Adana,Kozan,0.59,0.59,True
6,Adana,Pozantı,4.73,4.73,True
7,Adana,Saimbeyli,43.16,43.16,True
8,Adana,Seyhan,0.0,,False
9,Adana,Tufanbeyli,0.0,,False


In [3]:
import json

def normalize_text(s: str) -> str:
    """Normalize a name for use as a lookup-key fragment.

    The value is converted to ``str``, lower-cased, stripped of leading and
    trailing whitespace, and internal whitespace runs are collapsed to a
    single space. Turkish letters are kept as-is (no ASCII folding).

    Args:
        s: The raw value. Non-string values are converted with ``str()``.

    Returns:
        The normalized string, or ``""`` when ``s`` is missing (``None`` or
        a float NaN).

    NOTE(review): Python's ``str.lower()`` maps 'İ' to 'i' followed by
    U+0307 and 'I' to 'i' (not 'ı'); whatever builds keys on the consuming
    side must lower-case the same way -- confirm.
    """
    # Missing values arrive as None or, when read from a pandas row, as a
    # float NaN. Without this guard str(NaN) would yield the literal "nan".
    # NaN is the only float that is not equal to itself.
    if s is None or (isinstance(s, float) and s != s):
        return ""
    s = str(s).strip().lower()
    # split() with no argument splits on any whitespace run, so joining with
    # a single space collapses repeated/irregular whitespace.
    s = " ".join(s.split())
    return s

def _float_or_none(value):
    """Return ``value`` as a built-in float, or None when it is missing (None/NaN)."""
    return None if pd.isna(value) else float(value)


# Maps "<normalized province>|<normalized district>" -> per-district record.
lookup = {}

# Trees per hectare used to turn a cover gap into a tree count.
# NOTE(review): keep in sync with whatever value the rest of the pipeline uses.
TREES_PER_HA = 500

# Optional columns: decide once whether they exist instead of once per row.
# When "area_ha" is absent, area_ha is None for every record and therefore
# gap_pct / trees_needed are None for every record as well.
has_potential_col = "model_potential_treecover_pct" in df2.columns
has_area_col = "area_ha" in df2.columns

for _, r in df2.iterrows():
    prov = normalize_text(r["province_name"])
    dist = normalize_text(r["district_name"])
    key = f"{prov}|{dist}"

    treecover = r["treecover_pct_out"]
    has_data = bool(r["has_treecover_data"])

    # Missing numeric values are stored as None (JSON null) rather than NaN:
    # json.dump would otherwise emit a bare NaN token, which is not valid JSON.
    item = {
        "province_name": r["province_name"],
        "district_name": r["district_name"],
        "treecover_pct": _float_or_none(treecover),
        "has_treecover_data": has_data,

        # Model output (None when the column is absent or the value is missing).
        "potential_treecover_pct": (
            _float_or_none(r["model_potential_treecover_pct"]) if has_potential_col else None
        ),

        # District area (None when the column is absent or the value is missing).
        "area_ha": _float_or_none(r["area_ha"]) if has_area_col else None,
        "trees_per_ha": TREES_PER_HA,
    }

    # gap_pct / trees_needed are only computed when tree cover, area and the
    # model potential are all available; otherwise both stay None so that a
    # consumer does not compute with missing inputs.
    # NOTE(review): gap_pct itself does not need area_ha, yet it is also left
    # None when area is missing -- confirm that this is intended.
    potential_pct = item["potential_treecover_pct"]
    area_ha = item["area_ha"]
    if has_data and area_ha is not None and potential_pct is not None:
        # Clamp the gap to the 0..100 range.
        gap_pct = max(0.0, min(100.0, potential_pct - item["treecover_pct"]))
        item["gap_pct"] = gap_pct
        item["trees_needed"] = int(round((gap_pct / 100.0) * area_ha * TREES_PER_HA))
    else:
        item["gap_pct"] = None
        item["trees_needed"] = None

    # A repeated province|district key overwrites the earlier record.
    lookup[key] = item

json_out_path = PROCESSED_DIR / "districts_trees_needed_lookup.json"
with open(json_out_path, "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII letters readable in the file.
    json.dump(lookup, f, ensure_ascii=False, indent=2)

json_out_path, len(lookup)


(WindowsPath('../data/processed/districts_trees_needed_lookup.json'), 929)

In [4]:
# Did the zero-cover rows really end up as nulls?
zero_rows = df2.loc[df2["treecover_pct"] == 0, ["province_name", "district_name"]]
sample_zero = zero_rows.head(3)
print("Örnek 0 olanlar:", sample_zero.to_dict("records"))

# Fetch the lookup entry for the first sampled zero-cover district.
first_zero = sample_zero.iloc[0]
k0 = "|".join(
    normalize_text(first_zero[col]) for col in ("province_name", "district_name")
)
print("lookup örnek:", k0, "=>", lookup.get(k0))


Örnek 0 olanlar: [{'province_name': 'Adana', 'district_name': 'Karataş'}, {'province_name': 'Adana', 'district_name': 'Seyhan'}, {'province_name': 'Adana', 'district_name': 'Tufanbeyli'}]
lookup örnek: adana|karataş => {'province_name': 'Adana', 'district_name': 'Karataş', 'treecover_pct': None, 'has_treecover_data': False, 'potential_treecover_pct': 3.2659052380952405, 'area_ha': None, 'trees_per_ha': 500, 'gap_pct': None, 'trees_needed': None}
