In [2]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import rasterio
from rasterio.windows import Window
from tqdm import tqdm

# Half-width of the sampling window in pixels; the window spans
# 2 * half + 1 pixels per side (4 -> 9x9, 5 -> 11x11).
WINDOW_HALF_SIZE = 4
WINDOW_SIZE = WINDOW_HALF_SIZE * 2 + 1
print(f"Pencere boyutu: {WINDOW_SIZE} x {WINDOW_SIZE}")


Pencere boyutu: 9 x 9


In [3]:
# Base district table: one row per district with its reference lat/lon point.
districts_path = "../data/interim/districts_base.csv"

df = pd.read_csv(filepath_or_buffer=districts_path)
print("İlçe sayısı:", df.shape[0])
df.head(5)


İlçe sayısı: 929


Unnamed: 0,province_name,district_name,lat,lon
0,Adana,Aladağ,37.666642,35.387781
1,Adana,Ceyhan,37.011888,35.768198
2,Adana,Feke,37.871495,35.821754
3,Adana,İmamoğlu,37.259451,35.608167
4,Adana,Karaisali,37.259147,35.142888


In [4]:
# Collect the tree-cover tiles from the raw GFW folder, in a stable
# (name-sorted) order.
gfw_dir = Path("../data/raw/gfw")
tif_paths = list(gfw_dir.glob("Hansen_GFC2015_treecover2000_*.tif"))
tif_paths.sort()

print("Bulunan TIF dosyaları:")
for p in tif_paths:
    print(" -", p.name)

# Fail early: nothing downstream can run without at least one tile.
if not tif_paths:
    raise FileNotFoundError("Hansen_GFC2015_treecover2000_*.tif bulunamadı. gfw klasörünü kontrol et.")


Bulunan TIF dosyaları:
 - Hansen_GFC2015_treecover2000_30N_020E.tif
 - Hansen_GFC2015_treecover2000_30N_030E.tif
 - Hansen_GFC2015_treecover2000_30N_040E.tif
 - Hansen_GFC2015_treecover2000_40N_020E.tif
 - Hansen_GFC2015_treecover2000_40N_030E.tif
 - Hansen_GFC2015_treecover2000_40N_040E (1).tif
 - Hansen_GFC2015_treecover2000_50N_020E.tif
 - Hansen_GFC2015_treecover2000_50N_030E.tif
 - Hansen_GFC2015_treecover2000_50N_040E.tif


In [5]:
# Open every tile and keep the dataset handles around; later cells look up
# the tile for a point and read windows from these open handles.
src_list = []
for p in tif_paths:
    src = rasterio.open(p)
    print(p.name, "-> CRS:", src.crs, "Bounds:", src.bounds, "NoData:", src.nodata)
    src_list.append(src)

# All tiles must be in EPSG:4326: the point lookup compares raw lon/lat
# values against each tile's bounds, which is only meaningful in geographic
# coordinates. Enforce it instead of relying on eyeballing the printout.
bad_crs = [s.name for s in src_list if s.crs is None or s.crs.to_epsg() != 4326]
if bad_crs:
    raise ValueError(f"EPSG:4326 olmayan raster dosyaları: {bad_crs}")


Hansen_GFC2015_treecover2000_30N_020E.tif -> CRS: EPSG:4326 Bounds: BoundingBox(left=20.0, bottom=20.0, right=30.0, top=30.0) NoData: None
Hansen_GFC2015_treecover2000_30N_030E.tif -> CRS: EPSG:4326 Bounds: BoundingBox(left=30.0, bottom=20.0, right=40.0, top=30.0) NoData: None
Hansen_GFC2015_treecover2000_30N_040E.tif -> CRS: EPSG:4326 Bounds: BoundingBox(left=40.0, bottom=20.0, right=50.0, top=30.0) NoData: None
Hansen_GFC2015_treecover2000_40N_020E.tif -> CRS: EPSG:4326 Bounds: BoundingBox(left=20.0, bottom=30.0, right=30.0, top=40.0) NoData: None
Hansen_GFC2015_treecover2000_40N_030E.tif -> CRS: EPSG:4326 Bounds: BoundingBox(left=30.0, bottom=30.0, right=40.0, top=40.0) NoData: None
Hansen_GFC2015_treecover2000_40N_040E (1).tif -> CRS: EPSG:4326 Bounds: BoundingBox(left=40.0, bottom=30.0, right=50.0, top=40.0) NoData: None
Hansen_GFC2015_treecover2000_50N_020E.tif -> CRS: EPSG:4326 Bounds: BoundingBox(left=20.0, bottom=40.0, right=30.0, top=50.0) NoData: None
Hansen_GFC2015_treecove

In [6]:
def find_src_for_point(lon, lat, src_list):
    """
    Find the raster dataset whose bounds contain the given lon/lat.

    Adjacent tiles share their edges, so a point lying exactly on a shared
    edge is inside the (inclusive) bounds of two tiles. Pixel indices grow
    eastwards from ``left`` and southwards from ``top``, which means that in
    a tile where ``lon == right`` or ``lat == bottom`` the point maps to a
    column/row one past the last pixel. Such a tile is therefore only used
    as a fallback when no other tile contains the point.

    Parameters
    ----------
    lon, lat : float
        Point coordinates, in the same CRS as the rasters' bounds.
    src_list : iterable
        Objects exposing ``bounds`` as ``(left, bottom, right, top)``.

    Returns
    -------
    The first dataset whose pixel grid contains the point; otherwise the
    first dataset whose inclusive bounds contain it (point on an outer
    edge); otherwise ``None``.
    """
    edge_match = None
    for src in src_list:
        left, bottom, right, top = src.bounds
        if not (left <= lon <= right and bottom <= lat <= top):
            continue
        # Strictly inside the pixel grid: [left, right) x (bottom, top].
        if lon < right and lat > bottom:
            return src
        # On this tile's right/bottom edge: remember it, but keep looking
        # for the neighbouring tile that really owns the pixel.
        if edge_match is None:
            edge_match = src
    return edge_match


In [7]:
def sample_mean_window(src, lon, lat, half_size=4):
    """
    Return the mean of band 1 over a square window centred on (lon, lat).

    The nominal window is (2*half_size+1) x (2*half_size+1) pixels, e.g.
    half_size=4 -> 9x9 and half_size=5 -> 11x11. Near the raster edge the
    window is clipped on every side to the raster extent, so it stays
    centred on the target pixel and simply becomes smaller.

    Parameters
    ----------
    src : open raster dataset
        Must provide ``index(x, y)``, ``read(band, window=...)``,
        ``height``, ``width`` and ``nodata``.
    lon, lat : float
        Point coordinates in the raster's CRS (x = lon, y = lat).
    half_size : int, default 4
        Number of pixels taken on each side of the centre pixel.

    Returns
    -------
    float
        Mean of the valid pixels in the window, or NaN when the window
        does not overlap the raster or contains no valid pixel.
    """
    # index() takes (x, y) = (lon, lat) and returns (row, col).
    row, col = src.index(lon, lat)

    # Clip the window to the raster on all four sides.
    row_start = max(row - half_size, 0)
    col_start = max(col - half_size, 0)
    row_stop = min(row + half_size + 1, src.height)
    col_stop = min(col + half_size + 1, src.width)

    # The window lies entirely outside the raster.
    if row_stop <= row_start or col_stop <= col_start:
        return np.nan

    window = Window(col_start, row_start, col_stop - col_start, row_stop - row_start)
    data = src.read(1, window=window).astype("float32")

    # Integer rasters never contain NaN, so a declared nodata value has to
    # be converted explicitly for the NaN-aware mean below to skip it.
    nodata = src.nodata
    if nodata is not None and not np.isnan(nodata):
        data[data == nodata] = np.nan

    if data.size == 0 or np.all(np.isnan(data)):
        return np.nan

    return float(np.nanmean(data))


In [8]:
# Example district: Artvin / Ardanuç (any other province/district works too).
is_artvin = df["province_name"] == "Artvin"
is_ardanuc = df["district_name"] == "Ardanuç"
example = df.loc[is_artvin & is_ardanuc]
example


Unnamed: 0,province_name,district_name,lat,lon
107,Artvin,Ardanuç,41.088457,42.139993


In [9]:
# Take the single matching row and pull out its coordinates.
test_row = example.iloc[0]
lon = test_row["lon"]
lat = test_row["lat"]
test_row


province_name       Artvin
district_name      Ardanuç
lat              41.088457
lon              42.139993
Name: 107, dtype: object

In [10]:
# Which tile covers the example point?
src_test = find_src_for_point(lon=lon, lat=lat, src_list=src_list)
(src_test.name, src_test.bounds)


('..\\data\\raw\\gfw\\Hansen_GFC2015_treecover2000_50N_040E.tif',
 BoundingBox(left=40.0, bottom=40.0, right=50.0, top=50.0))

In [11]:
# Window mean at the example point, using the configured window size.
sample_mean_window(src=src_test, lon=lon, lat=lat, half_size=WINDOW_HALF_SIZE)


2.4567902088165283

In [12]:
# Sample every district point; districts that fall outside all tiles get NaN
# so that `values` stays aligned row-for-row with `df`.
values = []

for lon, lat in tqdm(zip(df["lon"], df["lat"]), total=len(df), desc="İlçeler"):
    src = find_src_for_point(lon, lat, src_list)
    if src is not None:
        val = sample_mean_window(src, lon, lat, half_size=WINDOW_HALF_SIZE)
        values.append(val)
    else:
        values.append(np.nan)

len(values)


İlçeler: 100%|█| 929/929 [00:01<00:00, 729.54


929

In [13]:
# Keep the unrounded window mean and a 2-decimal version for reporting.
df = df.assign(
    treecover_raw=values,
    treecover_pct=lambda frame: frame["treecover_raw"].round(2),
)

df.loc[:, ["province_name", "district_name", "treecover_pct"]].head()


Unnamed: 0,province_name,district_name,treecover_pct
0,Adana,Aladağ,54.58
1,Adana,Ceyhan,0.05
2,Adana,Feke,15.07
3,Adana,İmamoğlu,0.0
4,Adana,Karaisali,8.33


In [14]:
# Distribution summary of the rounded tree-cover values.
df.loc[:, "treecover_pct"].describe()


count    929.000000
mean      13.205587
std       25.567973
min        0.000000
25%        0.000000
50%        0.000000
75%       11.100000
max       98.720000
Name: treecover_pct, dtype: float64

In [15]:
# Top 20 provinces by mean district tree cover.
(
    df.groupby("province_name")["treecover_pct"]
    .mean()
    .sort_values(ascending=False)
    .head(20)
)


province_name
Düzce         66.528750
Rize          63.454167
Ordu          58.814444
Artvin        57.428750
Giresun       54.148125
Yalova        52.453333
Zinguldak     46.345000
Trabzon       44.549444
Sinop         41.526667
Kastamonu     37.772000
Bolu          33.264444
Sakarya       32.363846
Mugla         31.665833
Samsun        27.567333
Kocaeli       26.837143
Balikesir     26.316316
Karabük       25.426667
Kirklareli    24.188750
Osmaniye      23.070000
Tekirdag      19.872222
Name: treecover_pct, dtype: float64

In [17]:
# Write the district table with the rounded tree-cover column to the
# interim data folder, creating the folder if needed.
Path("../data/interim").mkdir(parents=True, exist_ok=True)

output_path = "../data/interim/districts_treecover.csv"
export_columns = ["province_name", "district_name", "lat", "lon", "treecover_pct"]
df_out = df.loc[:, export_columns].copy()
df_out.to_csv(output_path, index=False)

(len(df_out), output_path)


(929, '../data/interim/districts_treecover.csv')