In [1]:
import os
import warnings
from pathlib import Path

import numpy as np
import pandas as pd

# Path definitions
BASE_DIR = Path("..")  # one level above the notebooks folder
INTERIM_DIR = BASE_DIR.joinpath("data", "interim")
PROCESSED_DIR = BASE_DIR.joinpath("data", "processed")

# Input tables consumed by this notebook
climate_path = INTERIM_DIR.joinpath("districts_climate_10yr.csv")
treecover_path = INTERIM_DIR.joinpath("districts_treecover.csv")

climate_path, treecover_path


(WindowsPath('../data/interim/districts_climate_10yr.csv'),
 WindowsPath('../data/interim/districts_treecover.csv'))

In [2]:
# Read both interim CSVs in order: climate table first, then tree cover
climate_df, tree_df = map(pd.read_csv, (climate_path, treecover_path))

# Report (rows, columns) of each table as a quick load check
print("İklim verisi şekli:", climate_df.shape)
print("Ağaç verisi şekli:", tree_df.shape)

climate_df.head()


İklim verisi şekli: (929, 10)
Ağaç verisi şekli: (929, 5)


Unnamed: 0,province_name,district_name,lat,lon,avg_temp_10yr,avg_precip_10yr,temp_std_10yr,precip_std_10yr,temp_min_10yr,temp_max_10yr
0,Adana,Aladağ,37.666642,35.387781,16.739615,0.0,0.36265,0.0,16.216154,17.347692
1,Adana,Ceyhan,37.011888,35.768198,19.804077,0.0,0.37888,0.0,19.261538,20.400769
2,Adana,Feke,37.871495,35.821754,10.716615,0.0,0.439328,0.0,10.156154,11.414615
3,Adana,Karaisali,37.259147,35.142888,12.239538,0.0,0.417612,0.0,11.707692,12.945385
4,Adana,Karataş,36.675979,35.229132,21.450077,0.0,0.332005,0.0,21.102308,22.021538


In [3]:
# Preview the first rows of the tree-cover table
tree_df.head()


Unnamed: 0,province_name,district_name,lat,lon,treecover_pct
0,Adana,Aladağ,37.666642,35.387781,54.58
1,Adana,Ceyhan,37.011888,35.768198,0.05
2,Adana,Feke,37.871495,35.821754,15.07
3,Adana,İmamoğlu,37.259451,35.608167,0.0
4,Adana,Karaisali,37.259147,35.142888,8.33


In [4]:
# List the column names of the climate table
climate_df.columns


Index(['province_name', 'district_name', 'lat', 'lon', 'avg_temp_10yr',
       'avg_precip_10yr', 'temp_std_10yr', 'precip_std_10yr', 'temp_min_10yr',
       'temp_max_10yr'],
      dtype='object')

In [7]:
# Give the 10-year-average columns shorter, standardized names
short_names = {"avg_temp_10yr": "avg_temp", "avg_precip_10yr": "avg_rain"}
climate_df = climate_df.rename(columns=short_names)

# Show the key columns to confirm the new names are in place
climate_df.loc[:, ["province_name", "district_name", "avg_temp", "avg_rain"]].head()


Unnamed: 0,province_name,district_name,avg_temp,avg_rain
0,Adana,Aladağ,16.739615,0.0
1,Adana,Ceyhan,19.804077,0.0
2,Adana,Feke,10.716615,0.0
3,Adana,Karaisali,12.239538,0.0
4,Adana,Karataş,21.450077,0.0


In [8]:
# A district is identified by its (province, district) name pair
merge_cols = ["province_name", "district_name"]

# validate="one_to_one" makes pandas raise MergeError when a key is duplicated
# on either side, instead of silently multiplying rows in the joined table.
# The inner join keeps only districts present in both tables.
merged = pd.merge(
    climate_df,
    tree_df[merge_cols + ["treecover_pct"]],  # take only the needed columns from tree_df
    on=merge_cols,
    how="inner",
    validate="one_to_one",
)

print("Birleştirilmiş veri şekli:", merged.shape)
merged.head()


Birleştirilmiş veri şekli: (929, 11)


Unnamed: 0,province_name,district_name,lat,lon,avg_temp,avg_rain,temp_std_10yr,precip_std_10yr,temp_min_10yr,temp_max_10yr,treecover_pct
0,Adana,Aladağ,37.666642,35.387781,16.739615,0.0,0.36265,0.0,16.216154,17.347692,54.58
1,Adana,Ceyhan,37.011888,35.768198,19.804077,0.0,0.37888,0.0,19.261538,20.400769,0.05
2,Adana,Feke,37.871495,35.821754,10.716615,0.0,0.439328,0.0,10.156154,11.414615,15.07
3,Adana,Karaisali,37.259147,35.142888,12.239538,0.0,0.417612,0.0,11.707692,12.945385,8.33
4,Adana,Karataş,36.675979,35.229132,21.450077,0.0,0.332005,0.0,21.102308,22.021538,0.0


In [9]:
# Count missing values in each column of the merged table
merged.isna().sum()


province_name      0
district_name      0
lat                0
lon                0
avg_temp           0
avg_rain           0
temp_std_10yr      0
precip_std_10yr    0
temp_min_10yr      0
temp_max_10yr      0
treecover_pct      0
dtype: int64

In [10]:
def estimate_potential_treecover(row):
    """Estimate a district's potential tree cover (%) from its climate.

    Rule-based heuristic: rainfall selects a base percentage, then the mean
    temperature applies a small penalty at the cold and hot extremes.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``"avg_rain"`` (the thresholds treat it as mm / year) and
        ``"avg_temp"`` (the thresholds treat it as °C).

    Returns
    -------
    int or float
        Estimated potential tree cover clamped to the 0-90 range, or ``nan``
        when either input is missing.
    """
    rain = row["avg_rain"]
    temp = row["avg_temp"]

    # A missing value fails every comparison below and would silently be
    # scored as "very dry, mild"; propagate the gap instead of inventing a value.
    if pd.isna(rain) or pd.isna(temp):
        return float("nan")

    # 1) Base level from rainfall: (minimum rainfall, base %), wettest band first
    rain_bands = (
        (1200, 80),  # very wet, Black Sea type
        (800, 60),   # humid regions (Marmara / outside the Eastern Black Sea)
        (500, 40),   # temperate but semi-humid
        (300, 25),   # semi-arid
    )
    # Below the lowest band the district counts as very arid (10)
    base = next((pct for floor, pct in rain_bands if rain >= floor), 10)

    # 2) Small temperature adjustment: lower the estimate when very cold or very hot
    if temp < 0:
        base -= 15   # extremely cold
    elif temp < 5:
        base -= 5
    elif temp > 25:
        base -= 10   # very hot
    elif temp > 20:
        base -= 5

    # 3) Clamp the result to the 0-90 range
    return max(min(base, 90), 0)

# Score every district row with the rule-based estimate
merged["potential_treecover_pct"] = merged.apply(estimate_potential_treecover, axis=1)

# Sanity check: the lowest rainfall threshold in estimate_potential_treecover
# is 300. If no district reaches it, every row lands in the lowest band and the
# estimate no longer depends on rainfall at all.
# NOTE(review): the previews in this notebook show avg_rain == 0.0 on every
# displayed row - confirm the upstream precipitation extraction and its units.
if not (merged["avg_rain"] >= 300).any():
    warnings.warn(
        "avg_rain never reaches 300: every district falls into the lowest "
        "rainfall band, so potential_treecover_pct ignores rainfall. "
        "Check the precipitation column (units / upstream extraction)."
    )

merged[["province_name", "district_name", "avg_rain", "avg_temp", "treecover_pct", "potential_treecover_pct"]].head()


Unnamed: 0,province_name,district_name,avg_rain,avg_temp,treecover_pct,potential_treecover_pct
0,Adana,Aladağ,0.0,16.739615,54.58,10
1,Adana,Ceyhan,0.0,19.804077,0.05,10
2,Adana,Feke,0.0,10.716615,15.07,10
3,Adana,Karaisali,0.0,12.239538,8.33,10
4,Adana,Karataş,0.0,21.450077,0.0,5


In [11]:
# Treat a missing current tree-cover percentage as 0
current_pct = merged["treecover_pct"].fillna(0)
merged["treecover_pct_filled"] = current_pct

# Shortfall = potential - current; negative gaps are floored at 0
shortfall = merged["potential_treecover_pct"] - current_pct
merged["missing_treecover_pct"] = np.maximum(shortfall, 0)

preview_cols = ["province_name", "district_name", "treecover_pct", "potential_treecover_pct", "missing_treecover_pct"]
merged[preview_cols].head()


Unnamed: 0,province_name,district_name,treecover_pct,potential_treecover_pct,missing_treecover_pct
0,Adana,Aladağ,54.58,10,0.0
1,Adana,Ceyhan,0.05,10,9.95
2,Adana,Feke,15.07,10,0.0
3,Adana,Karaisali,8.33,10,1.67
4,Adana,Karataş,0.0,5,5.0


In [12]:
group_cols = ["province_name"]
pct_cols = ["treecover_pct_filled", "potential_treecover_pct", "missing_treecover_pct"]

# Province-level means of the three percentages, largest average shortfall first
province_means = merged.groupby(group_cols)[pct_cols].mean()
summary = province_means.sort_values(by="missing_treecover_pct", ascending=False)

summary.head(15)


Unnamed: 0_level_0,treecover_pct_filled,potential_treecover_pct,missing_treecover_pct
province_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aksaray,0.0,10.0,10.0
Batman,0.0,10.0,10.0
Mardin,0.0,10.0,10.0
Mus,0.0,10.0,10.0
Nevsehir,0.0,10.0,10.0
Nigde,0.0,10.0,10.0
Kilis,0.0,10.0,10.0
Van,0.0,10.0,10.0
Sanliurfa,0.0,10.0,10.0
Malatya,0.0,10.0,10.0


In [13]:
# Make sure the output folder exists before writing
os.makedirs(PROCESSED_DIR, exist_ok=True)

# Columns exported to the v1 feature table, in output order
feature_cols = [
    "province_name",
    "district_name",
    "lat",
    "lon",
    "avg_temp",
    "avg_rain",
    "treecover_pct",
    "potential_treecover_pct",
    "missing_treecover_pct",
]
features_df = merged.loc[:, feature_cols].copy()

# Write without the index so the CSV holds only the feature columns
output_path = PROCESSED_DIR / "districts_features_v1.csv"
features_df.to_csv(output_path, index=False)

len(features_df), output_path


(929, WindowsPath('../data/processed/districts_features_v1.csv'))

In [14]:
# Per-province averages of the exported features, ranked by mean shortfall (largest first)
summary_cols = ["treecover_pct", "potential_treecover_pct", "missing_treecover_pct"]
province_summary = (
    features_df[["province_name"] + summary_cols]
    .groupby("province_name")
    .mean()
    .sort_values(by="missing_treecover_pct", ascending=False)
)

province_summary.head(20)


Unnamed: 0_level_0,treecover_pct,potential_treecover_pct,missing_treecover_pct
province_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aksaray,0.0,10.0,10.0
Batman,0.0,10.0,10.0
Mardin,0.0,10.0,10.0
Mus,0.0,10.0,10.0
Nevsehir,0.0,10.0,10.0
Nigde,0.0,10.0,10.0
Kilis,0.0,10.0,10.0
Van,0.0,10.0,10.0
Sanliurfa,0.0,10.0,10.0
Malatya,0.0,10.0,10.0
