In [None]:
!pip -q install earthengine-api pandas openpyxl tqdm

# 중간 랜덤샘플링 데이터셋(NDVI 포함) 기준 DSR(일사량) 추출 코드

In [None]:
import pandas as pd
from google.colab import drive

# Mount Google Drive so files under /content/drive are readable from this runtime.
drive.mount('/content/drive')

# GPS table stored on Drive.
# NOTE(review): `df` is rebound from INPUT_XLSX further down in this notebook,
# so this frame appears unused by the visible cells -- confirm before removing.
df = pd.read_csv('/content/drive/MyDrive/산업수학/mainGPS.csv')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import ee, pandas as pd, numpy as np
from datetime import datetime, timedelta
from tqdm import tqdm


# Cloud project every Earth Engine initialization attempt is bound to.
GEE_PROJECT = 'matprocject11'

try:
    # Fast path: credentials are already cached for this runtime.
    ee.Initialize(project=GEE_PROJECT)
    print("GEE initialized (project=matprocject11).")
except Exception:
    # No usable credentials yet: run the interactive auth flow, then initialize
    # against the SAME project (the previous fallback silently dropped it).
    ee.Authenticate()
    ee.Initialize(project=GEE_PROJECT)
    print("GEE authenticated & initialized.")


# Column names expected in the input workbook.
DATE_COL = "date"
LON_COL = "lon"
LAT_COL = "lat"

# Workbook that is read, and the workbook written with the DSR column appended.
INPUT_XLSX = "/content/TOTAL_DWI_with_NDVI11111_filled.xlsx"
OUTPUT_XLSX = "/content/TOTAL_DWI_with_NDVI11111_filled_with_DSR.xlsx"

# The eight 3-hourly DSR bands of MCD18A1 that are summed per day,
# listed in GMT order (00:00 through 21:00).
MCD18A1_DSR_BANDS = [
    'GMT_0000_DSR',
    'GMT_0300_DSR',
    'GMT_0600_DSR',
    'GMT_0900_DSR',
    'GMT_1200_DSR',
    'GMT_1500_DSR',
    'GMT_1800_DSR',
    'GMT_2100_DSR',
]

def daily_dsr_total_MJm2(lon: float, lat: float, date_str: str, scale: int = 1000):
    """
    Daily accumulated downward shortwave radiation (MJ/m^2) at one point.

    Takes the first MODIS/061/MCD18A1 image that falls inside the day starting
    at ``date_str`` (UTC), sums whichever of the 3-hourly DSR bands listed in
    ``MCD18A1_DSR_BANDS`` are present, multiplies the sum by 10800 s (one
    3-hour window per band) to obtain J/m^2, and divides by 1e6 to get MJ/m^2.

    Args:
        lon: Longitude of the sample point.
        lat: Latitude of the sample point.
        date_str: Start day in a form accepted by ``ee.Date`` (e.g. "2020-01-05").
        scale: Scale in metres passed to ``reduceRegion``.

    Returns:
        The daily total as a ``float``. ``np.nan`` is returned when no image
        exists for that day, when the sampled pixel has no value, or when the
        Earth Engine request fails (a warning is emitted in the last case).
    """
    import warnings

    try:
        d0 = ee.Date(date_str)
        d1 = d0.advance(1, 'day')
        pt = ee.Geometry.Point([float(lon), float(lat)])

        # NOTE(review): the run output recorded in this notebook reports this
        # collection as deprecated; it is kept here so results stay comparable.
        ic = ee.ImageCollection('MODIS/061/MCD18A1').filterDate(d0, d1)

        # Ask only for the image count instead of pulling full image metadata.
        if ic.size().getInfo() == 0:
            return np.nan

        img = ic.first()

        # Keep only the DSR bands this image actually carries.
        band_names = img.bandNames()
        actual_bands = ee.List(MCD18A1_DSR_BANDS).filter(ee.Filter.inList('item', band_names))

        # Sum of the 3-hourly values -> J/m^2 (x 10800 s) -> MJ/m^2 (/ 1e6).
        dsr_total_MJm2 = (
            img.select(actual_bands)
            .reduce(ee.Reducer.sum())
            .multiply(10800)
            .divide(1e6)
        )

        # Fetch the sampled dictionary once; null pixels arrive as None.
        sampled = dsr_total_MJm2.reduceRegion(
            reducer=ee.Reducer.first(),
            geometry=pt,
            scale=scale,
            bestEffort=True
        ).getInfo() or {}

        # The sum reducer names its output band 'sum'; if that key is absent,
        # fall back to the first non-null value that was returned.
        result = sampled.get('sum')
        if result is None:
            result = next((v for v in sampled.values() if v is not None), None)

        return float(result) if result is not None else np.nan

    except Exception as e:
        # Best effort: one failing row must not abort the whole batch, but the
        # failure is reported instead of being swallowed silently.
        warnings.warn(
            f"daily_dsr_total_MJm2 failed for lon={lon}, lat={lat}, date={date_str}: {e!r}"
        )
        return np.nan


# Load the input workbook; this rebinds `df`, replacing any frame previously
# bound to that name.
df = pd.read_excel(INPUT_XLSX)


def to_date_str(x):
    """
    Normalise a date-like cell value to a "YYYY-MM-DD" string.

    Args:
        x: A ``datetime`` / ``pd.Timestamp``, a string, or a missing value.

    Returns:
        ``None`` for missing values (anything ``pd.isna`` reports) and for
        blank or whitespace-only strings; the formatted date for datetime-like
        input; otherwise the first 10 characters of the stripped string form.
    """
    if pd.isna(x):
        return None
    if isinstance(x, (datetime, pd.Timestamp)):
        return x.strftime("%Y-%m-%d")
    s = str(x).strip()
    # A blank cell carries no date: report it as missing so callers skip it
    # instead of sending an empty date string downstream.
    return s[:10] or None

# Fail fast, and say exactly which of the required columns are absent.
missing_cols = [c for c in (DATE_COL, LON_COL, LAT_COL) if c not in df.columns]
if missing_cols:
    raise ValueError(
        f"필수 컬럼이 누락됨: '{DATE_COL}', '{LON_COL}', '{LAT_COL}' 필요 "
        f"(missing: {missing_cols})"
    )

# Normalised date strings are kept in a separate Series so that no scratch
# column has to be added to (and later dropped from) df.
date_strs = df[DATE_COL].apply(to_date_str)

# One Earth Engine lookup per row; rows with a missing date or coordinate
# get NaN without issuing a request.
vals = []
for date_str, lon, lat in tqdm(
    zip(date_strs, df[LON_COL], df[LAT_COL]),
    total=len(df),
    desc="Computing DSR_total_MJm^2",
):
    if pd.isna(date_str) or pd.isna(lon) or pd.isna(lat):
        vals.append(np.nan)
        continue

    vals.append(daily_dsr_total_MJm2(float(lon), float(lat), date_str))

df["DSR_total_MJm^2"] = vals

# Persist the input table with the new DSR column appended.
df.to_excel(OUTPUT_XLSX, index=False)
print("Saved:", OUTPUT_XLSX)


GEE initialized (project=matprocject11).



Attention required for MODIS/061/MCD18A1! You are using a deprecated asset.
To make sure your code keeps working, please update it.
Learn more: https://developers.google.com/earth-engine/datasets/catalog/MODIS_061_MCD18A1

Computing DSR_total_MJm^2: 100%|██████████| 446/446 [02:17<00:00,  3.25it/s]


Saved: /content/TOTAL_DWI_with_NDVI11111_filled_with_DSR.xlsx


# 고정좌표(15개) 기준 sunlight 추출 코드

In [None]:
# -*- coding: utf-8 -*-

import ee, pandas as pd, numpy as np, time
from tqdm import tqdm

# Initialize Earth Engine.
# Cloud project every initialization attempt is bound to.
GEE_PROJECT = 'matprocject11'

try:
    # Fast path: credentials are already cached for this runtime.
    ee.Initialize(project=GEE_PROJECT)
    print("GEE initialized (project='matprocject11').")
except Exception:
    # No usable credentials yet: run the interactive auth flow, then initialize
    # against the SAME project (the previous fallback silently dropped it).
    ee.Authenticate()
    ee.Initialize(project=GEE_PROJECT)
    print("GEE authenticated & initialized.")

# Settings

# Input wildfire table and the enriched table that is written out.
WILDFIRE_CSV = "/content/wildfire_dataset.csv"
OUTPUT_CSV = "/content/wildfire_dataset_with_sunlight.csv"

# Name of the date column in the wildfire table.
DATE_COL = "date"

# Earth Engine collection, band, and sampling scale (metres) for the radiation lookup.
ERA5_DATASET = "ECMWF/ERA5_LAND/DAILY_AGGR"
ERA5_BAND = "surface_solar_radiation_downwards_sum"  # J/m^2/day
SCALE_M_ERA5 = 10_000

# The one site whose sampling longitude is relocated: original coordinate,
# and the longitude used for its geometry instead.
old_lat, old_lon = 35.917326, 129.441907
new_geom_lon = 129.38

# Load the wildfire table and parse its date column.
df_fire = pd.read_csv(WILDFIRE_CSV)
df_fire[DATE_COL] = pd.to_datetime(df_fire[DATE_COL])

# Coordinates rounded to 6 decimals act as the join keys later on.
df_fire = df_fire.assign(
    lat_rounded=df_fire["lat"].round(6),
    lon_rounded=df_fire["lon"].round(6),
)

# One row per distinct (rounded) coordinate; the row position is the site id.
df_sites = df_fire[["lat_rounded", "lon_rounded"]].drop_duplicates().reset_index(drop=True)
df_sites = df_sites.assign(
    site_id=df_sites.index,
    geom_lat=df_sites["lat_rounded"],
    geom_lon=df_sites["lon_rounded"],
)

# Relocate only the sampling longitude of the one designated site; its
# lat_rounded / lon_rounded key columns are left as they are.
mask = (df_sites["geom_lat"] == round(old_lat, 6)) & (df_sites["geom_lon"] == round(old_lon, 6))
df_sites.loc[mask, "geom_lon"] = new_geom_lon

print("고유 좌표 개수:", len(df_sites))

# Distinct calendar days present in the wildfire table, in ascending order.
date_labels = df_fire[DATE_COL].dt.strftime("%Y-%m-%d")
dates_unique = date_labels.drop_duplicates().sort_values().tolist()
print("고유 날짜 개수:", len(dates_unique))

# Build the Earth Engine features for the sites.
def row_to_feature(row):
    """
    Convert one site row into an ``ee.Feature``.

    The point geometry uses the row's geom_lon / geom_lat, while the
    properties carry site_id plus the rounded lat / lon values.
    """
    point = ee.Geometry.Point([float(row["geom_lon"]), float(row["geom_lat"])])
    return ee.Feature(
        point,
        {
            "site_id": int(row["site_id"]),
            "lat": float(row["lat_rounded"]),
            "lon": float(row["lon_rounded"]),
        },
    )

# One feature per site row, gathered into a single FeatureCollection.
features = []
for _, site_row in df_sites.iterrows():
    features.append(row_to_feature(site_row))
fc_sites = ee.FeatureCollection(features)

# Extract ERA5 solar radiation for a single date.
def era5_for_all_sites_one_day(date_str, fc_sites, scale=SCALE_M_ERA5,
                               retries=3, delay=1.5):
    """
    Sample the ERA5 radiation band at every site for one day.

    Takes the first image of ``ERA5_DATASET`` inside the day starting at
    ``date_str``, divides ``ERA5_BAND`` by 1e6 (J/m^2 -> MJ/m^2), renames it to
    "sunlight_era5", and fills masked pixels with -9999 before sampling
    (presumably so that sites on masked pixels are not dropped by
    ``sampleRegions`` -- callers must treat -9999 as "no data").

    Args:
        date_str: Start day in a form accepted by ``ee.Date``.
        fc_sites: ``ee.FeatureCollection`` of the sites to sample.
        scale: Sampling scale in metres passed to ``sampleRegions``.
        retries: Maximum number of attempts.
        delay: Seconds to sleep between failed attempts.

    Returns:
        A list of feature dicts as returned by ``getInfo``, each with
        ``properties[DATE_COL]`` set to ``date_str``. An empty list is returned
        when no image exists for the day, when every attempt fails, or when
        ``retries`` is not positive.
    """
    for attempt in range(retries):
        try:
            d0 = ee.Date(date_str)
            d1 = d0.advance(1, "day")

            ic = ee.ImageCollection(ERA5_DATASET).filterDate(d0, d1)
            # Ask only for the image count instead of pulling full image metadata.
            if ic.size().getInfo() == 0:
                return []
            img = ic.first()

            mj_img = img.select(ERA5_BAND).divide(1e6).rename("sunlight_era5")
            mj_img = mj_img.unmask(-9999)

            fc_sampled = mj_img.sampleRegions(
                collection=fc_sites,
                scale=scale,
                geometries=False
            )

            res = fc_sampled.getInfo()
            feats = res.get("features", [])

            # Tag every sampled feature with the day it belongs to.
            for f in feats:
                f.setdefault("properties", {})[DATE_COL] = date_str

            return feats

        except Exception as e:
            print(f"[{date_str}] Error: {e}")
            # Pause only when another attempt follows.
            if attempt < retries - 1:
                time.sleep(delay)

    # All attempts failed, or retries <= 0: always hand back a list so callers
    # can iterate the result unconditionally.
    return []

# Run the extraction for every date and collect the feature properties.
all_props = []
pbar = tqdm(dates_unique, total=len(dates_unique), desc="Extracting ERA5 sunlight for wildfire dates")

for ds in pbar:
    feats = era5_for_all_sites_one_day(ds, fc_sites, scale=SCALE_M_ERA5)
    all_props.extend(f["properties"] for f in feats)

pbar.close()
print("추출된 props 개수:", len(all_props))

# Build the ERA5 result frame.
df_era5 = pd.DataFrame(all_props)
if df_era5.empty:
    # Without any samples the columns below do not exist; stop with a clear
    # message instead of an opaque KeyError.
    raise RuntimeError(
        "No ERA5 samples were extracted; nothing to merge "
        "(see the per-date errors printed during extraction)."
    )

# Rebuild the same join keys that df_fire carries.
df_era5[DATE_COL] = pd.to_datetime(df_era5[DATE_COL])
df_era5["lat_rounded"] = df_era5["lat"].round(6)
df_era5["lon_rounded"] = df_era5["lon"].round(6)

# -9999 is the fill value written by unmask(-9999) during extraction, not a
# measurement: turn it back into NaN so it is counted as missing below and
# never reaches the output file as a radiation value.
df_era5["sunlight_era5"] = df_era5["sunlight_era5"].replace(-9999, np.nan)

# Left-join on (rounded lat, rounded lon, date) so every wildfire row is kept,
# then save.
df_out = pd.merge(
    df_fire,
    df_era5[["lat_rounded", "lon_rounded", DATE_COL, "sunlight_era5"]],
    on=["lat_rounded", "lon_rounded", DATE_COL],
    how="left"
)

# The rounded key columns were only needed for the join.
df_out = df_out.drop(columns=["lat_rounded", "lon_rounded"])

df_out.to_csv(OUTPUT_CSV, index=False, encoding="utf-8-sig")

print("완료! 저장:", OUTPUT_CSV)
print("sunlight_era5 결측치:", df_out["sunlight_era5"].isna().sum())
print(df_out.head())


GEE initialized (project='matprocject11').
문제 좌표(원본 lat/lon, geom 이동 전):
         lat         lon   geom_lat    geom_lon  type
7  35.917326  129.441907  35.917326  129.441907  wind
문제 좌표(geom_lon 이동 후):
         lat         lon   geom_lat  geom_lon  type
7  35.917326  129.441907  35.917326    129.38  wind
사이트 개수: 15


Extracting ERA5 sunlight (MJ/m^2/day, 2019-2024, fast): 100%|██████████| 2192/2192 [09:31<00:00,  3.83it/s]


추출된 feature 개수: 32880
site_id별 행 개수:
site_id
0     2192
1     2192
2     2192
3     2192
4     2192
5     2192
6     2192
7     2192
8     2192
9     2192
10    2192
11    2192
12    2192
13    2192
14    2192
Name: count, dtype: int64
Saved: /content/era5_sunlight_2019_2024_final.csv
         lat         lon type  site_id       date  year  sunlight_era5
0  35.787434  129.158357  dry        0 2019-01-01  2019      11.290624
1  35.810584  129.176430  dry        1 2019-01-01  2019      11.279440
2  36.007597  129.101096  dry        2 2019-01-01  2019      11.107452
3  36.017285  129.098238  dry        3 2019-01-01  2019      11.107452
4  36.071850  128.751038  dry        4 2019-01-01  2019      10.378136
