In [1]:
import xarray as xr
import pandas as pd

In [2]:
data_path = {
    'tg': './Climate/RAW/TG_C3S-glob-agric_gfdl-esm2m_rcp2p6_dek_20110101-20401231_v1.0.nc',
    'tn': './Climate/RAW/TN_C3S-glob-agric_gfdl-esm2m_rcp2p6_dek_20110101-20401231_v1.0.nc',
    'tx': './Climate/RAW/TX_C3S-glob-agric_gfdl-esm2m_rcp2p6_dek_20110101-20401231_v1.0.nc',
    'rr': './Climate/RAW/RR_C3S-glob-agric_gfdl-esm2m_rcp2p6_dek_20110101-20401231_v1.0.nc',
    'rr1': './Climate/RAW/RR1_C3S-glob-agric_gfdl-esm2m_rcp2p6_dek_20110101-20401231_v1.0.nc'
}

In [5]:
# 예측데이터에 활용할 연도 추출
target_year = [2025, 2030, 2035, 2040]

### TG 로드하기

In [3]:
tg = xr.open_dataset(data_path['tg'])

In [4]:
tg = tg.to_dataframe()
tg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,time_bounds,TG
time,lat,lon,bounds,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-01-05,-89.75,-179.75,0,2011-01-01,
2011-01-05,-89.75,-179.75,1,2011-01-11,
2011-01-05,-89.75,-179.25,0,2011-01-01,
2011-01-05,-89.75,-179.25,1,2011-01-11,
2011-01-05,-89.75,-178.75,0,2011-01-01,


In [7]:
tg = tg[tg['time_bounds'].dt.year.isin(target_year)]
tg = tg[~tg['TG'].isna()]
tg.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 19416960 entries, (Timestamp('2024-12-25 00:00:00'), -55.75, -68.25, 1) to (Timestamp('2040-12-25 00:00:00'), 83.75, -28.75, 0)
Data columns (total 2 columns):
 #   Column       Dtype         
---  ------       -----         
 0   time_bounds  datetime64[ns]
 1   TG           float32       
dtypes: datetime64[ns](1), float32(1)
memory usage: 351.9 MB


### TN 로드하기

In [8]:
tn = xr.open_dataset(data_path['tn'])

In [9]:
tn = tn.to_dataframe()
tn.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,time_bounds,TN
time,lat,lon,bounds,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-01-05,-89.75,-179.75,0,2011-01-01,
2011-01-05,-89.75,-179.75,1,2011-01-11,
2011-01-05,-89.75,-179.25,0,2011-01-01,
2011-01-05,-89.75,-179.25,1,2011-01-11,
2011-01-05,-89.75,-178.75,0,2011-01-01,


In [10]:
tn = tn[tn['time_bounds'].dt.year.isin(target_year)]
tn = tn[~tn['TN'].isna()]
tn.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 19416960 entries, (Timestamp('2024-12-25 00:00:00'), -55.75, -68.25, 1) to (Timestamp('2040-12-25 00:00:00'), 83.75, -28.75, 0)
Data columns (total 2 columns):
 #   Column       Dtype         
---  ------       -----         
 0   time_bounds  datetime64[ns]
 1   TN           float32       
dtypes: datetime64[ns](1), float32(1)
memory usage: 351.9 MB


### TX 로드하기

In [12]:
tx = xr.open_dataset(data_path['tx'])

In [13]:
tx = tx.to_dataframe()

In [14]:
tx = tx[tx['time_bounds'].dt.year.isin(target_year)]
tx = tx[~tx['TX'].isna()]

### TG, TN, TX 합치기

In [15]:
temp_df = pd.concat([tg, tn['TN'], tx['TX']], axis = 1)
temp_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,time_bounds,TG,TN,TX
time,lat,lon,bounds,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-12-25,-55.75,-68.25,1,2025-01-01,281.818817,278.288239,285.328186
2024-12-25,-55.75,-67.75,1,2025-01-01,281.954468,278.418518,285.488434
2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.168182,285.863068
2024-12-25,-55.25,-70.75,1,2025-01-01,280.976196,277.327759,284.486115
2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.662537,284.92984


In [16]:
temp_df = temp_df.reset_index()

In [17]:
temp_df.head()

Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX
0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.818817,278.288239,285.328186
1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.954468,278.418518,285.488434
2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.168182,285.863068
3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.976196,277.327759,284.486115
4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.662537,284.92984


In [18]:
# temp만 모아 중간 저장
temp_df.to_csv('./Climate/climate_data_temp_for_prediction.csv')

### RR 로드하기

In [19]:
rr = xr.open_dataset(data_path['rr'])

In [20]:
rr = rr.to_dataframe()

In [21]:
rr = rr[rr['time_bounds'].dt.year.isin(target_year)]
rr = rr[~rr['RR'].isna()]
rr.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 19416960 entries, (Timestamp('2024-12-25 00:00:00'), -55.75, -68.25, 1) to (Timestamp('2040-12-25 00:00:00'), 83.75, -28.75, 0)
Data columns (total 2 columns):
 #   Column       Dtype         
---  ------       -----         
 0   time_bounds  datetime64[ns]
 1   RR           float32       
dtypes: datetime64[ns](1), float32(1)
memory usage: 351.9 MB


In [25]:
rr = rr.reset_index()

### RR1 로드하기

In [22]:
rr1 = xr.open_dataset(data_path['rr1'])

In [23]:
rr1 = rr1.to_dataframe()

In [24]:
rr1 = rr1[rr1['time_bounds'].dt.year.isin(target_year)]
rr1 = rr1[~rr1['RR1'].isna()]
rr1.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 19416960 entries, (Timestamp('2024-12-25 00:00:00'), -55.75, -68.25, 1) to (Timestamp('2040-12-25 00:00:00'), 83.75, -28.75, 0)
Data columns (total 2 columns):
 #   Column       Dtype         
---  ------       -----         
 0   time_bounds  datetime64[ns]
 1   RR1          float32       
dtypes: datetime64[ns](1), float32(1)
memory usage: 351.9 MB


In [26]:
rr1 = rr1.reset_index()

### temp데이터와 RR, RR1 합치기

In [27]:
temp_df = pd.concat([temp_df, rr['RR'], rr1['RR1']], axis = 1)
temp_df.head()

Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1
0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.818817,278.288239,285.328186,26.840868,9.0
1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.954468,278.418518,285.488434,27.834499,10.0
2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.168182,285.863068,29.812483,10.0
3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.976196,277.327759,284.486115,16.897108,7.0
4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.662537,284.92984,17.66687,7.0


In [28]:
# 중간저장
temp_df.to_csv('./Climate/climate_data_for_prediction.csv')

### 성장시기에 따라 데이터 합치기

1. time_bounds를 통해 'month' column을 새로 생성한다.
2. 남반구/북반구 분류 - 'northern' column을 사용하여 lat >= 0 : 1, lat < 0: 0 값을 배정한다.
3. growing season 컬럼을 새로 생성하여 북반구: 4~10월에 1 / 남반구: 10~4월에 1값을 배정함
4. 데이터를 각각 2025년, 2030년, 2035년, 2040년 데이터프레임으로 나눈다.
5. 데이터 각각 groupby로 growing season의 집계값을 구한다.

In [3]:
import numpy as np

In [2]:
temp_df = pd.read_csv('./Climate/climate_data_for_prediction.csv')

In [3]:
temp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19416960 entries, 0 to 19416959
Data columns (total 11 columns):
 #   Column       Dtype  
---  ------       -----  
 0   Unnamed: 0   int64  
 1   time         object 
 2   lat          float64
 3   lon          float64
 4   bounds       int64  
 5   time_bounds  object 
 6   TG           float64
 7   TN           float64
 8   TX           float64
 9   RR           float64
 10  RR1          float64
dtypes: float64(7), int64(2), object(2)
memory usage: 1.6+ GB


In [4]:
temp_df['time_bounds'] = pd.to_datetime(temp_df['time_bounds'])

In [5]:
temp_df['month'] = temp_df['time_bounds'].dt.month
temp_df.head()

Unnamed: 0.1,Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1,month
0,0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.81882,278.28824,285.3282,26.840868,9.0,1
1,1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.95447,278.41852,285.48843,27.8345,10.0,1
2,2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.16818,285.86307,29.812483,10.0,1
3,3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.9762,277.32776,284.4861,16.897108,7.0,1
4,4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.66254,284.92984,17.66687,7.0,1


In [6]:
temp_df['northern'] = np.where(temp_df['lat']>= 0, 1, 0)
temp_df.head()

Unnamed: 0.1,Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1,month,northern
0,0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.81882,278.28824,285.3282,26.840868,9.0,1,0
1,1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.95447,278.41852,285.48843,27.8345,10.0,1,0
2,2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.16818,285.86307,29.812483,10.0,1,0
3,3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.9762,277.32776,284.4861,16.897108,7.0,1,0
4,4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.66254,284.92984,17.66687,7.0,1,0


In [7]:
temp_df.drop('Unnamed: 0', axis = 1, inplace = True)
temp_df.head()

Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1,month,northern
0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.81882,278.28824,285.3282,26.840868,9.0,1,0
1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.95447,278.41852,285.48843,27.8345,10.0,1,0
2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.16818,285.86307,29.812483,10.0,1,0
3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.9762,277.32776,284.4861,16.897108,7.0,1,0
4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.66254,284.92984,17.66687,7.0,1,0


In [8]:
# growing season
# 1: ((df_temp['northern'] == 1) & (temp_df['month']>= 4 & temp_df['month']<= 10)) | ((df_temp['northern'] == 0) & (temp_df['month] >= 10 | temp_df['month] <= 4))

temp_df['growing_season'] = 0

cond1 = (temp_df['northern'] == 1) & ((temp_df['month']>= 4) & (temp_df['month']<= 10))
cond2 = (temp_df['northern'] == 0) & ((temp_df['month'] >= 10) | (temp_df['month'] <= 4))
temp_df.loc[cond1 | cond2, 'growing_season'] = 1
temp_df.head()


Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1,month,northern,growing_season
0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.81882,278.28824,285.3282,26.840868,9.0,1,0,1
1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.95447,278.41852,285.48843,27.8345,10.0,1,0,1
2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.16818,285.86307,29.812483,10.0,1,0,1
3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.9762,277.32776,284.4861,16.897108,7.0,1,0,1
4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.66254,284.92984,17.66687,7.0,1,0,1


In [9]:
temp_df['year'] = temp_df['time_bounds'].dt.year
temp_df.head()

Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1,month,northern,growing_season,year
0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.81882,278.28824,285.3282,26.840868,9.0,1,0,1,2025
1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.95447,278.41852,285.48843,27.8345,10.0,1,0,1,2025
2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.16818,285.86307,29.812483,10.0,1,0,1,2025
3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.9762,277.32776,284.4861,16.897108,7.0,1,0,1,2025
4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.66254,284.92984,17.66687,7.0,1,0,1,2025


In [10]:
result = temp_df.groupby(['year', 'lat', 'lon', 'growing_season']).agg({
    'TG': 'mean',
    'TN': 'min',
    'TX': 'max',
    'RR': 'mean',
    'RR1': 'mean'
}
).reset_index()

In [11]:
result = result[result['growing_season'] == 1]
result.head()

Unnamed: 0,year,lat,lon,growing_season,TG,TN,TX,RR,RR1
1,2025,-55.75,-68.25,1,280.370402,272.85995,286.98093,21.325952,6.642857
3,2025,-55.75,-67.75,1,280.408706,272.80548,287.30997,22.646382,6.809524
5,2025,-55.75,-67.25,1,280.289852,272.4064,287.93896,22.818376,6.785714
7,2025,-55.25,-70.75,1,279.80372,272.3072,285.92715,13.733531,5.47619
9,2025,-55.25,-70.25,1,280.079912,272.6043,286.13077,14.348862,5.642857


In [12]:
result.drop('growing_season', axis = 1, inplace = True)

In [13]:
result.head()

Unnamed: 0,year,lat,lon,TG,TN,TX,RR,RR1
1,2025,-55.75,-68.25,280.370402,272.85995,286.98093,21.325952,6.642857
3,2025,-55.75,-67.75,280.408706,272.80548,287.30997,22.646382,6.809524
5,2025,-55.75,-67.25,280.289852,272.4064,287.93896,22.818376,6.785714
7,2025,-55.25,-70.75,279.80372,272.3072,285.92715,13.733531,5.47619
9,2025,-55.25,-70.25,280.079912,272.6043,286.13077,14.348862,5.642857


In [19]:
result_2025 = result[result['year']== 2025]
result_2025.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67420 entries, 1 to 134839
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    67420 non-null  int32  
 1   lat     67420 non-null  float64
 2   lon     67420 non-null  float64
 3   TG      67420 non-null  float64
 4   TN      67420 non-null  float64
 5   TX      67420 non-null  float64
 6   RR      67420 non-null  float64
 7   RR1     67420 non-null  float64
dtypes: float64(7), int32(1)
memory usage: 4.4 MB


In [22]:
result_2030 = result[result['year']== 2030]
result_2030.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67420 entries, 134841 to 269679
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    67420 non-null  int32  
 1   lat     67420 non-null  float64
 2   lon     67420 non-null  float64
 3   TG      67420 non-null  float64
 4   TN      67420 non-null  float64
 5   TX      67420 non-null  float64
 6   RR      67420 non-null  float64
 7   RR1     67420 non-null  float64
dtypes: float64(7), int32(1)
memory usage: 4.4 MB


In [23]:
result_2035 = result[result['year']== 2035]
result_2035.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67420 entries, 269681 to 404519
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    67420 non-null  int32  
 1   lat     67420 non-null  float64
 2   lon     67420 non-null  float64
 3   TG      67420 non-null  float64
 4   TN      67420 non-null  float64
 5   TX      67420 non-null  float64
 6   RR      67420 non-null  float64
 7   RR1     67420 non-null  float64
dtypes: float64(7), int32(1)
memory usage: 4.4 MB


In [24]:
result_2040 = result[result['year']== 2040]
result_2040.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67420 entries, 404521 to 539359
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    67420 non-null  int32  
 1   lat     67420 non-null  float64
 2   lon     67420 non-null  float64
 3   TG      67420 non-null  float64
 4   TN      67420 non-null  float64
 5   TX      67420 non-null  float64
 6   RR      67420 non-null  float64
 7   RR1     67420 non-null  float64
dtypes: float64(7), int32(1)
memory usage: 4.4 MB


### Column 이름 변경하기

In [20]:
col_name = [col for col in result.columns]
col_rename_2025 = {col : col+"_25" for col in col_name}
col_rename_2030 = {col : col+"_30" for col in col_name}
col_rename_2035 = {col : col+"_35" for col in col_name}
col_rename_2040 = {col : col+"_40" for col in col_name}

In [25]:
result_2025.rename(columns = col_rename_2025, inplace = True)
result_2030.rename(columns = col_rename_2030, inplace = True)
result_2035.rename(columns = col_rename_2035, inplace = True)
result_2040.rename(columns = col_rename_2040, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_2025.rename(columns = col_rename_2025, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_2030.rename(columns = col_rename_2030, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_2035.rename(columns = col_rename_2035, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-vie

In [26]:
result_2025

Unnamed: 0,year_25,lat_25,lon_25,TG_25,TN_25,TX_25,RR_25,RR1_25
1,2025,-55.75,-68.25,280.370402,272.85995,286.98093,21.325952,6.642857
3,2025,-55.75,-67.75,280.408706,272.80548,287.30997,22.646382,6.809524
5,2025,-55.75,-67.25,280.289852,272.40640,287.93896,22.818376,6.785714
7,2025,-55.25,-70.75,279.803720,272.30720,285.92715,13.733531,5.476190
9,2025,-55.25,-70.25,280.079912,272.60430,286.13077,14.348862,5.642857
...,...,...,...,...,...,...,...,...
134831,2025,83.75,-30.75,266.530884,242.43130,282.67657,3.396297,0.666667
134833,2025,83.75,-30.25,266.682550,242.38345,282.88696,3.425099,0.666667
134835,2025,83.75,-29.75,266.816579,242.35439,283.00030,3.479614,0.666667
134837,2025,83.75,-29.25,267.265828,242.60606,283.34137,0.935276,0.380952


In [27]:
result_2030

Unnamed: 0,year_30,lat_30,lon_30,TG_30,TN_30,TX_30,RR_30,RR1_30
134841,2030,-55.75,-68.25,280.140747,273.69327,287.02786,19.199225,6.404762
134843,2030,-55.75,-67.75,280.142717,273.50577,287.07764,20.584029,6.595238
134845,2030,-55.75,-67.25,279.986630,272.87674,287.39620,20.933835,6.500000
134847,2030,-55.25,-70.75,279.720813,273.42682,286.16992,12.678609,4.571429
134849,2030,-55.25,-70.25,279.967381,273.73962,286.48200,13.346206,4.833333
...,...,...,...,...,...,...,...,...
269671,2030,83.75,-30.75,266.467085,238.72623,280.43253,3.425451,0.571429
269673,2030,83.75,-30.25,266.623536,238.79033,280.66420,3.389993,0.523810
269675,2030,83.75,-29.75,266.762156,238.86676,280.80325,3.375963,0.523810
269677,2030,83.75,-29.25,267.216218,239.23097,281.20316,0.936212,0.238095


In [28]:
result_2035

Unnamed: 0,year_35,lat_35,lon_35,TG_35,TN_35,TX_35,RR_35,RR1_35
269681,2035,-55.75,-68.25,279.759385,270.38922,286.54350,25.751300,7.404762
269683,2035,-55.75,-67.75,279.788214,269.97882,286.54700,27.248379,7.500000
269685,2035,-55.75,-67.25,279.659276,269.03363,286.98782,27.412135,7.476190
269687,2035,-55.25,-70.75,279.226500,270.76596,286.52734,16.889334,6.285714
269689,2035,-55.25,-70.25,279.491784,270.83618,286.69240,17.673714,6.500000
...,...,...,...,...,...,...,...,...
404511,2035,83.75,-30.75,265.309685,237.43619,280.33310,2.755309,0.714286
404513,2035,83.75,-30.25,265.457070,237.48955,280.54730,2.757977,0.714286
404515,2035,83.75,-29.75,265.586652,237.55797,280.68314,2.764056,0.714286
404517,2035,83.75,-29.25,266.031357,237.91226,281.09250,0.762055,0.285714


In [29]:
result_2040

Unnamed: 0,year_40,lat_40,lon_40,TG_40,TN_40,TX_40,RR_40,RR1_40
404521,2040,-55.75,-68.25,279.734722,272.46680,286.40137,21.069766,7.166667
404523,2040,-55.75,-67.75,279.792674,272.40870,286.45975,22.270613,7.333333
404525,2040,-55.75,-67.25,279.692799,271.97192,286.82800,22.403786,7.238095
404527,2040,-55.25,-70.75,279.087441,271.40240,284.70187,14.235677,5.904762
404529,2040,-55.25,-70.25,279.378820,271.79413,285.19650,14.702338,5.857143
...,...,...,...,...,...,...,...,...
539351,2040,83.75,-30.75,265.067294,239.31070,280.96283,3.162781,0.619048
539353,2040,83.75,-30.25,265.221655,239.34077,281.22787,3.184580,0.619048
539355,2040,83.75,-29.75,265.358688,239.38052,281.40134,3.216710,0.666667
539357,2040,83.75,-29.25,265.810416,239.70972,281.83545,0.903655,0.380952


### 2021 Train 데이터에 [2025, 2030, 2035, 2040] 값 붙여주기

In [14]:
df = pd.read_csv('./Climate/climate_data_2021_growing_season.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,lat,lon,TG,TN,TX,RR,RR1
0,0,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524
1,1,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381
2,2,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238
3,3,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762
4,4,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476


In [15]:
df.set_index('Unnamed: 0', inplace= True)
df.head()

Unnamed: 0_level_0,lat,lon,TG,TN,TX,RR,RR1
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524
1,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381
2,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238
3,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762
4,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476


In [16]:
df['year'] = 2021

In [17]:
df.head()

Unnamed: 0_level_0,lat,lon,TG,TN,TX,RR,RR1,year
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524,2021
1,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381,2021
2,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238,2021
3,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762,2021
4,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476,2021


In [18]:
result.head()

Unnamed: 0,year,lat,lon,TG,TN,TX,RR,RR1
1,2025,-55.75,-68.25,280.370402,272.85995,286.98093,21.325952,6.642857
3,2025,-55.75,-67.75,280.408706,272.80548,287.30997,22.646382,6.809524
5,2025,-55.75,-67.25,280.289852,272.4064,287.93896,22.818376,6.785714
7,2025,-55.25,-70.75,279.80372,272.3072,285.92715,13.733531,5.47619
9,2025,-55.25,-70.25,280.079912,272.6043,286.13077,14.348862,5.642857


In [20]:
df = df[['year', 'lat', 'lon', 'TG', 'TN','TX', 'RR', 'RR1']]
df.head()

Unnamed: 0_level_0,year,lat,lon,TG,TN,TX,RR,RR1
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2021,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524
1,2021,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381
2,2021,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238
3,2021,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762
4,2021,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476


In [22]:
result_merge = pd.concat([df, result], axis = 0)
result_merge.head()

Unnamed: 0,year,lat,lon,TG,TN,TX,RR,RR1
0,2021,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524
1,2021,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381
2,2021,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238
3,2021,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762
4,2021,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476


In [24]:
result_merge = result_merge.sort_values(by = ['lat', 'lon', 'year'])

In [25]:
result_merge.to_csv('./Climate/climate_data_2021-2040_v2.csv')

In [33]:
merged_df = pd.merge(df, result_2025, left_on=['lat', 'lon'], right_on=['lat_25', 'lon_25'], how='outer')
merged_df.head()

Unnamed: 0,lat,lon,TG,TN,TX,RR,RR1,year_25,lat_25,lon_25,TG_25,TN_25,TX_25,RR_25,RR1_25
0,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524,2025,0.25,-80.25,298.899167,293.59348,306.16718,19.012498,4.166667
1,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381,2025,0.25,-79.75,299.081277,293.40118,307.75146,42.048316,6.952381
2,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238,2025,0.25,-79.25,299.404286,293.80328,308.42245,50.959609,7.238095
3,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762,2025,0.25,-78.75,293.670641,287.18896,304.25934,34.33925,6.571429
4,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476,2025,0.25,-78.25,288.230551,281.39078,299.71674,19.279979,5.619048


In [34]:
merged_df = pd.merge(merged_df, result_2030, left_on=['lat', 'lon'], right_on=['lat_30', 'lon_30'], how='outer')
merged_df.head()

Unnamed: 0,lat,lon,TG,TN,TX,RR,RR1,year_25,lat_25,lon_25,...,RR_25,RR1_25,year_30,lat_30,lon_30,TG_30,TN_30,TX_30,RR_30,RR1_30
0,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524,2025,0.25,-80.25,...,19.012498,4.166667,2030,0.25,-80.25,299.942452,294.4619,308.8,18.833276,3.5
1,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381,2025,0.25,-79.75,...,42.048316,6.952381,2030,0.25,-79.75,300.006286,294.0907,310.1739,40.36992,5.52381
2,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238,2025,0.25,-79.25,...,50.959609,7.238095,2030,0.25,-79.25,300.206926,294.34546,310.5644,47.372767,5.595238
3,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762,2025,0.25,-78.75,...,34.33925,6.571429,2030,0.25,-78.75,294.362856,287.4778,306.28394,30.484589,4.928571
4,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476,2025,0.25,-78.25,...,19.279979,5.619048,2030,0.25,-78.25,288.899348,281.67804,301.739,17.060299,4.5


In [35]:
merged_df = pd.merge(merged_df, result_2035, left_on=['lat', 'lon'], right_on=['lat_35', 'lon_35'], how='outer')
merged_df.head()

Unnamed: 0,lat,lon,TG,TN,TX,RR,RR1,year_25,lat_25,lon_25,...,RR_30,RR1_30,year_35,lat_35,lon_35,TG_35,TN_35,TX_35,RR_35,RR1_35
0,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524,2025,0.25,-80.25,...,18.833276,3.5,2035,0.25,-80.25,299.324106,294.64328,308.04755,23.321893,5.214286
1,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381,2025,0.25,-79.75,...,40.36992,5.52381,2035,0.25,-79.75,299.43549,294.3947,308.34534,53.840344,7.761905
2,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238,2025,0.25,-79.25,...,47.372767,5.595238,2035,0.25,-79.25,299.68901,294.65768,308.46494,69.960389,8.285714
3,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762,2025,0.25,-78.75,...,30.484589,4.928571,2035,0.25,-78.75,293.903718,288.2388,302.84344,52.584646,7.571429
4,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476,2025,0.25,-78.25,...,17.060299,4.5,2035,0.25,-78.25,288.511259,282.30435,297.3421,27.69319,6.97619


In [36]:
merged_df = pd.merge(merged_df, result_2040, left_on=['lat', 'lon'], right_on=['lat_40', 'lon_40'], how='outer')
merged_df.head()

Unnamed: 0,lat,lon,TG,TN,TX,RR,RR1,year_25,lat_25,lon_25,...,RR_35,RR1_35,year_40,lat_40,lon_40,TG_40,TN_40,TX_40,RR_40,RR1_40
0,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524,2025,0.25,-80.25,...,23.321893,5.214286,2040,0.25,-80.25,299.167031,292.40375,307.01093,15.064381,3.5
1,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381,2025,0.25,-79.75,...,53.840344,7.761905,2040,0.25,-79.75,299.359304,291.90714,308.45282,32.732717,6.261905
2,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238,2025,0.25,-79.25,...,69.960389,8.285714,2040,0.25,-79.25,299.69196,292.0985,309.16003,39.34149,6.761905
3,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762,2025,0.25,-78.75,...,52.584646,7.571429,2040,0.25,-78.75,293.965327,285.39587,305.29657,26.742078,5.666667
4,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476,2025,0.25,-78.25,...,27.69319,6.97619,2040,0.25,-78.25,288.526837,279.4244,300.65524,16.278025,4.97619


In [37]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67420 entries, 0 to 67419
Data columns (total 39 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   lat      67420 non-null  float64
 1   lon      67420 non-null  float64
 2   TG       67420 non-null  float64
 3   TN       67420 non-null  float64
 4   TX       67420 non-null  float64
 5   RR       67420 non-null  float64
 6   RR1      67420 non-null  float64
 7   year_25  67420 non-null  int32  
 8   lat_25   67420 non-null  float64
 9   lon_25   67420 non-null  float64
 10  TG_25    67420 non-null  float64
 11  TN_25    67420 non-null  float64
 12  TX_25    67420 non-null  float64
 13  RR_25    67420 non-null  float64
 14  RR1_25   67420 non-null  float64
 15  year_30  67420 non-null  int32  
 16  lat_30   67420 non-null  float64
 17  lon_30   67420 non-null  float64
 18  TG_30    67420 non-null  float64
 19  TN_30    67420 non-null  float64
 20  TX_30    67420 non-null  float64
 21  RR_30    674

In [39]:
merged_df.rename(
    columns = {
        'TG': 'TG_21',
        'TN': 'TN_21',
        'TX': 'TX_21',
        'RR': 'RR_21',
        'RR1': 'RR1_21'
    }, inplace = True
)

merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67420 entries, 0 to 67419
Data columns (total 39 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   lat      67420 non-null  float64
 1   lon      67420 non-null  float64
 2   TG_21    67420 non-null  float64
 3   TN_21    67420 non-null  float64
 4   TX_21    67420 non-null  float64
 5   RR_21    67420 non-null  float64
 6   RR1_21   67420 non-null  float64
 7   year_25  67420 non-null  int32  
 8   lat_25   67420 non-null  float64
 9   lon_25   67420 non-null  float64
 10  TG_25    67420 non-null  float64
 11  TN_25    67420 non-null  float64
 12  TX_25    67420 non-null  float64
 13  RR_25    67420 non-null  float64
 14  RR1_25   67420 non-null  float64
 15  year_30  67420 non-null  int32  
 16  lat_30   67420 non-null  float64
 17  lon_30   67420 non-null  float64
 18  TG_30    67420 non-null  float64
 19  TN_30    67420 non-null  float64
 20  TX_30    67420 non-null  float64
 21  RR_30    674

In [41]:
drop_col = ['year_25', 'year_30', 'year_35', 'year_40',
            'lat_25', 'lat_30', 'lat_35', 'lat_40',
            'lon_25', 'lon_30', 'lon_35', 'lon_40']

In [42]:
merged_df.drop(drop_col, axis =1, inplace= True)

In [44]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67420 entries, 0 to 67419
Data columns (total 27 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   lat     67420 non-null  float64
 1   lon     67420 non-null  float64
 2   TG_21   67420 non-null  float64
 3   TN_21   67420 non-null  float64
 4   TX_21   67420 non-null  float64
 5   RR_21   67420 non-null  float64
 6   RR1_21  67420 non-null  float64
 7   TG_25   67420 non-null  float64
 8   TN_25   67420 non-null  float64
 9   TX_25   67420 non-null  float64
 10  RR_25   67420 non-null  float64
 11  RR1_25  67420 non-null  float64
 12  TG_30   67420 non-null  float64
 13  TN_30   67420 non-null  float64
 14  TX_30   67420 non-null  float64
 15  RR_30   67420 non-null  float64
 16  RR1_30  67420 non-null  float64
 17  TG_35   67420 non-null  float64
 18  TN_35   67420 non-null  float64
 19  TX_35   67420 non-null  float64
 20  RR_35   67420 non-null  float64
 21  RR1_35  67420 non-null  float64
 22

In [43]:
merged_df.to_csv('./Climate/climate_data_2021-2040.csv')

In [26]:
df_test = pd.read_csv('./Climate/climate_data_2021-2040_v2.csv')
df_test.head()

Unnamed: 0.1,Unnamed: 0,year,lat,lon,TG,TN,TX,RR,RR1
0,54025,2021,-55.75,-68.25,279.585967,272.77362,287.27676,26.376604,7.738095
1,1,2025,-55.75,-68.25,280.370402,272.85995,286.98093,21.325952,6.642857
2,134841,2030,-55.75,-68.25,280.140747,273.69327,287.02786,19.199225,6.404762
3,269681,2035,-55.75,-68.25,279.759385,270.38922,286.5435,25.7513,7.404762
4,404521,2040,-55.75,-68.25,279.734722,272.4668,286.40137,21.069766,7.166667
