In [1]:
import xarray as xr
import pandas as pd

In [2]:
data_path = {
    'tg': './Climate/RAW/TG_C3S-glob-agric_gfdl-esm2m_rcp2p6_dek_20110101-20401231_v1.0.nc',
    'tn': './Climate/RAW/TN_C3S-glob-agric_gfdl-esm2m_rcp2p6_dek_20110101-20401231_v1.0.nc',
    'tx': './Climate/RAW/TX_C3S-glob-agric_gfdl-esm2m_rcp2p6_dek_20110101-20401231_v1.0.nc',
    'rr': './Climate/RAW/RR_C3S-glob-agric_gfdl-esm2m_rcp2p6_dek_20110101-20401231_v1.0.nc',
    'rr1': './Climate/RAW/RR1_C3S-glob-agric_gfdl-esm2m_rcp2p6_dek_20110101-20401231_v1.0.nc'
}

In [5]:
# 예측데이터에 활용할 연도 추출
target_year = [2025, 2030, 2035, 2040]

### TG 로드하기

In [3]:
tg = xr.open_dataset(data_path['tg'])

In [4]:
tg = tg.to_dataframe()
tg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,time_bounds,TG
time,lat,lon,bounds,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-01-05,-89.75,-179.75,0,2011-01-01,
2011-01-05,-89.75,-179.75,1,2011-01-11,
2011-01-05,-89.75,-179.25,0,2011-01-01,
2011-01-05,-89.75,-179.25,1,2011-01-11,
2011-01-05,-89.75,-178.75,0,2011-01-01,


In [7]:
tg = tg[tg['time_bounds'].dt.year.isin(target_year)]
tg = tg[~tg['TG'].isna()]
tg.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 19416960 entries, (Timestamp('2024-12-25 00:00:00'), -55.75, -68.25, 1) to (Timestamp('2040-12-25 00:00:00'), 83.75, -28.75, 0)
Data columns (total 2 columns):
 #   Column       Dtype         
---  ------       -----         
 0   time_bounds  datetime64[ns]
 1   TG           float32       
dtypes: datetime64[ns](1), float32(1)
memory usage: 351.9 MB


### TN 로드하기

In [8]:
tn = xr.open_dataset(data_path['tn'])

In [9]:
tn = tn.to_dataframe()
tn.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,time_bounds,TN
time,lat,lon,bounds,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-01-05,-89.75,-179.75,0,2011-01-01,
2011-01-05,-89.75,-179.75,1,2011-01-11,
2011-01-05,-89.75,-179.25,0,2011-01-01,
2011-01-05,-89.75,-179.25,1,2011-01-11,
2011-01-05,-89.75,-178.75,0,2011-01-01,


In [10]:
tn = tn[tn['time_bounds'].dt.year.isin(target_year)]
tn = tn[~tn['TN'].isna()]
tn.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 19416960 entries, (Timestamp('2024-12-25 00:00:00'), -55.75, -68.25, 1) to (Timestamp('2040-12-25 00:00:00'), 83.75, -28.75, 0)
Data columns (total 2 columns):
 #   Column       Dtype         
---  ------       -----         
 0   time_bounds  datetime64[ns]
 1   TN           float32       
dtypes: datetime64[ns](1), float32(1)
memory usage: 351.9 MB


### TX 로드하기

In [12]:
tx = xr.open_dataset(data_path['tx'])

In [13]:
tx = tx.to_dataframe()

In [14]:
tx = tx[tx['time_bounds'].dt.year.isin(target_year)]
tx = tx[~tx['TX'].isna()]

### TG, TN, TX 합치기

In [15]:
temp_df = pd.concat([tg, tn['TN'], tx['TX']], axis = 1)
temp_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,time_bounds,TG,TN,TX
time,lat,lon,bounds,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-12-25,-55.75,-68.25,1,2025-01-01,281.818817,278.288239,285.328186
2024-12-25,-55.75,-67.75,1,2025-01-01,281.954468,278.418518,285.488434
2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.168182,285.863068
2024-12-25,-55.25,-70.75,1,2025-01-01,280.976196,277.327759,284.486115
2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.662537,284.92984


In [16]:
temp_df = temp_df.reset_index()

In [17]:
temp_df.head()

Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX
0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.818817,278.288239,285.328186
1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.954468,278.418518,285.488434
2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.168182,285.863068
3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.976196,277.327759,284.486115
4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.662537,284.92984


In [18]:
# temp만 모아 중간 저장
temp_df.to_csv('./Climate/climate_data_temp_for_prediction.csv')

### RR 로드하기

In [19]:
rr = xr.open_dataset(data_path['rr'])

In [20]:
rr = rr.to_dataframe()

In [21]:
rr = rr[rr['time_bounds'].dt.year.isin(target_year)]
rr = rr[~rr['RR'].isna()]
rr.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 19416960 entries, (Timestamp('2024-12-25 00:00:00'), -55.75, -68.25, 1) to (Timestamp('2040-12-25 00:00:00'), 83.75, -28.75, 0)
Data columns (total 2 columns):
 #   Column       Dtype         
---  ------       -----         
 0   time_bounds  datetime64[ns]
 1   RR           float32       
dtypes: datetime64[ns](1), float32(1)
memory usage: 351.9 MB


In [25]:
rr = rr.reset_index()

### RR1 로드하기

In [22]:
rr1 = xr.open_dataset(data_path['rr1'])

In [23]:
rr1 = rr1.to_dataframe()

In [24]:
rr1 = rr1[rr1['time_bounds'].dt.year.isin(target_year)]
rr1 = rr1[~rr1['RR1'].isna()]
rr1.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 19416960 entries, (Timestamp('2024-12-25 00:00:00'), -55.75, -68.25, 1) to (Timestamp('2040-12-25 00:00:00'), 83.75, -28.75, 0)
Data columns (total 2 columns):
 #   Column       Dtype         
---  ------       -----         
 0   time_bounds  datetime64[ns]
 1   RR1          float32       
dtypes: datetime64[ns](1), float32(1)
memory usage: 351.9 MB


In [26]:
rr1 = rr1.reset_index()

### temp데이터와 RR, RR1 합치기

In [27]:
temp_df = pd.concat([temp_df, rr['RR'], rr1['RR1']], axis = 1)
temp_df.head()

Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1
0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.818817,278.288239,285.328186,26.840868,9.0
1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.954468,278.418518,285.488434,27.834499,10.0
2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.168182,285.863068,29.812483,10.0
3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.976196,277.327759,284.486115,16.897108,7.0
4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.662537,284.92984,17.66687,7.0


In [28]:
# 중간저장
temp_df.to_csv('./Climate/climate_data_for_prediction.csv')

### 성장시기에 따라 데이터 합치기

1. time_bounds를 통해 'month' column을 새로 생성한다.
2. 남반구/북반구 분류 - 'northern' column을 사용하여 lat >= 0 : 1, lat < 0: 0 값을 배정한다.
3. growing season 컬럼을 새로 생성하여 북반구: 4~10월에 1 / 남반구: 10~4월에 1값을 배정함
4. 데이터를 각각 2025년, 2030년, 2035년, 2040년 데이터프레임으로 나눈다.
5. 데이터 각각 groupby로 growing season의 집계값을 구한다.

In [29]:
import numpy as np

In [30]:
temp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19416960 entries, 0 to 19416959
Data columns (total 10 columns):
 #   Column       Dtype         
---  ------       -----         
 0   time         datetime64[ns]
 1   lat          float64       
 2   lon          float64       
 3   bounds       int64         
 4   time_bounds  datetime64[ns]
 5   TG           float32       
 6   TN           float32       
 7   TX           float32       
 8   RR           float32       
 9   RR1          float32       
dtypes: datetime64[ns](2), float32(5), float64(2), int64(1)
memory usage: 1.1 GB


In [31]:
temp_df['month'] = temp_df['time_bounds'].dt.month
temp_df.head()

Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1,month
0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.818817,278.288239,285.328186,26.840868,9.0,1
1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.954468,278.418518,285.488434,27.834499,10.0,1
2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.168182,285.863068,29.812483,10.0,1
3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.976196,277.327759,284.486115,16.897108,7.0,1
4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.662537,284.92984,17.66687,7.0,1


In [32]:
temp_df['northern'] = np.where(temp_df['lat']>= 0, 1, 0)
temp_df.head()

Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1,month,northern
0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.818817,278.288239,285.328186,26.840868,9.0,1,0
1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.954468,278.418518,285.488434,27.834499,10.0,1,0
2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.168182,285.863068,29.812483,10.0,1,0
3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.976196,277.327759,284.486115,16.897108,7.0,1,0
4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.662537,284.92984,17.66687,7.0,1,0


In [34]:
temp_df.tail()

Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1,month,northern
19416955,2040-12-25,83.75,-30.75,0,2040-12-21,244.013245,242.113861,245.854584,0.005922,0.0,12,1
19416956,2040-12-25,83.75,-30.25,0,2040-12-21,244.013046,242.116165,245.860504,0.005414,0.0,12,1
19416957,2040-12-25,83.75,-29.75,0,2040-12-21,244.01329,242.119049,245.866898,0.092338,0.0,12,1
19416958,2040-12-25,83.75,-29.25,0,2040-12-21,244.381577,242.489944,246.241348,0.0,0.0,12,1
19416959,2040-12-25,83.75,-28.75,0,2040-12-21,244.608627,242.733398,246.458588,0.0,0.0,12,1


In [39]:
# growing season
# 1: ((df_temp['northern'] == 1) & (temp_df['month']>= 4 & temp_df['month']<= 10)) | ((df_temp['northern'] == 0) & (temp_df['month] >= 10 | temp_df['month] <= 4))

temp_df['growing_season'] = 0

cond1 = (temp_df['northern'] == 1) & ((temp_df['month']>= 4) & (temp_df['month']<= 10))
cond2 = (temp_df['northern'] == 0) & ((temp_df['month'] >= 10) | (temp_df['month'] <= 4))
temp_df.loc[cond1 | cond2, 'growing_season'] = 1
temp_df.head()


Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1,month,northern,growing_season
0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.818817,278.288239,285.328186,26.840868,9.0,1,0,1
1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.954468,278.418518,285.488434,27.834499,10.0,1,0,1
2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.168182,285.863068,29.812483,10.0,1,0,1
3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.976196,277.327759,284.486115,16.897108,7.0,1,0,1
4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.662537,284.92984,17.66687,7.0,1,0,1


In [40]:
temp_df['year'] = temp_df['time_bounds'].dt.year
temp_df.head()

Unnamed: 0,time,lat,lon,bounds,time_bounds,TG,TN,TX,RR,RR1,month,northern,growing_season,year
0,2024-12-25,-55.75,-68.25,1,2025-01-01,281.818817,278.288239,285.328186,26.840868,9.0,1,0,1,2025
1,2024-12-25,-55.75,-67.75,1,2025-01-01,281.954468,278.418518,285.488434,27.834499,10.0,1,0,1,2025
2,2024-12-25,-55.75,-67.25,1,2025-01-01,282.01004,278.168182,285.863068,29.812483,10.0,1,0,1,2025
3,2024-12-25,-55.25,-70.75,1,2025-01-01,280.976196,277.327759,284.486115,16.897108,7.0,1,0,1,2025
4,2024-12-25,-55.25,-70.25,1,2025-01-01,281.39444,277.662537,284.92984,17.66687,7.0,1,0,1,2025


In [42]:
result = temp_df.groupby(['year', 'lat', 'lon', 'growing_season']).agg({
    'TG': 'mean',
    'TN': 'min',
    'TX': 'max',
    'RR': 'mean',
    'RR1': 'mean'
}
).reset_index()

In [44]:
result = result[result['growing_season'] == 1]
result.head()

Unnamed: 0,year,lat,lon,growing_season,TG,TN,TX,RR,RR1
1,2025,-55.75,-68.25,1,280.370392,272.859955,286.980927,21.325951,6.642857
3,2025,-55.75,-67.75,1,280.408722,272.805481,287.309967,22.646383,6.809524
5,2025,-55.75,-67.25,1,280.289856,272.406403,287.938965,22.818377,6.785714
7,2025,-55.25,-70.75,1,279.803711,272.30719,285.927155,13.733531,5.476191
9,2025,-55.25,-70.25,1,280.079926,272.604309,286.130768,14.348863,5.642857


In [45]:
result.drop('growing_season', axis = 1, inplace = True)

In [47]:
result.head()

Unnamed: 0,year,lat,lon,TG,TN,TX,RR,RR1
1,2025,-55.75,-68.25,280.370392,272.859955,286.980927,21.325951,6.642857
3,2025,-55.75,-67.75,280.408722,272.805481,287.309967,22.646383,6.809524
5,2025,-55.75,-67.25,280.289856,272.406403,287.938965,22.818377,6.785714
7,2025,-55.25,-70.75,279.803711,272.30719,285.927155,13.733531,5.476191
9,2025,-55.25,-70.25,280.079926,272.604309,286.130768,14.348863,5.642857


In [48]:
result_2025 = result[result['year']== 2025]
result_2025.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67420 entries, 1 to 134839
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    67420 non-null  int32  
 1   lat     67420 non-null  float64
 2   lon     67420 non-null  float64
 3   TG      67420 non-null  float32
 4   TN      67420 non-null  float32
 5   TX      67420 non-null  float32
 6   RR      67420 non-null  float32
 7   RR1     67420 non-null  float32
dtypes: float32(5), float64(2), int32(1)
memory usage: 3.1 MB


In [57]:
col_2025 = [col for col in result_2025.columns]
col_2025

['year', 'lat', 'lon', 'TG', 'TN', 'TX', 'RR', 'RR1']

In [58]:
col_rename_2025 = {
    col : col+"_25" for col in col_2025
}

col_rename_2025

{'year': 'year_25',
 'lat': 'lat_25',
 'lon': 'lon_25',
 'TG': 'TG_25',
 'TN': 'TN_25',
 'TX': 'TX_25',
 'RR': 'RR_25',
 'RR1': 'RR1_25'}

In [60]:
result_2025.rename(columns = col_rename_2025, inplace = True)
result_2025.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67420 entries, 1 to 134839
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   year_25  67420 non-null  int32  
 1   lat_25   67420 non-null  float64
 2   lon_25   67420 non-null  float64
 3   TG_25    67420 non-null  float32
 4   TN_25    67420 non-null  float32
 5   TX_25    67420 non-null  float32
 6   RR_25    67420 non-null  float32
 7   RR1_25   67420 non-null  float32
dtypes: float32(5), float64(2), int32(1)
memory usage: 3.1 MB


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_2025.rename(columns = col_rename_2025, inplace = True)


In [50]:
result_2030 = result[result['year']== 2030]
result_2030.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67420 entries, 134841 to 269679
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    67420 non-null  int32  
 1   lat     67420 non-null  float64
 2   lon     67420 non-null  float64
 3   TG      67420 non-null  float32
 4   TN      67420 non-null  float32
 5   TX      67420 non-null  float32
 6   RR      67420 non-null  float32
 7   RR1     67420 non-null  float32
dtypes: float32(5), float64(2), int32(1)
memory usage: 3.1 MB


In [51]:
result_2035 = result[result['year']== 2035]
result_2035.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67420 entries, 269681 to 404519
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    67420 non-null  int32  
 1   lat     67420 non-null  float64
 2   lon     67420 non-null  float64
 3   TG      67420 non-null  float32
 4   TN      67420 non-null  float32
 5   TX      67420 non-null  float32
 6   RR      67420 non-null  float32
 7   RR1     67420 non-null  float32
dtypes: float32(5), float64(2), int32(1)
memory usage: 3.1 MB


In [53]:
result_2040 = result[result['year']== 2040]
result_2040.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67420 entries, 404521 to 539359
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    67420 non-null  int32  
 1   lat     67420 non-null  float64
 2   lon     67420 non-null  float64
 3   TG      67420 non-null  float32
 4   TN      67420 non-null  float32
 5   TX      67420 non-null  float32
 6   RR      67420 non-null  float32
 7   RR1     67420 non-null  float32
dtypes: float32(5), float64(2), int32(1)
memory usage: 3.1 MB


### Column 이름 변경하기

In [61]:
col_name = [col for col in result.columns]
col_rename_2030 = {col : col+"_30" for col in col_name}
col_rename_2035 = {col : col+"_35" for col in col_name}
col_rename_2040 = {col : col+"_40" for col in col_name}

In [62]:
result_2030.rename(columns = col_rename_2030, inplace = True)
result_2035.rename(columns = col_rename_2035, inplace = True)
result_2040.rename(columns = col_rename_2040, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_2030.rename(columns = col_rename_2030, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_2035.rename(columns = col_rename_2035, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_2040.rename(columns = col_rename_2040, inplace = True)


In [63]:
result_2025

Unnamed: 0,year_25,lat_25,lon_25,TG_25,TN_25,TX_25,RR_25,RR1_25
1,2025,-55.75,-68.25,280.370392,272.859955,286.980927,21.325951,6.642857
3,2025,-55.75,-67.75,280.408722,272.805481,287.309967,22.646383,6.809524
5,2025,-55.75,-67.25,280.289856,272.406403,287.938965,22.818377,6.785714
7,2025,-55.25,-70.75,279.803711,272.307190,285.927155,13.733531,5.476191
9,2025,-55.25,-70.25,280.079926,272.604309,286.130768,14.348863,5.642857
...,...,...,...,...,...,...,...,...
134831,2025,83.75,-30.75,266.530884,242.431305,282.676575,3.396297,0.666667
134833,2025,83.75,-30.25,266.682556,242.383453,282.886963,3.425099,0.666667
134835,2025,83.75,-29.75,266.816559,242.354385,283.000305,3.479614,0.666667
134837,2025,83.75,-29.25,267.265839,242.606064,283.341370,0.935276,0.380952


In [64]:
result_2030

Unnamed: 0,year_30,lat_30,lon_30,TG_30,TN_30,TX_30,RR_30,RR1_30
134841,2030,-55.75,-68.25,280.140747,273.693268,287.027863,19.199226,6.404762
134843,2030,-55.75,-67.75,280.142731,273.505768,287.077637,20.584030,6.595238
134845,2030,-55.75,-67.25,279.986633,272.876740,287.396210,20.933834,6.500000
134847,2030,-55.25,-70.75,279.720825,273.426819,286.169922,12.678609,4.571429
134849,2030,-55.25,-70.25,279.967377,273.739624,286.481995,13.346206,4.833333
...,...,...,...,...,...,...,...,...
269671,2030,83.75,-30.75,266.467072,238.726227,280.432526,3.425451,0.571429
269673,2030,83.75,-30.25,266.623535,238.790329,280.664215,3.389993,0.523810
269675,2030,83.75,-29.75,266.762146,238.866760,280.803253,3.375963,0.523810
269677,2030,83.75,-29.25,267.216217,239.230972,281.203156,0.936212,0.238095


In [65]:
result_2035

Unnamed: 0,year_35,lat_35,lon_35,TG_35,TN_35,TX_35,RR_35,RR1_35
269681,2035,-55.75,-68.25,279.759399,270.389221,286.543488,25.751299,7.404762
269683,2035,-55.75,-67.75,279.788239,269.978821,286.546997,27.248379,7.500000
269685,2035,-55.75,-67.25,279.659271,269.033630,286.987823,27.412136,7.476191
269687,2035,-55.25,-70.75,279.226501,270.765961,286.527344,16.889334,6.285714
269689,2035,-55.25,-70.25,279.491791,270.836182,286.692413,17.673714,6.500000
...,...,...,...,...,...,...,...,...
404511,2035,83.75,-30.75,265.309692,237.436188,280.333099,2.755309,0.714286
404513,2035,83.75,-30.25,265.457092,237.489548,280.547302,2.757977,0.714286
404515,2035,83.75,-29.75,265.586670,237.557968,280.683136,2.764056,0.714286
404517,2035,83.75,-29.25,266.031372,237.912262,281.092499,0.762055,0.285714


In [66]:
result_2040

Unnamed: 0,year_40,lat_40,lon_40,TG_40,TN_40,TX_40,RR_40,RR1_40
404521,2040,-55.75,-68.25,279.734711,272.466797,286.401367,21.069767,7.166667
404523,2040,-55.75,-67.75,279.792664,272.408691,286.459747,22.270613,7.333333
404525,2040,-55.75,-67.25,279.692810,271.971924,286.828003,22.403786,7.238095
404527,2040,-55.25,-70.75,279.087463,271.402405,284.701874,14.235677,5.904762
404529,2040,-55.25,-70.25,279.378815,271.794128,285.196503,14.702337,5.857143
...,...,...,...,...,...,...,...,...
539351,2040,83.75,-30.75,265.067291,239.310699,280.962830,3.162781,0.619048
539353,2040,83.75,-30.25,265.221649,239.340775,281.227875,3.184580,0.619048
539355,2040,83.75,-29.75,265.358673,239.380524,281.401337,3.216710,0.666667
539357,2040,83.75,-29.25,265.810394,239.709717,281.835449,0.903655,0.380952


### 2021 Train 데이터에 [2025, 2030, 2035, 2040] 값 붙여주기

In [67]:
df = pd.read_csv('./Climate/climate_data_2021_growing_season.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,lat,lon,TG,TN,TX,RR,RR1
0,0,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524
1,1,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381
2,2,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238
3,3,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762
4,4,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476


In [68]:
df.set_index('Unnamed: 0', inplace= True)
df.head()

Unnamed: 0_level_0,lat,lon,TG,TN,TX,RR,RR1
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524
1,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381
2,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238
3,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762
4,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476


In [69]:
result_2025.iloc[:, 3:]

Unnamed: 0,TG_25,TN_25,TX_25,RR_25,RR1_25
1,280.370392,272.859955,286.980927,21.325951,6.642857
3,280.408722,272.805481,287.309967,22.646383,6.809524
5,280.289856,272.406403,287.938965,22.818377,6.785714
7,279.803711,272.307190,285.927155,13.733531,5.476191
9,280.079926,272.604309,286.130768,14.348863,5.642857
...,...,...,...,...,...
134831,266.530884,242.431305,282.676575,3.396297,0.666667
134833,266.682556,242.383453,282.886963,3.425099,0.666667
134835,266.816559,242.354385,283.000305,3.479614,0.666667
134837,267.265839,242.606064,283.341370,0.935276,0.380952


In [71]:
df = pd.concat([df, result_2025.iloc[:, 3: ], result_2030.iloc[:, 3:], result_2035.iloc[:, 3:], result_2040.iloc[:, 3:]], axis = 1)
df.head()

Unnamed: 0,lat,lon,TG,TN,TX,RR,RR1,TG_25,TN_25,TX_25,...,TG_35,TN_35,TX_35,RR_35,RR1_35,TG_40,TN_40,TX_40,RR_40,RR1_40
0,0.25,-80.25,299.502427,293.87598,306.98184,27.746031,5.309524,,,,...,,,,,,,,,,
1,0.25,-79.75,299.484624,293.64417,307.2451,63.80115,7.52381,280.370392,272.859955,286.980927,...,,,,,,,,,,
2,0.25,-79.25,299.607305,293.66605,307.08545,82.949785,8.095238,,,,...,,,,,,,,,,
3,0.25,-78.75,293.708853,287.09363,301.88147,60.724637,7.904762,280.408722,272.805481,287.309967,...,,,,,,,,,,
4,0.25,-78.25,288.31511,281.21375,297.1327,30.210875,7.190476,,,,...,,,,,,,,,,


In [72]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 303390 entries, 0 to 539359
Data columns (total 27 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   lat     67420 non-null  float64
 1   lon     67420 non-null  float64
 2   TG      67420 non-null  float64
 3   TN      67420 non-null  float64
 4   TX      67420 non-null  float64
 5   RR      67420 non-null  float64
 6   RR1     67420 non-null  float64
 7   TG_25   67420 non-null  float32
 8   TN_25   67420 non-null  float32
 9   TX_25   67420 non-null  float32
 10  RR_25   67420 non-null  float32
 11  RR1_25  67420 non-null  float32
 12  TG_30   67420 non-null  float32
 13  TN_30   67420 non-null  float32
 14  TX_30   67420 non-null  float32
 15  RR_30   67420 non-null  float32
 16  RR1_30  67420 non-null  float32
 17  TG_35   67420 non-null  float32
 18  TN_35   67420 non-null  float32
 19  TX_35   67420 non-null  float32
 20  RR_35   67420 non-null  float32
 21  RR1_35  67420 non-null  float32
 22  T

In [73]:
df.rename(
    columns = {
        'TG': 'TG_21',
        'TN': 'TN_21',
        'TX': 'TX_21',
        'RR': 'RR_21',
        'RR1': 'RR1_21'
    }, inplace = True
)

df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 303390 entries, 0 to 539359
Data columns (total 27 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   lat     67420 non-null  float64
 1   lon     67420 non-null  float64
 2   TG_21   67420 non-null  float64
 3   TN_21   67420 non-null  float64
 4   TX_21   67420 non-null  float64
 5   RR_21   67420 non-null  float64
 6   RR1_21  67420 non-null  float64
 7   TG_25   67420 non-null  float32
 8   TN_25   67420 non-null  float32
 9   TX_25   67420 non-null  float32
 10  RR_25   67420 non-null  float32
 11  RR1_25  67420 non-null  float32
 12  TG_30   67420 non-null  float32
 13  TN_30   67420 non-null  float32
 14  TX_30   67420 non-null  float32
 15  RR_30   67420 non-null  float32
 16  RR1_30  67420 non-null  float32
 17  TG_35   67420 non-null  float32
 18  TN_35   67420 non-null  float32
 19  TX_35   67420 non-null  float32
 20  RR_35   67420 non-null  float32
 21  RR1_35  67420 non-null  float32
 22  T

In [74]:
df.to_csv('./Climate/climate_data_2021-2040.csv')