In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.metrics import Recall,Precision
from sklearn.metrics import f1_score

# 24시간 단위 예측 

In [2]:
asos_df = pd.read_csv('/Volumes/ESD-ISO/project_preson/LSTM/LSTM_data/ASOS_data_preprocessed_V2.csv', index_col=0)

In [3]:
# rain_tomorrow 열을 rainy_day 열에서 24시간 이후 값을 가져와 생성
asos_df['rain_tomorrow'] = asos_df['rainy_day'].shift(-24)

In [4]:
asos_df.drop(index=asos_df[asos_df['rain_tomorrow'].isna()].index, inplace=True) # 24시간 뒤에 데이터가 없는 마지막 24개열 삭제

In [5]:
asos_df[asos_df['rain_tomorrow']!=0]

Unnamed: 0,tm,hm,pv,ps,td,pa,dc10Tca,clfmAbbrCd,lcsCh,m005Te,m01Te,m02Te,m03Te,vs,ws,wd,rn,rainy_day,rain_tomorrow
341,1980-03-30 09:00,71.0,6.8,1019.8,1.5,1009.0,8.0,ScCi,0.0,5.7,6.2,7.3,7.5,500.0,1.7,50.0,0.0,0,1.0
342,1980-03-30 15:00,43.0,8.4,1017.5,4.5,1007.1,4.0,Ci,0.0,17.1,13.1,9.5,7.9,1000.0,2.5,230.0,0.0,0,1.0
343,1980-03-30 21:00,53.0,7.8,1017.7,3.4,1007.3,8.0,Cs,0.0,10.8,11.1,11.1,9.6,1000.0,4.0,290.0,0.0,0,1.0
368,1980-04-06 03:00,91.0,11.5,1002.5,9.0,992.0,10.0,StNs,0.0,12.0,12.9,13.5,13.1,500.0,5.3,180.0,0.0,0,1.0
369,1980-04-06 09:00,91.0,8.0,1005.8,3.8,995.1,10.0,ScAs,0.0,8.5,9.9,12.5,12.6,1500.0,5.3,200.0,0.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43387,2024-12-20 00:00:00,51.0,2.9,1026.8,-9.9,1015.9,10.0,no_clouds,0.0,-0.3,-0.4,1.3,2.5,2177.0,0.6,20.0,0.0,0,1.0
43390,2024-12-20 03:00:00,54.0,2.9,1026.2,-9.9,1015.2,9.0,Ci,4.2,-0.3,-0.4,1.3,2.4,2111.0,1.8,20.0,0.0,0,1.0
43393,2024-12-20 06:00:00,57.0,3.2,1025.1,-8.7,1014.2,10.0,ScCs,8.0,-0.4,-0.5,1.2,2.4,1922.0,1.6,20.0,0.0,0,1.0
43396,2024-12-20 09:00:00,58.0,3.5,1024.0,-7.5,1013.1,10.0,ScCs,9.0,-0.3,-0.4,1.2,2.4,1823.0,2.2,50.0,0.0,0,1.0


In [6]:
asos_df = asos_df.sample(frac=1, random_state=20250113).reset_index(drop=True)

In [7]:
rain_data = asos_df[asos_df['rain_tomorrow']==1].loc[:, ['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [8]:
rain_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4304 entries, 18 to 60765
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   hm             4304 non-null   float64
 1   pv             4304 non-null   float64
 2   td             4304 non-null   float64
 3   pa             4304 non-null   float64
 4   m005Te         4304 non-null   float64
 5   m01Te          4304 non-null   float64
 6   m02Te          4304 non-null   float64
 7   m03Te          4304 non-null   float64
 8   clfmAbbrCd     4304 non-null   object 
 9   lcsCh          4304 non-null   float64
 10  rain_tomorrow  4304 non-null   float64
dtypes: float64(10), object(1)
memory usage: 403.5+ KB


In [9]:
no_rain_data = asos_df[asos_df['rain_tomorrow']==0].sample(n=4304).loc[:,['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [10]:
data = pd.concat([rain_data, no_rain_data])

In [11]:
data = data.sample(frac=1).reset_index(drop=True) # 데이터 셔플

In [12]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8608 entries, 0 to 8607
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   hm             8608 non-null   float64
 1   pv             8608 non-null   float64
 2   td             8608 non-null   float64
 3   pa             8608 non-null   float64
 4   m005Te         8608 non-null   float64
 5   m01Te          8608 non-null   float64
 6   m02Te          8608 non-null   float64
 7   m03Te          8608 non-null   float64
 8   clfmAbbrCd     8608 non-null   object 
 9   lcsCh          8608 non-null   float64
 10  rain_tomorrow  8608 non-null   float64
dtypes: float64(10), object(1)
memory usage: 739.9+ KB


In [13]:
encoder = LabelEncoder()
data.clfmAbbrCd = encoder.fit_transform(data.clfmAbbrCd)

In [14]:
train_X = data.iloc[:5000,:-1]
train_y = data.iloc[:5000,-1]
test_X = data.iloc[5000:,:-1]
test_y = data.iloc[5000:,-1]

In [15]:
train_X.shape, train_y.shape

((5000, 10), (5000,))

In [16]:
scaler = MinMaxScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.fit_transform(test_X)

In [17]:
train_X.shape, test_X.shape, train_y.shape, test_y.shape

((5000, 10), (3608, 10), (5000,), (3608,))

In [18]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(train_X.shape[1],)),  
    Dropout(0.3),
    Dense(128, activation='relu'),  
    Dropout(0.4),
    Dense(256, activation='relu'),  
    Dropout(0.4),
    Dense(64, activation='relu'),  
    Dropout(0.3),
    Dense(32, activation='relu'),  
    Dropout(0.2),
    Dense(1, activation='sigmoid') 
])

model.compile(loss='binary_crossentropy', optimizer='adam',  metrics=['accuracy',Recall(),Precision()])
# 모델 요약
model.summary()

Metal device set to: Apple M2 Pro
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                704       
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 128)               8320      
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_2 (Dense)             (None, 256)               33024     
                                                                 
 dropout_2 (Dropout)         (None, 256)               0         
                                                                 
 dense_3 (Dense)      

2025-01-15 12:47:06.981695: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-01-15 12:47:06.981824: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [19]:
history = model.fit(train_X, train_y, epochs=30, validation_split=0.2)

Epoch 1/30


2025-01-15 12:47:07.166694: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2025-01-15 12:47:07.382227: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-01-15 12:47:08.552196: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [20]:
loss, accuracy, _, _, = model.evaluate(np.array(test_X), np.array(test_y))
print(f"테스트 정확도: {accuracy:.4f}")

  1/113 [..............................] - ETA: 41s - loss: 0.6270 - accuracy: 0.6875 - recall: 0.7059 - precision: 0.7059

2025-01-15 12:47:32.083491: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


테스트 정확도: 0.6374


In [21]:
y_hat = model.predict(test_X)



2025-01-15 12:47:33.682014: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In [22]:
y_hat = y_hat.argmax(axis=1)

In [23]:
test_y.shape, y_hat.shape

((3608,), (3608,))

In [24]:
f1 = f1_score(test_y, y_hat, average='micro')
print(f"F1 스코어: {f1:.4f}")

F1 스코어: 0.5083


# 3시간 단위 예측 

In [25]:
asos_df = pd.read_csv('/Volumes/ESD-ISO/project_preson/LSTM/LSTM_data/ASOS_data_preprocessed_V2.csv', index_col=0)

In [26]:
# rain_tomorrow 열을 rainy_day 열에서 3시간 이후 값을 가져와 생성
asos_df['rain_tomorrow'] = asos_df['rainy_day'].shift(-3)

In [27]:
asos_df[asos_df['rain_tomorrow']!=0]

Unnamed: 0,tm,hm,pv,ps,td,pa,dc10Tca,clfmAbbrCd,lcsCh,m005Te,m01Te,m02Te,m03Te,vs,ws,wd,rn,rainy_day,rain_tomorrow
362,1980-04-04 15:00,55.0,13.3,1021.0,11.2,1010.7,10.0,ScAs,0.0,17.1,13.7,11.0,9.6,2000.0,7.8,200.0,0.0,0,1.0
363,1980-04-04 21:00,71.0,14.6,1020.3,12.6,1010.0,10.0,ScAs,0.0,14.5,13.8,12.5,11.2,1500.0,2.0,230.0,0.0,0,1.0
364,1980-04-05 03:00,69.0,14.1,1015.8,12.1,1005.6,10.0,As,0.0,13.5,13.1,12.5,11.3,1200.0,3.3,140.0,0.0,0,1.0
389,1980-04-11 09:00,66.0,8.2,1020.0,4.1,1009.4,10.0,Ci,0.0,9.3,8.8,10.1,10.6,700.0,1.5,50.0,0.0,0,1.0
390,1980-04-11 15:00,44.0,8.5,1018.4,4.6,1007.9,10.0,Ci,0.0,20.8,16.6,12.8,11.6,1800.0,3.0,230.0,0.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43417,2024-12-21 06:00:00,94.0,6.3,1015.6,0.5,1004.9,9.0,Sc,5.0,-0.1,-0.4,0.7,1.8,201.0,3.4,270.0,0.7,1,1.0
43528,2024-12-25 21:00:00,69.0,6.1,1022.1,-0.1,1011.5,9.0,ScCi,8.0,-0.1,-0.3,1.0,2.0,2155.0,2.0,200.0,0.0,0,1.0
43650,2024-12-30 23:00:00,75.0,6.1,1016.0,0.0,1005.4,0.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,754.0,0.7,250.0,0.0,0,
43651,2024-12-31 00:00:00,81.0,7.0,1015.8,1.8,1005.2,9.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,563.0,2.3,250.0,0.0,0,


In [28]:
asos_df.drop(index=asos_df[asos_df['rain_tomorrow'].isna()].index, inplace=True)

In [29]:
asos_df = asos_df.sample(frac=1, random_state=20250113).reset_index(drop=True)

In [30]:
rain_data = asos_df[asos_df['rain_tomorrow']==1].loc[:, ['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [31]:
rain_data

Unnamed: 0,hm,pv,td,pa,m005Te,m01Te,m02Te,m03Te,clfmAbbrCd,lcsCh,rain_tomorrow
36,99.0,21.8,18.9,1001.3,20.2,20.0,19.7,19.9,StNs,2.0,1.0
37,96.0,31.2,24.7,996.4,26.9,26.5,25.3,24.8,StNs,6.0,1.0
48,93.0,29.7,23.9,987.9,27.5,27.5,26.6,26.3,StNs,2.0,1.0
50,91.0,23.5,20.1,984.6,25.3,25.7,26.2,27.2,StNs,7.0,1.0
102,55.0,6.3,0.5,1006.4,4.2,3.8,3.3,3.6,ScAs,6.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
60648,97.0,29.7,23.9,994.3,26.2,25.9,25.9,25.8,StNs,5.0,1.0
60671,94.0,31.1,24.7,998.0,27.2,27.1,25.5,25.2,StNs,3.0,1.0
60681,99.0,30.4,24.3,997.5,26.3,26.5,26.1,26.5,StNs,4.0,1.0
60715,83.0,22.1,19.1,1000.3,22.6,23.0,23.4,23.5,StNs,0.0,1.0


In [32]:
no_rain_data = asos_df[asos_df['rain_tomorrow']==0].sample(n=4304).loc[:,['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [33]:
data = pd.concat([rain_data, no_rain_data])

In [34]:
data = data.sample(frac=1).reset_index(drop=True)

In [35]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8608 entries, 0 to 8607
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   hm             8608 non-null   float64
 1   pv             8608 non-null   float64
 2   td             8608 non-null   float64
 3   pa             8608 non-null   float64
 4   m005Te         8608 non-null   float64
 5   m01Te          8608 non-null   float64
 6   m02Te          8608 non-null   float64
 7   m03Te          8608 non-null   float64
 8   clfmAbbrCd     8608 non-null   object 
 9   lcsCh          8608 non-null   float64
 10  rain_tomorrow  8608 non-null   float64
dtypes: float64(10), object(1)
memory usage: 739.9+ KB


In [36]:
encoder = LabelEncoder()
data.clfmAbbrCd = encoder.fit_transform(data.clfmAbbrCd)

In [37]:
train_X = data.iloc[:5000,:-1]
train_y = data.iloc[:5000,-1]
test_X = data.iloc[5000:,:-1]
test_y = data.iloc[5000:,-1]

In [38]:
scaler = MinMaxScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.fit_transform(test_X)

In [39]:
train_X.shape, test_X.shape, train_y.shape, test_y.shape

((5000, 10), (3608, 10), (5000,), (3608,))

In [40]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(train_X.shape[1],)),  
    Dropout(0.3),
    Dense(128, activation='relu'),  
    Dropout(0.4),
    Dense(256, activation='relu'),  
    Dropout(0.4),
    Dense(64, activation='relu'),  
    Dropout(0.3),
    Dense(32, activation='relu'),  
    Dropout(0.2),
    Dense(1, activation='sigmoid') 
])

model.compile(loss='binary_crossentropy', optimizer='adam',  metrics=['accuracy',Recall(),Precision()])
# 모델 요약
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 64)                704       
                                                                 
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_7 (Dense)             (None, 128)               8320      
                                                                 
 dropout_6 (Dropout)         (None, 128)               0         
                                                                 
 dense_8 (Dense)             (None, 256)               33024     
                                                                 
 dropout_7 (Dropout)         (None, 256)               0         
                                                                 
 dense_9 (Dense)             (None, 64)               

In [41]:
history = model.fit(train_X, train_y, epochs=30, validation_split=0.2)

Epoch 1/30


2025-01-15 12:47:34.324749: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-01-15 12:47:35.466084: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [42]:
loss, accuracy, _, _, = model.evaluate(np.array(test_X), np.array(test_y))
print(f"테스트 정확도: {accuracy:.4f}")

  1/113 [..............................] - ETA: 34s - loss: 0.5538 - accuracy: 0.7500 - recall_1: 0.7333 - precision_1: 0.7333

2025-01-15 12:47:58.630890: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


테스트 정확도: 0.7367


In [43]:
y_hat = model.predict(test_X)



2025-01-15 12:48:00.234879: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In [44]:
y_hat = y_hat.argmax(axis=1)

In [45]:
test_y.shape, y_hat.shape

((3608,), (3608,))

In [46]:
f1 = f1_score(test_y, y_hat, average='micro')
print(f"F1 스코어: {f1:.4f}")

F1 스코어: 0.4950


# 6시간 단위 예측

In [47]:
asos_df = pd.read_csv('/Volumes/ESD-ISO/project_preson/LSTM/LSTM_data/ASOS_data_preprocessed_V2.csv', index_col=0)

In [48]:
# rain_tomorrow 열을 rainy_day 열에서 6시간 이후 값을 가져와 생성
asos_df['rain_tomorrow'] = asos_df['rainy_day'].shift(-6)

In [49]:
asos_df[asos_df['rain_tomorrow']!=0]

Unnamed: 0,tm,hm,pv,ps,td,pa,dc10Tca,clfmAbbrCd,lcsCh,m005Te,m01Te,m02Te,m03Te,vs,ws,wd,rn,rainy_day,rain_tomorrow
359,1980-04-03 21:00,67.0,9.8,1022.0,6.7,1011.5,0.0,no_cloud,0.0,11.6,12.2,11.9,10.0,2200.0,4.3,180.0,0.0,0,1.0
360,1980-04-04 03:00,76.0,9.3,1023.1,5.9,1012.4,0.0,no_cloud,0.0,7.8,9.2,10.4,9.9,2500.0,2.0,20.0,0.0,0,1.0
361,1980-04-04 09:00,51.0,7.9,1023.5,3.6,1013.0,8.0,ScAc,0.0,9.6,8.7,9.2,9.1,2000.0,1.5,50.0,0.0,0,1.0
386,1980-04-10 15:00,39.0,7.4,1021.2,2.7,1010.9,0.0,no_cloud,0.0,22.2,17.7,12.8,11.0,2500.0,2.0,230.0,0.0,0,1.0
387,1980-04-10 21:00,45.0,5.5,1021.6,-1.4,1011.0,0.0,no_cloud,0.0,11.2,13.3,13.9,12.6,2000.0,3.8,250.0,0.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43648,2024-12-30 21:00:00,67.0,6.1,1016.0,0.0,1005.4,6.0,Ci,2.0,-0.1,-0.3,0.8,1.7,1053.0,2.7,270.0,0.0,0,
43649,2024-12-30 22:00:00,71.0,6.1,1016.2,0.0,1005.6,0.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,906.0,2.4,270.0,0.0,0,
43650,2024-12-30 23:00:00,75.0,6.1,1016.0,0.0,1005.4,0.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,754.0,0.7,250.0,0.0,0,
43651,2024-12-31 00:00:00,81.0,7.0,1015.8,1.8,1005.2,9.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,563.0,2.3,250.0,0.0,0,


In [50]:
asos_df.drop(index=asos_df[asos_df['rain_tomorrow'].isna()].index, inplace=True)

In [51]:
asos_df = asos_df.sample(frac=1, random_state=20250113).reset_index(drop=True)

In [52]:
rain_data = asos_df[asos_df['rain_tomorrow']==1].loc[:, ['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [53]:
rain_data

Unnamed: 0,hm,pv,td,pa,m005Te,m01Te,m02Te,m03Te,clfmAbbrCd,lcsCh,rain_tomorrow
1,94.0,21.0,18.3,1010.7,23.2,22.9,22.9,23.1,StNs,3.0,1.0
4,93.0,7.0,1.9,1008.8,0.0,0.0,0.6,1.4,StNs,4.0,1.0
18,93.0,31.7,25.0,996.7,26.9,26.4,26.3,26.3,ScAs,8.0,1.0
34,29.0,7.6,3.0,1008.6,22.6,19.8,16.9,17.0,no_cloud,0.0,1.0
42,93.0,28.8,23.4,995.5,27.0,26.6,26.6,26.9,StNs,5.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
60668,94.0,31.1,24.7,998.0,27.2,27.1,25.5,25.2,StNs,3.0,1.0
60678,99.0,30.4,24.3,997.5,26.3,26.5,26.1,26.5,StNs,4.0,1.0
60712,83.0,22.1,19.1,1000.3,22.6,23.0,23.4,23.5,StNs,0.0,1.0
60729,92.0,5.1,-2.4,1003.3,0.0,-0.3,0.2,1.6,StNs,4.0,1.0


In [54]:
no_rain_data = asos_df[asos_df['rain_tomorrow']==0].sample(n=4304).loc[:,['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [55]:
data = pd.concat([rain_data, no_rain_data])

In [56]:
data = data.sample(frac=1).reset_index(drop=True)

In [57]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8608 entries, 0 to 8607
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   hm             8608 non-null   float64
 1   pv             8608 non-null   float64
 2   td             8608 non-null   float64
 3   pa             8608 non-null   float64
 4   m005Te         8608 non-null   float64
 5   m01Te          8608 non-null   float64
 6   m02Te          8608 non-null   float64
 7   m03Te          8608 non-null   float64
 8   clfmAbbrCd     8608 non-null   object 
 9   lcsCh          8608 non-null   float64
 10  rain_tomorrow  8608 non-null   float64
dtypes: float64(10), object(1)
memory usage: 739.9+ KB


In [58]:
encoder = LabelEncoder()
data.clfmAbbrCd = encoder.fit_transform(data.clfmAbbrCd)

In [59]:
train_X = data.iloc[:5000,:-1]
train_y = data.iloc[:5000,-1]
test_X = data.iloc[5000:,:-1]
test_y = data.iloc[5000:,-1]

In [60]:
scaler = MinMaxScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.fit_transform(test_X)

In [61]:
train_X.shape, test_X.shape, train_y.shape, test_y.shape

((5000, 10), (3608, 10), (5000,), (3608,))

In [62]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(train_X.shape[1],)),  
    Dropout(0.3),
    Dense(128, activation='relu'),  
    Dropout(0.4),
    Dense(256, activation='relu'),  
    Dropout(0.4),
    Dense(64, activation='relu'),  
    Dropout(0.3),
    Dense(32, activation='relu'),  
    Dropout(0.2),
    Dense(1, activation='sigmoid') 
])

model.compile(loss='binary_crossentropy', optimizer='adam',  metrics=['accuracy',Recall(),Precision()])
# 모델 요약
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 64)                704       
                                                                 
 dropout_10 (Dropout)        (None, 64)                0         
                                                                 
 dense_13 (Dense)            (None, 128)               8320      
                                                                 
 dropout_11 (Dropout)        (None, 128)               0         
                                                                 
 dense_14 (Dense)            (None, 256)               33024     
                                                                 
 dropout_12 (Dropout)        (None, 256)               0         
                                                                 
 dense_15 (Dense)            (None, 64)               

In [63]:
history = model.fit(train_X, train_y, epochs=30, validation_split=0.2)

Epoch 1/30


2025-01-15 12:53:34.948291: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-01-15 12:53:36.008532: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [64]:
loss, accuracy, _, _, = model.evaluate(np.array(test_X), np.array(test_y))
print(f"테스트 정확도: {accuracy:.4f}")

2025-01-15 12:53:58.694030: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


테스트 정확도: 0.6913


In [65]:
y_hat = model.predict(test_X)



2025-01-15 12:54:00.263360: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In [66]:
y_hat = y_hat.argmax(axis=1)

In [67]:
test_y.shape, y_hat.shape

((3608,), (3608,))

In [68]:
f1 = f1_score(test_y, y_hat, average='micro')
print(f"F1 스코어: {f1:.4f}")

F1 스코어: 0.5014


# 12시간 단위 예측 

In [69]:
asos_df = pd.read_csv('/Volumes/ESD-ISO/project_preson/LSTM/LSTM_data/ASOS_data_preprocessed_V2.csv', index_col=0)

In [70]:
# rain_tomorrow 열을 rainy_day 열에서 12시간 이후 값을 가져와 생성
asos_df['rain_tomorrow'] = asos_df['rainy_day'].shift(-12)

In [71]:
asos_df[asos_df['rain_tomorrow']!=0]

Unnamed: 0,tm,hm,pv,ps,td,pa,dc10Tca,clfmAbbrCd,lcsCh,m005Te,m01Te,m02Te,m03Te,vs,ws,wd,rn,rainy_day,rain_tomorrow
353,1980-04-02 09:00,50.0,4.2,1025.2,-5.0,1014.4,0.0,no_cloud,0.0,3.8,4.9,6.8,7.3,2200.0,3.0,270.0,0.0,0,1.0
354,1980-04-02 15:00,25.0,3.4,1023.9,-7.8,1013.3,0.0,no_cloud,0.0,16.6,12.1,8.6,7.5,2500.0,3.5,230.0,0.0,0,1.0
355,1980-04-02 21:00,31.0,3.0,1024.4,-9.4,1013.7,1.0,Sc,0.0,7.9,9.5,10.0,9.0,1500.0,1.0,270.0,0.0,0,1.0
380,1980-04-09 03:00,78.0,8.2,1016.3,4.1,1005.7,4.0,As,0.0,7.8,9.7,11.3,11.0,2000.0,3.0,340.0,0.0,0,1.0
381,1980-04-09 09:00,64.0,7.5,1018.1,2.9,1007.5,5.0,Ci,0.0,8.8,8.3,9.7,10.0,1500.0,1.0,270.0,0.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43648,2024-12-30 21:00:00,67.0,6.1,1016.0,0.0,1005.4,6.0,Ci,2.0,-0.1,-0.3,0.8,1.7,1053.0,2.7,270.0,0.0,0,
43649,2024-12-30 22:00:00,71.0,6.1,1016.2,0.0,1005.6,0.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,906.0,2.4,270.0,0.0,0,
43650,2024-12-30 23:00:00,75.0,6.1,1016.0,0.0,1005.4,0.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,754.0,0.7,250.0,0.0,0,
43651,2024-12-31 00:00:00,81.0,7.0,1015.8,1.8,1005.2,9.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,563.0,2.3,250.0,0.0,0,


In [72]:
asos_df.drop(index=asos_df[asos_df['rain_tomorrow'].isna()].index, inplace=True)

In [73]:
asos_df = asos_df.sample(frac=1, random_state=20250113).reset_index(drop=True)

In [74]:
rain_data = asos_df[asos_df['rain_tomorrow']==1].loc[:, ['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [75]:
rain_data

Unnamed: 0,hm,pv,td,pa,m005Te,m01Te,m02Te,m03Te,clfmAbbrCd,lcsCh,rain_tomorrow
34,79.0,26.1,21.8,995.3,27.3,27.2,26.2,25.8,Sc,8.666667,1.0
35,55.0,17.7,15.6,1004.2,23.9,23.5,22.4,22.5,As,28.000000,1.0
48,90.0,22.6,19.4,996.2,23.0,25.4,26.6,26.3,Sc,10.000000,1.0
66,55.0,17.2,15.1,1004.2,24.0,23.6,22.6,22.6,As,28.000000,1.0
79,68.0,4.3,-4.7,1022.6,0.0,0.1,1.4,1.5,no_cloud,0.000000,1.0
...,...,...,...,...,...,...,...,...,...,...,...
60680,56.0,10.3,7.4,999.6,18.3,18.3,17.8,17.0,ScAs,10.000000,1.0
60685,60.0,16.4,14.4,1007.1,21.8,21.7,21.6,22.4,As,30.000000,1.0
60687,71.0,18.5,16.3,995.3,22.9,22.2,21.3,21.5,ScAs,7.000000,1.0
60698,39.0,8.4,4.5,1001.1,15.4,13.2,13.6,13.6,AcCi,42.000000,1.0


In [76]:
no_rain_data = asos_df[asos_df['rain_tomorrow']==0].sample(n=4304).loc[:,['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [77]:
data = pd.concat([rain_data, no_rain_data])

In [78]:
data = data.sample(frac=1).reset_index(drop=True)

In [79]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8608 entries, 0 to 8607
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   hm             8608 non-null   float64
 1   pv             8608 non-null   float64
 2   td             8608 non-null   float64
 3   pa             8608 non-null   float64
 4   m005Te         8608 non-null   float64
 5   m01Te          8608 non-null   float64
 6   m02Te          8608 non-null   float64
 7   m03Te          8608 non-null   float64
 8   clfmAbbrCd     8608 non-null   object 
 9   lcsCh          8608 non-null   float64
 10  rain_tomorrow  8608 non-null   float64
dtypes: float64(10), object(1)
memory usage: 739.9+ KB


In [80]:
encoder = LabelEncoder()
data.clfmAbbrCd = encoder.fit_transform(data.clfmAbbrCd)

In [81]:
train_X = data.iloc[:5000,:-1]
train_y = data.iloc[:5000,-1]
test_X = data.iloc[5000:,:-1]
test_y = data.iloc[5000:,-1]

In [82]:
scaler = MinMaxScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.fit_transform(test_X)

In [83]:
train_X.shape, test_X.shape, train_y.shape, test_y.shape

((5000, 10), (3608, 10), (5000,), (3608,))

In [84]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(train_X.shape[1],)),  
    Dropout(0.3),
    Dense(128, activation='relu'),  
    Dropout(0.4),
    Dense(256, activation='relu'),  
    Dropout(0.4),
    Dense(64, activation='relu'),  
    Dropout(0.3),
    Dense(32, activation='relu'),  
    Dropout(0.2),
    Dense(1, activation='sigmoid') 
])

model.compile(loss='binary_crossentropy', optimizer='adam',  metrics=['accuracy',Recall(),Precision()])
# 모델 요약
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_18 (Dense)            (None, 64)                704       
                                                                 
 dropout_15 (Dropout)        (None, 64)                0         
                                                                 
 dense_19 (Dense)            (None, 128)               8320      
                                                                 
 dropout_16 (Dropout)        (None, 128)               0         
                                                                 
 dense_20 (Dense)            (None, 256)               33024     
                                                                 
 dropout_17 (Dropout)        (None, 256)               0         
                                                                 
 dense_21 (Dense)            (None, 64)               

In [85]:
history = model.fit(train_X, train_y, epochs=30, validation_split=0.2)

Epoch 1/30


2025-01-15 13:54:08.337383: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-01-15 13:54:09.467743: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [86]:
loss, accuracy, _, _, = model.evaluate(np.array(test_X), np.array(test_y))
print(f"테스트 정확도: {accuracy:.4f}")

2025-01-15 13:54:32.531486: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


테스트 정확도: 0.6537


In [87]:
y_hat = model.predict(test_X)



2025-01-15 13:54:34.177359: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




In [88]:
y_hat = y_hat.argmax(axis=1)

In [89]:
test_y.shape, y_hat.shape

((3608,), (3608,))

In [90]:
f1 = f1_score(test_y, y_hat, average='micro')
print(f"F1 스코어: {f1:.4f}")

F1 스코어: 0.4953
