In [87]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.metrics import Recall,Precision
from sklearn.metrics import f1_score

# 24시간 단위 예측 

In [206]:
asos_df = pd.read_csv('/Volumes/ESD-ISO/project_preson/shared_data_preparation/ASOS_data_preprocessed_V3.csv')

In [207]:
# rain_tomorrow 열을 rainy_day 열에서 24시간 이후 값을 가져와 생성
asos_df['rain_tomorrow'] = asos_df['rainy_day'].shift(-24)

In [208]:
asos_df.drop(index=asos_df[asos_df['rain_tomorrow'].isna()].index, inplace=True) # 24시간 뒤에 데이터가 없는 마지막 24개열 삭제

In [209]:
asos_df[asos_df['rain_tomorrow']!=0]

Unnamed: 0,tm,hm,pv,ps,td,pa,dc10Tca,clfmAbbrCd,lcsCh,m005Te,m01Te,m02Te,m03Te,vs,ws,wd,rn,rainy_day,rain_tomorrow
357,1980-03-30 09:00,71.0,6.8,1019.8,1.5,1009.0,8.0,ScCi,0.0,5.7,6.2,7.3,7.5,500.0,1.7,50.0,0.0,0,1.0
358,1980-03-30 15:00,43.0,8.4,1017.5,4.5,1007.1,4.0,Ci,0.0,17.1,13.1,9.5,7.9,1000.0,2.5,230.0,0.0,0,1.0
359,1980-03-30 21:00,53.0,7.8,1017.7,3.4,1007.3,8.0,Cs,0.0,10.8,11.1,11.1,9.6,1000.0,4.0,290.0,0.0,0,1.0
384,1980-04-06 03:00,91.0,11.5,1002.5,9.0,992.0,10.0,StNs,0.0,12.0,12.9,13.5,13.1,500.0,5.3,180.0,0.0,0,1.0
385,1980-04-06 09:00,91.0,8.0,1005.8,3.8,995.1,10.0,ScAs,0.0,8.5,9.9,12.5,12.6,1500.0,5.3,200.0,0.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95561,2024-12-20 00:00:00,51.0,2.9,1026.8,-9.9,1015.9,10.0,no_clouds,0.0,-0.3,-0.4,1.3,2.5,2177.0,0.6,20.0,0.0,0,1.0
95564,2024-12-20 03:00:00,54.0,2.9,1026.2,-9.9,1015.2,9.0,Ci,4.2,-0.3,-0.4,1.3,2.4,2111.0,1.8,20.0,0.0,0,1.0
95567,2024-12-20 06:00:00,57.0,3.2,1025.1,-8.7,1014.2,10.0,ScCs,8.0,-0.4,-0.5,1.2,2.4,1922.0,1.6,20.0,0.0,0,1.0
95570,2024-12-20 09:00:00,58.0,3.5,1024.0,-7.5,1013.1,10.0,ScCs,9.0,-0.3,-0.4,1.2,2.4,1823.0,2.2,50.0,0.0,0,1.0


In [210]:
asos_df = asos_df.sample(frac=1, random_state=20250113).reset_index(drop=True)

In [211]:
rain_data = asos_df[asos_df['rain_tomorrow']==1].loc[:, ['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [212]:
len(rain_data)

7352

In [213]:
no_rain_data = asos_df[asos_df['rain_tomorrow']==0].sample(n=len(rain_data)).loc[:,['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [214]:
data = pd.concat([rain_data, no_rain_data])

In [215]:
data = data.sample(frac=1).reset_index(drop=True) # 데이터 셔플

In [216]:
split_index = round((len(data)/10)*7)

In [217]:
encoder = LabelEncoder()
data.clfmAbbrCd = encoder.fit_transform(data.clfmAbbrCd)

In [218]:
train_X = data.iloc[:split_index,:-1]
train_y = data.iloc[:split_index,-1]
test_X = data.iloc[split_index:,:-1]
test_y = data.iloc[split_index:,-1]

In [219]:
train_X.shape, train_y.shape

((10293, 10), (10293,))

In [220]:
scaler = MinMaxScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.fit_transform(test_X)

In [221]:
train_X.shape, test_X.shape, train_y.shape, test_y.shape

((10293, 10), (4411, 10), (10293,), (4411,))

In [222]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(train_X.shape[1],)),  
    Dropout(0.3),
    Dense(128, activation='relu'),  
    Dropout(0.4),
    Dense(256, activation='relu'),  
    Dropout(0.4),
    Dense(64, activation='relu'),  
    Dropout(0.3),
    Dense(32, activation='relu'),  
    Dropout(0.2),
    Dense(1, activation='sigmoid') 
])

model.compile(loss='binary_crossentropy', optimizer='adam',  metrics=['accuracy',Recall(),Precision()])
# 모델 요약
model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_48 (Dense)            (None, 64)                704       
                                                                 
 dropout_40 (Dropout)        (None, 64)                0         
                                                                 
 dense_49 (Dense)            (None, 128)               8320      
                                                                 
 dropout_41 (Dropout)        (None, 128)               0         
                                                                 
 dense_50 (Dense)            (None, 256)               33024     
                                                                 
 dropout_42 (Dropout)        (None, 256)               0         
                                                                 
 dense_51 (Dense)            (None, 64)               

In [223]:
history = model.fit(train_X, train_y, epochs=30, validation_split=0.2)

Epoch 1/30


2025-01-16 17:31:12.605385: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-01-16 17:31:15.132513: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [106]:
loss, accuracy, _, _, = model.evaluate(np.array(test_X), np.array(test_y))
print(f"테스트 정확도: {accuracy:.4f}")

2025-01-16 17:16:50.389133: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


테스트 정확도: 0.6108


In [203]:
y_hat = (model.predict(test_X)>=0.5).reshape(-1)



In [204]:
test_y.shape, y_hat.shape

((4411,), (4411,))

In [205]:
f1 = f1_score(test_y, y_hat, average='micro')
print(f"F1 스코어: {f1:.4f}")

F1 스코어: 0.7533


# 3시간 단위 예측 

In [160]:
asos_df = pd.read_csv('/Volumes/ESD-ISO/project_preson/shared_data_preparation/ASOS_data_preprocessed_V3.csv')

In [161]:
# rain_tomorrow 열을 rainy_day 열에서 3시간 이후 값을 가져와 생성
asos_df['rain_tomorrow'] = asos_df['rainy_day'].shift(-3)

In [162]:
asos_df[asos_df['rain_tomorrow']!=0]

Unnamed: 0,tm,hm,pv,ps,td,pa,dc10Tca,clfmAbbrCd,lcsCh,m005Te,m01Te,m02Te,m03Te,vs,ws,wd,rn,rainy_day,rain_tomorrow
378,1980-04-04 15:00,55.0,13.3,1021.0,11.2,1010.7,10.0,ScAs,0.0,17.1,13.7,11.0,9.6,2000.0,7.8,200.0,0.0,0,1.0
379,1980-04-04 21:00,71.0,14.6,1020.3,12.6,1010.0,10.0,ScAs,0.0,14.5,13.8,12.5,11.2,1500.0,2.0,230.0,0.0,0,1.0
380,1980-04-05 03:00,69.0,14.1,1015.8,12.1,1005.6,10.0,As,0.0,13.5,13.1,12.5,11.3,1200.0,3.3,140.0,0.0,0,1.0
405,1980-04-11 09:00,66.0,8.2,1020.0,4.1,1009.4,10.0,Ci,0.0,9.3,8.8,10.1,10.6,700.0,1.5,50.0,0.0,0,1.0
406,1980-04-11 15:00,44.0,8.5,1018.4,4.6,1007.9,10.0,Ci,0.0,20.8,16.6,12.8,11.6,1800.0,3.0,230.0,0.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95591,2024-12-21 06:00:00,94.0,6.3,1015.6,0.5,1004.9,9.0,Sc,5.0,-0.1,-0.4,0.7,1.8,201.0,3.4,270.0,0.7,1,1.0
95702,2024-12-25 21:00:00,69.0,6.1,1022.1,-0.1,1011.5,9.0,ScCi,8.0,-0.1,-0.3,1.0,2.0,2155.0,2.0,200.0,0.0,0,1.0
95824,2024-12-30 23:00:00,75.0,6.1,1016.0,0.0,1005.4,0.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,754.0,0.7,250.0,0.0,0,
95825,2024-12-31 00:00:00,81.0,7.0,1015.8,1.8,1005.2,9.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,563.0,2.3,250.0,0.0,0,


In [163]:
asos_df.drop(index=asos_df[asos_df['rain_tomorrow'].isna()].index, inplace=True)

In [164]:
asos_df = asos_df.sample(frac=1, random_state=20250113).reset_index(drop=True)

In [165]:
rain_data = asos_df[asos_df['rain_tomorrow']==1].loc[:, ['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [166]:
len(rain_data)

7352

In [167]:
no_rain_data = asos_df[asos_df['rain_tomorrow']==0].sample(n=len(rain_data)).loc[:,['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [168]:
data = pd.concat([rain_data, no_rain_data])

In [169]:
data = data.sample(frac=1).reset_index(drop=True)

In [170]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14704 entries, 0 to 14703
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   hm             14704 non-null  float64
 1   pv             14704 non-null  float64
 2   td             14704 non-null  float64
 3   pa             14704 non-null  float64
 4   m005Te         14704 non-null  float64
 5   m01Te          14704 non-null  float64
 6   m02Te          14704 non-null  float64
 7   m03Te          14704 non-null  float64
 8   clfmAbbrCd     14704 non-null  object 
 9   lcsCh          14704 non-null  float64
 10  rain_tomorrow  14704 non-null  float64
dtypes: float64(10), object(1)
memory usage: 1.2+ MB


In [171]:
split_index = round((len(data)/10)*7)

In [172]:
encoder = LabelEncoder()
data.clfmAbbrCd = encoder.fit_transform(data.clfmAbbrCd)

In [173]:
train_X = data.iloc[:split_index,:-1]
train_y = data.iloc[:split_index,-1]
test_X = data.iloc[split_index:,:-1]
test_y = data.iloc[split_index:,-1]

In [174]:
scaler = MinMaxScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.fit_transform(test_X)

In [175]:
train_X.shape, test_X.shape, train_y.shape, test_y.shape

((10293, 10), (4411, 10), (10293,), (4411,))

In [177]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(train_X.shape[1],)),  
    Dropout(0.3),
    Dense(128, activation='relu'),  
    Dropout(0.4),
    Dense(256, activation='relu'),  
    Dropout(0.4),
    Dense(64, activation='relu'),  
    Dropout(0.3),
    Dense(32, activation='relu'),  
    Dropout(0.2),
    Dense(1, activation='sigmoid') 
])

model.compile(loss='binary_crossentropy', optimizer='adam',  metrics=['accuracy',Recall(),Precision()])
# 모델 요약
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_42 (Dense)            (None, 64)                704       
                                                                 
 dropout_35 (Dropout)        (None, 64)                0         
                                                                 
 dense_43 (Dense)            (None, 128)               8320      
                                                                 
 dropout_36 (Dropout)        (None, 128)               0         
                                                                 
 dense_44 (Dense)            (None, 256)               33024     
                                                                 
 dropout_37 (Dropout)        (None, 256)               0         
                                                                 
 dense_45 (Dense)            (None, 64)               

In [178]:
history = model.fit(train_X, train_y, epochs=30, validation_split=0.2)

Epoch 1/30


2025-01-16 17:26:42.840789: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-01-16 17:26:45.238474: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [180]:
loss, accuracy, _, _, = model.evaluate(test_X, test_y)
print(f"테스트 정확도: {accuracy:.4f}")

테스트 정확도: 0.7533


In [198]:
y_hat = (model.predict(test_X)>=0.5).reshape(-1)



In [199]:
pd.DataFrame(y_hat).value_counts()

False    2290
True     2121
dtype: int64

In [200]:
pd.DataFrame(test_y).value_counts()

rain_tomorrow
1.0              2233
0.0              2178
dtype: int64

In [201]:
test_y.shape, y_hat.shape

((4411,), (4411,))

In [202]:
f1 = f1_score(test_y, y_hat, average='binary')
print(f"F1 스코어: {f1:.4f}")

F1 스코어: 0.7501


# 6시간 단위 예측

In [240]:
asos_df = pd.read_csv('/Volumes/ESD-ISO/project_preson/shared_data_preparation/ASOS_data_preprocessed_V3.csv')

In [241]:
# rain_tomorrow 열을 rainy_day 열에서 6시간 이후 값을 가져와 생성
asos_df['rain_tomorrow'] = asos_df['rainy_day'].shift(-6)

In [242]:
asos_df[asos_df['rain_tomorrow']!=0]

Unnamed: 0,tm,hm,pv,ps,td,pa,dc10Tca,clfmAbbrCd,lcsCh,m005Te,m01Te,m02Te,m03Te,vs,ws,wd,rn,rainy_day,rain_tomorrow
375,1980-04-03 21:00,67.0,9.8,1022.0,6.7,1011.5,0.0,no_clouds,0.0,11.6,12.2,11.9,10.0,2200.0,4.3,180.0,0.0,0,1.0
376,1980-04-04 03:00,76.0,9.3,1023.1,5.9,1012.4,0.0,no_clouds,0.0,7.8,9.2,10.4,9.9,2500.0,2.0,20.0,0.0,0,1.0
377,1980-04-04 09:00,51.0,7.9,1023.5,3.6,1013.0,8.0,ScAc,0.0,9.6,8.7,9.2,9.1,2000.0,1.5,50.0,0.0,0,1.0
402,1980-04-10 15:00,39.0,7.4,1021.2,2.7,1010.9,0.0,no_clouds,0.0,22.2,17.7,12.8,11.0,2500.0,2.0,230.0,0.0,0,1.0
403,1980-04-10 21:00,45.0,5.5,1021.6,-1.4,1011.0,0.0,no_clouds,0.0,11.2,13.3,13.9,12.6,2000.0,3.8,250.0,0.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95822,2024-12-30 21:00:00,67.0,6.1,1016.0,0.0,1005.4,6.0,Ci,2.0,-0.1,-0.3,0.8,1.7,1053.0,2.7,270.0,0.0,0,
95823,2024-12-30 22:00:00,71.0,6.1,1016.2,0.0,1005.6,0.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,906.0,2.4,270.0,0.0,0,
95824,2024-12-30 23:00:00,75.0,6.1,1016.0,0.0,1005.4,0.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,754.0,0.7,250.0,0.0,0,
95825,2024-12-31 00:00:00,81.0,7.0,1015.8,1.8,1005.2,9.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,563.0,2.3,250.0,0.0,0,


In [243]:
asos_df.drop(index=asos_df[asos_df['rain_tomorrow'].isna()].index, inplace=True)

In [244]:
asos_df = asos_df.sample(frac=1, random_state=20250113).reset_index(drop=True)

In [245]:
rain_data = asos_df[asos_df['rain_tomorrow']==1].loc[:, ['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [246]:
rain_data

Unnamed: 0,hm,pv,td,pa,m005Te,m01Te,m02Te,m03Te,clfmAbbrCd,lcsCh,rain_tomorrow
25,27.0,6.4,0.6,1012.0,17.200000,14.750000,12.800000,12.600000,CsCi,0.0,1.0
26,56.0,17.5,15.4,1002.2,-1.957968,-1.459638,-0.303340,1.109255,ScAc,13.0,1.0
36,77.0,29.8,24.0,1005.5,28.400000,28.400000,27.500000,27.000000,ScAs,15.0,1.0
52,43.0,1.6,-17.1,1012.3,-2.055275,-1.624312,-0.438073,1.034404,no_clouds,0.0,1.0
87,82.0,23.2,19.9,994.7,25.600000,27.100000,26.600000,26.600000,ScAs,10.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
95702,62.0,17.1,15.1,996.2,-1.971598,-1.482704,-0.322212,1.098771,ScAs,12.0,1.0
95712,91.0,12.4,10.1,1000.7,17.800000,18.100000,17.900000,18.000000,StNs,6.0,1.0
95743,85.0,8.5,4.6,999.3,8.100000,8.000000,8.000000,7.800000,Ci,0.0,1.0
95747,96.0,30.0,24.1,994.9,25.900000,25.900000,25.000000,25.000000,StNs,4.0,1.0


In [247]:
no_rain_data = asos_df[asos_df['rain_tomorrow']==0].sample(n=len(rain_data)).loc[:,['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [248]:
data = pd.concat([rain_data, no_rain_data])

In [249]:
data = data.sample(frac=1).reset_index(drop=True)

In [250]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11656 entries, 0 to 11655
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   hm             11656 non-null  float64
 1   pv             11656 non-null  float64
 2   td             11656 non-null  float64
 3   pa             11656 non-null  float64
 4   m005Te         11656 non-null  float64
 5   m01Te          11656 non-null  float64
 6   m02Te          11656 non-null  float64
 7   m03Te          11656 non-null  float64
 8   clfmAbbrCd     11656 non-null  object 
 9   lcsCh          11656 non-null  float64
 10  rain_tomorrow  11656 non-null  float64
dtypes: float64(10), object(1)
memory usage: 1001.8+ KB


In [251]:
encoder = LabelEncoder()
data.clfmAbbrCd = encoder.fit_transform(data.clfmAbbrCd)

In [252]:
split_index = round((len(data)/10)*7)

In [253]:
train_X = data.iloc[:split_index,:-1]
train_y = data.iloc[:split_index,-1]
test_X = data.iloc[split_index:,:-1]
test_y = data.iloc[split_index:,-1]

In [254]:
scaler = MinMaxScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.fit_transform(test_X)

In [255]:
train_X.shape, test_X.shape, train_y.shape, test_y.shape

((8159, 10), (3497, 10), (8159,), (3497,))

In [256]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(train_X.shape[1],)),  
    Dropout(0.3),
    Dense(128, activation='relu'),  
    Dropout(0.4),
    Dense(256, activation='relu'),  
    Dropout(0.4),
    Dense(64, activation='relu'),  
    Dropout(0.3),
    Dense(32, activation='relu'),  
    Dropout(0.2),
    Dense(1, activation='sigmoid') 
])

model.compile(loss='binary_crossentropy', optimizer='adam',  metrics=['accuracy',Recall(),Precision()])
# 모델 요약
model.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_54 (Dense)            (None, 64)                704       
                                                                 
 dropout_45 (Dropout)        (None, 64)                0         
                                                                 
 dense_55 (Dense)            (None, 128)               8320      
                                                                 
 dropout_46 (Dropout)        (None, 128)               0         
                                                                 
 dense_56 (Dense)            (None, 256)               33024     
                                                                 
 dropout_47 (Dropout)        (None, 256)               0         
                                                                 
 dense_57 (Dense)            (None, 64)               

In [257]:
history = model.fit(train_X, train_y, epochs=30, validation_split=0.2)

Epoch 1/30


2025-01-16 17:33:03.623831: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-01-16 17:33:06.050616: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [258]:
loss, accuracy, _, _, = model.evaluate(np.array(test_X), np.array(test_y))
print(f"테스트 정확도: {accuracy:.4f}")

2025-01-16 17:33:48.020473: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


테스트 정확도: 0.6918


In [263]:
y_hat = (model.predict(test_X)>=0.5).reshape(-1)



In [264]:
test_y.shape, y_hat.shape

((3497,), (3497,))

In [265]:
f1 = f1_score(test_y, y_hat, average='binary')
print(f"F1 스코어: {f1:.4f}")

F1 스코어: 0.7873


# 12시간 단위 예측 

In [266]:
asos_df = pd.read_csv('/Volumes/ESD-ISO/project_preson/shared_data_preparation/ASOS_data_preprocessed_V3.csv')

In [267]:
# rain_tomorrow 열을 rainy_day 열에서 12시간 이후 값을 가져와 생성
asos_df['rain_tomorrow'] = asos_df['rainy_day'].shift(-12)

In [268]:
asos_df[asos_df['rain_tomorrow']!=0]

Unnamed: 0,tm,hm,pv,ps,td,pa,dc10Tca,clfmAbbrCd,lcsCh,m005Te,m01Te,m02Te,m03Te,vs,ws,wd,rn,rainy_day,rain_tomorrow
369,1980-04-02 09:00,50.0,4.2,1025.2,-5.0,1014.4,0.0,no_clouds,0.0,3.8,4.9,6.8,7.3,2200.0,3.0,270.0,0.0,0,1.0
370,1980-04-02 15:00,25.0,3.4,1023.9,-7.8,1013.3,0.0,no_clouds,0.0,16.6,12.1,8.6,7.5,2500.0,3.5,230.0,0.0,0,1.0
371,1980-04-02 21:00,31.0,3.0,1024.4,-9.4,1013.7,1.0,Sc,0.0,7.9,9.5,10.0,9.0,1500.0,1.0,270.0,0.0,0,1.0
396,1980-04-09 03:00,78.0,8.2,1016.3,4.1,1005.7,4.0,As,0.0,7.8,9.7,11.3,11.0,2000.0,3.0,340.0,0.0,0,1.0
397,1980-04-09 09:00,64.0,7.5,1018.1,2.9,1007.5,5.0,Ci,0.0,8.8,8.3,9.7,10.0,1500.0,1.0,270.0,0.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95822,2024-12-30 21:00:00,67.0,6.1,1016.0,0.0,1005.4,6.0,Ci,2.0,-0.1,-0.3,0.8,1.7,1053.0,2.7,270.0,0.0,0,
95823,2024-12-30 22:00:00,71.0,6.1,1016.2,0.0,1005.6,0.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,906.0,2.4,270.0,0.0,0,
95824,2024-12-30 23:00:00,75.0,6.1,1016.0,0.0,1005.4,0.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,754.0,0.7,250.0,0.0,0,
95825,2024-12-31 00:00:00,81.0,7.0,1015.8,1.8,1005.2,9.0,no_clouds,0.0,-0.1,-0.3,0.8,1.7,563.0,2.3,250.0,0.0,0,


In [269]:
asos_df.drop(index=asos_df[asos_df['rain_tomorrow'].isna()].index, inplace=True)

In [270]:
asos_df = asos_df.sample(frac=1, random_state=20250113).reset_index(drop=True)

In [271]:
rain_data = asos_df[asos_df['rain_tomorrow']==1].loc[:, ['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [272]:
rain_data

Unnamed: 0,hm,pv,td,pa,m005Te,m01Te,m02Te,m03Te,clfmAbbrCd,lcsCh,rain_tomorrow
17,90.0,19.8,17.3,995.4,19.60,21.50,22.30,22.70,ScAs,10.0,1.0
33,20.0,4.6,-3.8,1006.1,19.10,18.70,18.50,18.40,Ac,45.0,1.0
42,76.0,7.2,2.3,1007.5,6.15,7.20,8.10,7.85,Ac,40.0,1.0
95,64.0,3.8,-6.4,1017.1,-0.80,-0.55,-0.35,0.45,no_clouds,0.0,1.0
110,70.0,16.4,14.4,1000.4,24.30,23.90,22.70,21.40,Ci,14.2,1.0
...,...,...,...,...,...,...,...,...,...,...,...
95734,49.0,4.1,-5.4,1011.0,6.05,6.40,7.50,8.60,no_clouds,0.0,1.0
95741,96.0,30.0,24.1,994.9,25.90,25.90,25.00,25.00,StNs,4.0,1.0
95750,59.0,18.4,16.2,994.7,25.80,25.70,24.80,24.60,ScAs,9.0,1.0
95763,53.0,8.1,3.9,1007.5,13.60,14.50,14.70,14.30,As,30.0,1.0


In [273]:
no_rain_data = asos_df[asos_df['rain_tomorrow']==0].sample(n=len(rain_data)).loc[:,['hm','pv','td','pa','m005Te','m01Te','m02Te','m03Te','clfmAbbrCd','lcsCh','rain_tomorrow']]

In [274]:
data = pd.concat([rain_data, no_rain_data])

In [275]:
data = data.sample(frac=1).reset_index(drop=True)

In [276]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11656 entries, 0 to 11655
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   hm             11656 non-null  float64
 1   pv             11656 non-null  float64
 2   td             11656 non-null  float64
 3   pa             11656 non-null  float64
 4   m005Te         11656 non-null  float64
 5   m01Te          11656 non-null  float64
 6   m02Te          11656 non-null  float64
 7   m03Te          11656 non-null  float64
 8   clfmAbbrCd     11656 non-null  object 
 9   lcsCh          11656 non-null  float64
 10  rain_tomorrow  11656 non-null  float64
dtypes: float64(10), object(1)
memory usage: 1001.8+ KB


In [277]:
encoder = LabelEncoder()
data.clfmAbbrCd = encoder.fit_transform(data.clfmAbbrCd)

In [278]:
train_X = data.iloc[:5000,:-1]
train_y = data.iloc[:5000,-1]
test_X = data.iloc[5000:,:-1]
test_y = data.iloc[5000:,-1]

In [279]:
scaler = MinMaxScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.fit_transform(test_X)

In [280]:
train_X.shape, test_X.shape, train_y.shape, test_y.shape

((5000, 10), (6656, 10), (5000,), (6656,))

In [281]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(train_X.shape[1],)),  
    Dropout(0.3),
    Dense(128, activation='relu'),  
    Dropout(0.4),
    Dense(256, activation='relu'),  
    Dropout(0.4),
    Dense(64, activation='relu'),  
    Dropout(0.3),
    Dense(32, activation='relu'),  
    Dropout(0.2),
    Dense(1, activation='sigmoid') 
])

model.compile(loss='binary_crossentropy', optimizer='adam',  metrics=['accuracy',Recall(),Precision()])
# 모델 요약
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_60 (Dense)            (None, 64)                704       
                                                                 
 dropout_50 (Dropout)        (None, 64)                0         
                                                                 
 dense_61 (Dense)            (None, 128)               8320      
                                                                 
 dropout_51 (Dropout)        (None, 128)               0         
                                                                 
 dense_62 (Dense)            (None, 256)               33024     
                                                                 
 dropout_52 (Dropout)        (None, 256)               0         
                                                                 
 dense_63 (Dense)            (None, 64)              

In [282]:
history = model.fit(train_X, train_y, epochs=30, validation_split=0.2)

Epoch 1/30


2025-01-16 17:35:22.070413: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-01-16 17:35:23.147841: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [283]:
loss, accuracy, _, _, = model.evaluate(np.array(test_X), np.array(test_y))
print(f"테스트 정확도: {accuracy:.4f}")

2025-01-16 17:35:45.901137: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


테스트 정확도: 0.6454


In [284]:
y_hat = (model.predict(test_X)>=0.5).reshape(-1)



2025-01-16 17:35:48.626029: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




In [285]:
test_y.shape, y_hat.shape

((6656,), (6656,))

In [286]:
f1 = f1_score(test_y, y_hat, average='micro')
print(f"F1 스코어: {f1:.4f}")

F1 스코어: 0.6454
