# ver1

In [None]:
# Mount Google Drive at /content/drive; the CSV paths read by the later cells
# live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer, PolynomialFeatures
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.multioutput import MultiOutputRegressor
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.pipeline import Pipeline
import joblib
from datetime import datetime, timedelta
import xgboost as xgb
import lightgbm as lgb

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [None]:
# Load the yearly daily-average air-quality CSVs and preprocess them.
years = range(2019, 2024)
csv_template = '/content/drive/MyDrive/수업_실습/fine_dust/기간별_일평균_대기환경_정보_{year}년.csv'
# Month -> season code: Dec-Feb=0, Mar-May=1, Jun-Aug=2, Sep-Nov=3.
season_by_month = {month: month % 12 // 3 for month in range(1, 13)}

dfs = []
for year in years:
    df = pd.read_csv(csv_template.format(year=year), encoding='cp949')
    # The date column is stored as YYYYMMDD; parse it into timestamps.
    df['측정일시'] = pd.to_datetime(df['측정일시'], format='%Y%m%d')
    df['계절'] = df['측정일시'].dt.month.map(season_by_month)
    dfs.append(df)

# Stack all years under a fresh index, then forward-fill missing values.
data = pd.concat(dfs, ignore_index=True).ffill()

# Feature matrix and the two regression targets (kept as one-column frames).
feature_names = ['NO2', 'O3', 'CO', 'SO2', '계절', '측정소명']
X = data[feature_names]
y_pm10 = data[['미세먼지농도(㎍/㎥)']]
y_pm25 = data[['초미세먼지농도(㎍/㎥)']]

# 특성 엔지니어링을 위한 함수 정의
def add_engineered_features(X):
    """Return a copy of ``X`` extended with two pollutant ratio columns.

    Adds ``NO2_O3_ratio`` (NO2 / O3) and ``CO_SO2_ratio`` (CO / SO2). The
    input frame itself is not modified.
    """
    no2_over_o3 = X['NO2'] / X['O3']
    co_over_so2 = X['CO'] / X['SO2']
    return X.assign(NO2_O3_ratio=no2_over_o3, CO_SO2_ratio=co_over_so2)

# Build the preprocessing step.
numeric_features = ['NO2', 'O3', 'CO', 'SO2']
categorical_features = ['계절', '측정소명']

# NOTE(review): ColumnTransformer keeps only the columns listed below (its
# default is remainder='drop'), so the ratio columns produced by
# add_engineered_features do not reach the later steps — confirm intended.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        # `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and the
        # old keyword was removed in 1.4.
        ('cat', OneHotEncoder(drop='first', sparse_output=False), categorical_features)
    ])

# Full feature pipeline: ratios -> scale/encode -> degree-2 terms -> top-k.
pipeline = Pipeline([
    ('feature_engineering', FunctionTransformer(add_engineered_features)),
    ('preprocessor', preprocessor),
    ('poly', PolynomialFeatures(degree=2, include_bias=False)),
    ('selector', SelectKBest(f_regression, k=20))  # k is tunable
])

# Transform the complete data set. The target is flattened to 1-D because
# f_regression expects a 1-D y (a column vector triggers a conversion warning).
# The pipeline is re-fitted on the training rows only further down.
X_transformed = pipeline.fit_transform(X, y_pm10.values.ravel())

# Positional split: the first 80% of rows train, the last 20% test.
split_index = int(len(X) * 0.8)
X_train, X_test = X.iloc[:split_index], X.iloc[split_index:]
y_train_pm10, y_test_pm10 = y_pm10.iloc[:split_index], y_pm10.iloc[split_index:]
y_train_pm25, y_test_pm25 = y_pm25.iloc[:split_index], y_pm25.iloc[split_index:]

# Fit the feature pipeline on the training rows and transform both splits.
pipeline.fit(X_train, y_train_pm10.values.ravel())
X_train_transformed = pipeline.transform(X_train)
X_test_transformed = pipeline.transform(X_test)


# Candidate regressors.
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
    'XGBoost': xgb.XGBRegressor(random_state=42),
    'LightGBM': lgb.LGBMRegressor(random_state=42),
}

# Fit every candidate on each target (PM10 first, then PM2.5) and score it on
# the held-out rows. Only the scale/encode step precedes the regressor here.
results_pm10, results_pm25 = {}, {}
evaluation_runs = (
    ("PM10 Model Evaluation:", y_train_pm10, y_test_pm10, results_pm10),
    ("\nPM2.5 Model Evaluation:", y_train_pm25, y_test_pm25, results_pm25),
)
for heading, y_fit, y_holdout, scoreboard in evaluation_runs:
    for name, model in models.items():
        pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('regressor', model)])
        pipeline.fit(X_train, y_fit.values.ravel())
        y_pred = pipeline.predict(X_test)
        scoreboard[name] = {
            'MSE': mean_squared_error(y_holdout, y_pred),
            'MAE': mean_absolute_error(y_holdout, y_pred),
            'R2': r2_score(y_holdout, y_pred),
        }
    print(heading)
    print(pd.DataFrame(scoreboard))

# Pick, per target, the model name with the highest R2.
best_model_pm10 = max(results_pm10.items(), key=lambda item: item[1]['R2'])[0]
best_model_pm25 = max(results_pm25.items(), key=lambda item: item[1]['R2'])[0]

print(f"\nBest PM10 Model: {best_model_pm10}")
print(f"Best PM2.5 Model: {best_model_pm25}")

  y = column_or_1d(y, warn=True)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003916 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 252
[LightGBM] [Info] Number of data points in the train set: 36520, number of used features: 31
[LightGBM] [Info] Start training from score 36.816457
PM10 Model Evaluation:
     Linear Regression  Decision Tree  Random Forest  Gradient Boosting  \
MSE         467.216780    1078.842455     554.830216         464.758376   
MAE          11.651004      16.609310      12.793667          11.426831   
R2            0.353006      -0.493963       0.231681           0.356410   

        XGBoost    LightGBM  
MSE  533.124039  467.831571  
MAE   12.295691   11.413865  
R2     0.261739    0.352155  




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000803 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 252
[LightGBM] [Info] Number of data points in the train set: 36520, number of used features: 31
[LightGBM] [Info] Start training from score 20.848001

PM2.5 Model Evaluation:
     Linear Regression  Decision Tree  Random Forest  Gradient Boosting  \
MSE          65.461005     128.197959      66.970862          59.888006   
MAE           5.802830       7.792753       5.859079           5.462875   
R2            0.614570       0.245178       0.605680           0.647383   

       XGBoost   LightGBM  
MSE  70.764245  57.626616  
MAE   5.881052   5.373602  
R2    0.583344   0.660698  

Best PM10 Model: Gradient Boosting
Best PM2.5 Model: LightGBM


In [None]:
# Build, fit and persist the full pipeline for each target.
from sklearn.base import clone


def _build_full_pipeline(regressor):
    """Return an unfitted feature-engineering + regression pipeline.

    ``preprocessor`` and ``regressor`` are cloned so that every pipeline owns
    its estimators. Without the clone, both pipelines would share the same
    regressor object whenever one model wins for both targets, and the second
    fit would overwrite the first.
    """
    return Pipeline([
        ('feature_engineering', FunctionTransformer(add_engineered_features)),
        ('preprocessor', clone(preprocessor)),
        ('poly', PolynomialFeatures(degree=2, include_bias=False)),
        ('selector', SelectKBest(f_regression, k=20)),
        ('regressor', clone(regressor)),
    ])


pipeline_pm10 = _build_full_pipeline(models[best_model_pm10])
pipeline_pm10.fit(X_train, y_train_pm10.values.ravel())

pipeline_pm25 = _build_full_pipeline(models[best_model_pm25])
pipeline_pm25.fit(X_train, y_train_pm25.values.ravel())

# Persist both pipelines; the message reports the file names actually written.
pm10_path = f'{best_model_pm10}_pipeline_pm10.joblib'
pm25_path = f'{best_model_pm25}_pipeline_pm25.joblib'
joblib.dump(pipeline_pm10, pm10_path)
joblib.dump(pipeline_pm25, pm25_path)
print(f"\nBest models have been saved: {pm10_path} and {pm25_path}")



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001934 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1594
[LightGBM] [Info] Number of data points in the train set: 36520, number of used features: 20
[LightGBM] [Info] Start training from score 20.848001

Best models have been saved: Gradient Boosting_model_pm10.joblib and LightGBM_model_pm25.joblib


In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import joblib
import numpy as np

def get_realtime_data():
    """Fetch the current air-quality rows from the Seoul open-data API.

    Returns:
        The value stored under ``RealtimeCityAir -> row`` in the JSON reply.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
        KeyError: if the reply lacks the ``RealtimeCityAir`` / ``row`` keys.
    """
    # NOTE(review): the API key is embedded in the URL; consider loading it
    # from configuration instead of committing it with the notebook.
    url = 'http://openAPI.seoul.go.kr:8088/6d6d524c626869393635704d4a5667/json/RealtimeCityAir/1/30/'
    # Without a timeout, requests can wait indefinitely on a stalled server.
    res = requests.get(url, timeout=10)
    # Fail on 4xx/5xx replies instead of trying to parse an error page.
    res.raise_for_status()
    data = res.json()
    return data['RealtimeCityAir']['row']

def preprocess_realtime_data(realtime_data):
    """Convert raw API rows into the frame used by the prediction loop.

    The four gas readings are cast to float, ``MSRSTE_NM`` is renamed to
    ``측정소명``, and a ``계절`` column holding the season code of the current
    month is added. Only the eight columns listed below are returned.
    """
    gas_columns = ['NO2', 'O3', 'CO', 'SO2']
    frame = (
        pd.DataFrame(realtime_data)
        .rename(columns={'MSRSTE_NM': '측정소명'})
        .astype({column: float for column in gas_columns})
    )
    # Season code of the current month: Dec-Feb=0, Mar-May=1, Jun-Aug=2, Sep-Nov=3.
    frame['계절'] = datetime.now().month % 12 // 3
    output_columns = gas_columns + ['계절', '측정소명', 'PM10', 'PM25']
    return frame[output_columns]

# Load the persisted pipelines.
pipeline_pm10 = joblib.load('Gradient Boosting_pipeline_pm10.joblib')
pipeline_pm25 = joblib.load('LightGBM_pipeline_pm25.joblib')

# Fetch and preprocess the real-time data.
realtime_data = get_realtime_data()
X_realtime = preprocess_realtime_data(realtime_data)

# Prediction
today = datetime.now().date()
future_dates = [today + timedelta(days=i) for i in range(1, 7)]  # six days starting tomorrow

for _, row in X_realtime.iterrows():
    district = row['측정소명']

    # Print today's actual measurements.
    print(f"\n{district} 실시간 데이터 ({today}):")
    print(f"미세먼지: {float(row['PM10']):.3f}, 초미세먼지: {float(row['PM25']):.3f}")

    # Inputs for the next six days: today's readings repeated six times.
    X_future = pd.DataFrame([row[['NO2', 'O3', 'CO', 'SO2', '계절', '측정소명']].tolist()] * 6,
                            columns=['NO2', 'O3', 'CO', 'SO2', '계절', '측정소명'])

    # Update the season code to match each future date.
    for i, date in enumerate(future_dates):
        X_future.iloc[i, X_future.columns.get_loc('계절')] = date.month % 12 // 3

    # Add a little variability to the predictions.
    # NOTE(review): the six input rows differ only in the season code, so the
    # day-to-day variation printed below comes mostly from this unseeded
    # Gaussian noise rather than from the model — confirm this is intended.
    pm10_pred = pipeline_pm10.predict(X_future) + np.random.normal(0, 1, 6)
    pm25_pred = pipeline_pm25.predict(X_future) + np.random.normal(0, 0.5, 6)

    result_df = pd.DataFrame({
        '미세먼지': pm10_pred.flatten(),
        '초미세먼지': pm25_pred.flatten()
    }, index=[date.strftime('%Y-%m-%d') for date in future_dates])

    print(f"\n{district} 미세먼지 예측 (내일부터 6일간):")
    print(result_df.round(3))  # round to three decimal places


중구 실시간 데이터 (2024-09-18):
미세먼지: 14.000, 초미세먼지: 9.000

중구 미세먼지 예측 (내일부터 6일간):
              미세먼지  초미세먼지
2024-09-19  11.675  4.437
2024-09-20  11.315  4.716
2024-09-21  12.017  4.573
2024-09-22  14.585  4.704
2024-09-23   9.897  5.362
2024-09-24  12.392  4.786

종로구 실시간 데이터 (2024-09-18):
미세먼지: 13.000, 초미세먼지: 9.000

종로구 미세먼지 예측 (내일부터 6일간):
              미세먼지  초미세먼지
2024-09-19  11.675  4.437
2024-09-20  11.315  4.716
2024-09-21  12.017  4.573
2024-09-22  14.585  4.704
2024-09-23   9.897  5.362
2024-09-24  12.392  4.786

용산구 실시간 데이터 (2024-09-18):
미세먼지: 15.000, 초미세먼지: 12.000

용산구 미세먼지 예측 (내일부터 6일간):
              미세먼지  초미세먼지
2024-09-19  11.675  4.437
2024-09-20  11.315  4.716
2024-09-21  12.017  4.573
2024-09-22  14.585  4.704
2024-09-23   9.897  5.362
2024-09-24  12.392  4.786

은평구 실시간 데이터 (2024-09-18):
미세먼지: 10.000, 초미세먼지: 9.000

은평구 미세먼지 예측 (내일부터 6일간):
              미세먼지  초미세먼지
2024-09-19  11.675  4.437
2024-09-20  11.315  4.716
2024-09-21  12.017  4.573
2024-09-22  14.585  4.704
2024-09-2

# ver2

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import FunctionTransformer, PolynomialFeatures
import xgboost as xgb
import lightgbm as lgb
import joblib

# Load the yearly daily-average air-quality CSVs and preprocess them.
years = range(2019, 2024)
csv_template = '/content/drive/MyDrive/수업_실습/fine_dust/기간별_일평균_대기환경_정보_{year}년.csv'
# Month -> season code: Dec-Feb=0, Mar-May=1, Jun-Aug=2, Sep-Nov=3.
season_by_month = {month: month % 12 // 3 for month in range(1, 13)}

dfs = []
for year in years:
    df = pd.read_csv(csv_template.format(year=year), encoding='cp949')
    # The date column is stored as YYYYMMDD; parse it into timestamps.
    df['측정일시'] = pd.to_datetime(df['측정일시'], format='%Y%m%d')
    df['계절'] = df['측정일시'].dt.month.map(season_by_month)
    dfs.append(df)

# Stack all years under a fresh index, then forward-fill missing values.
data = pd.concat(dfs, ignore_index=True).ffill()

# Calendar features derived from the measurement date.
data['요일'] = data['측정일시'].dt.dayofweek
data['월'] = data['측정일시'].dt.month

# Per-station exponentially weighted moving averages with spans 7 and 30.
gas_columns = ['NO2', 'O3', 'CO', 'SO2']
ewm_sources = gas_columns + ['미세먼지농도(㎍/㎥)', '초미세먼지농도(㎍/㎥)']
for col in ewm_sources:
    for span in (7, 30):
        data[f'{col}_EWM{span}'] = data.groupby('측정소명')[col].transform(
            lambda x, span=span: x.ewm(span=span).mean()
        )

# Feature matrix and targets (targets are plain Series here).
feature_columns = (
    gas_columns
    + ['계절', '측정소명', '요일', '월']
    + [f'{col}_EWM7' for col in gas_columns]
    + [f'{col}_EWM30' for col in gas_columns]
)
X = data[feature_columns]
y_pm10 = data['미세먼지농도(㎍/㎥)']
y_pm25 = data['초미세먼지농도(㎍/㎥)']

# 특성 엔지니어링을 위한 함수 정의
def add_engineered_features(X):
    """Return a copy of ``X`` extended with two pollutant ratio columns.

    Adds ``NO2_O3_ratio`` (NO2 / O3) and ``CO_SO2_ratio`` (CO / SO2). The
    input frame itself is not modified.
    """
    ratios = {
        'NO2_O3_ratio': X['NO2'] / X['O3'],
        'CO_SO2_ratio': X['CO'] / X['SO2'],
    }
    return X.assign(**ratios)

# Build the preprocessing step.
numeric_features = ['NO2', 'O3', 'CO', 'SO2', 'NO2_EWM7', 'O3_EWM7', 'CO_EWM7', 'SO2_EWM7',
                    'NO2_EWM30', 'O3_EWM30', 'CO_EWM30', 'SO2_EWM30']
categorical_features = ['계절', '측정소명', '요일', '월']

# NOTE(review): ColumnTransformer keeps only the columns listed below (its
# default is remainder='drop'), so the ratio columns produced by
# add_engineered_features do not reach the later steps — confirm intended.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        # `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and the
        # old keyword was removed in 1.4.
        ('cat', OneHotEncoder(drop='first', sparse_output=False), categorical_features)
    ])

# Full feature pipeline: ratios -> scale/encode -> degree-2 terms -> top-k.
pipeline = Pipeline([
    ('feature_engineering', FunctionTransformer(add_engineered_features)),
    ('preprocessor', preprocessor),
    ('poly', PolynomialFeatures(degree=2, include_bias=False)),
    ('selector', SelectKBest(f_regression, k=30))  # larger k than ver1
])

# Positional split: the first 80% of rows train, the last 20% test.
split_index = int(len(X) * 0.8)
X_train, X_test = X.iloc[:split_index], X.iloc[split_index:]
y_train_pm10, y_test_pm10 = y_pm10.iloc[:split_index], y_pm10.iloc[split_index:]
y_train_pm25, y_test_pm25 = y_pm25.iloc[:split_index], y_pm25.iloc[split_index:]

# Fit the feature pipeline on the training rows and transform both splits.
pipeline.fit(X_train, y_train_pm10)
X_train_transformed = pipeline.transform(X_train)
X_test_transformed = pipeline.transform(X_test)

# Candidate regressors.
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
    'XGBoost': xgb.XGBRegressor(random_state=42),
    'LightGBM': lgb.LGBMRegressor(random_state=42)
}

# Search space for XGBoost hyper-parameter tuning.
xgb_param_dist = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.3],
    'max_depth': [3, 5, 7],
    'subsample': [0.6, 0.8, 1.0]
}

# Randomized search: 10 sampled settings, 3-fold cross-validation.
xgb_random = RandomizedSearchCV(
    models['XGBoost'], param_distributions=xgb_param_dist,
    n_iter=10, cv=3, random_state=42, n_jobs=-1
)

# 모델 학습 및 평가 함수
def train_and_evaluate(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Returns:
        dict with the keys ``'MSE'``, ``'MAE'`` and ``'R2'``.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    scorers = (
        ('MSE', mean_squared_error),
        ('MAE', mean_absolute_error),
        ('R2', r2_score),
    )
    return {label: scorer(y_test, predictions) for label, scorer in scorers}

# Train and evaluate every candidate on PM10.
results_pm10 = {}
for name, model in models.items():
    if name == 'XGBoost':
        # Evaluate the randomized hyper-parameter search, not the default model.
        model = xgb_random
    pipeline_model = Pipeline([('pipeline', pipeline), ('model', model)])
    results_pm10[name] = train_and_evaluate(pipeline_model, X_train, y_train_pm10, X_test, y_test_pm10)


def _build_ensemble_estimators():
    """Return the (name, pipeline) pairs for the voting ensemble.

    Reads ``xgb_random.best_estimator_`` when called, so it must be called
    after the search has been fitted on the target the ensemble is meant for.
    """
    members = [(name, Pipeline([('pipeline', pipeline), ('model', model)]))
               for name, model in models.items() if name != 'XGBoost']
    members.append(('XGBoost', Pipeline([('pipeline', pipeline), ('model', xgb_random.best_estimator_)])))
    return members


# Voting ensemble (PM10).
estimators = _build_ensemble_estimators()
ensemble = VotingRegressor(estimators)
results_pm10['Ensemble'] = train_and_evaluate(ensemble, X_train, y_train_pm10, X_test, y_test_pm10)

print("PM10 Model Evaluation:")
print(pd.DataFrame(results_pm10))

# Persist the PM10 ensemble.
joblib.dump(ensemble, 'ensemble_model_pm10.joblib')
print("\nEnsemble model has been saved.")

# Train and evaluate every candidate on PM2.5.
results_pm25 = {}
for name, model in models.items():
    if name == 'XGBoost':
        model = xgb_random
    pipeline_model = Pipeline([('pipeline', pipeline), ('model', model)])
    results_pm25[name] = train_and_evaluate(pipeline_model, X_train, y_train_pm25, X_test, y_test_pm25)

# Voting ensemble (PM2.5). The member list is rebuilt because xgb_random has
# just been re-fitted on PM2.5; reusing the PM10 list would keep the XGBoost
# hyper-parameters that were tuned for PM10.
estimators = _build_ensemble_estimators()
ensemble = VotingRegressor(estimators)
results_pm25['Ensemble'] = train_and_evaluate(ensemble, X_train, y_train_pm25, X_test, y_test_pm25)

print("\nPM2.5 Model Evaluation:")
print(pd.DataFrame(results_pm25))

# Report the best-scoring entry per target (highest R2).
best_model_pm10 = max(results_pm10, key=lambda x: results_pm10[x]['R2'])
best_model_pm25 = max(results_pm25, key=lambda x: results_pm25[x]['R2'])

print(f"\nBest PM10 Model: {best_model_pm10}")
print(f"Best PM2.5 Model: {best_model_pm25}")

# Persist the PM2.5 ensemble.
# NOTE(review): the ensemble is saved regardless of which entry scored best,
# and this file name lacks the 'pm' used for the PM10 file — confirm both.
joblib.dump(ensemble, 'ensemble_model_25.joblib')
print("\nEnsemble model has been saved.")



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004257 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4878
[LightGBM] [Info] Number of data points in the train set: 36520, number of used features: 30
[LightGBM] [Info] Start training from score 36.816457




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004401 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4878
[LightGBM] [Info] Number of data points in the train set: 36520, number of used features: 30
[LightGBM] [Info] Start training from score 36.816457




PM10 Model Evaluation:
     Linear Regression  Decision Tree  Random Forest  Gradient Boosting  \
MSE         479.945601    1704.503286     510.035002         484.023110   
MAE          11.829278      20.230340      13.273090          12.123605   
R2            0.335379      -1.360367       0.293712           0.329733   

        XGBoost    LightGBM    Ensemble  
MSE  526.167857  492.820166  505.560451  
MAE   12.677612   12.240842   12.505065  
R2     0.271372    0.317551    0.299908  

Ensemble model has been saved.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019540 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5917
[LightGBM] [Info] Number of data points in the train set: 36520, number of used features: 30
[LightGBM] [Info] Start training from score 20.848001




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011835 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5917
[LightGBM] [Info] Number of data points in the train set: 36520, number of used features: 30
[LightGBM] [Info] Start training from score 20.848001





PM2.5 Model Evaluation:
     Linear Regression  Decision Tree  Random Forest  Gradient Boosting  \
MSE          71.620690     177.483790      69.039762          69.887768   
MAE           6.005858       8.968346       5.937748           5.870925   
R2            0.578302      -0.045014       0.593498           0.588505   

      XGBoost   LightGBM   Ensemble  
MSE  78.89858  64.312454  66.941820  
MAE   6.20294   5.659771   5.759744  
R2    0.53545   0.621332   0.605851  

Best PM10 Model: Linear Regression
Best PM2.5 Model: LightGBM

Ensemble model has been saved.


In [None]:
# Save the PM10 / PM2.5 pipelines under the name of the current best model.
# NOTE(review): the ver2 cell above does not define `pipeline_pm10` or
# `pipeline_pm25`; presumably these are the ver1 pipelines still in memory,
# which would not match the ver2 `best_model_*` names — confirm.
joblib.dump(pipeline_pm10, f'{best_model_pm10}_pipeline_pm10.joblib')
joblib.dump(pipeline_pm25, f'{best_model_pm25}_pipeline_pm25.joblib')

# prophet

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from prophet import Prophet
import joblib

# Load the yearly daily-average air-quality CSVs and stack them.
years = range(2019, 2024)
csv_template = '/content/drive/MyDrive/수업_실습/fine_dust/기간별_일평균_대기환경_정보_{year}년.csv'

dfs = []
for year in years:
    df = pd.read_csv(csv_template.format(year=year), encoding='cp949')
    # The date column is stored as YYYYMMDD; parse it into timestamps.
    df['측정일시'] = pd.to_datetime(df['측정일시'], format='%Y%m%d')
    dfs.append(df)

# Stack all years under a fresh index, then forward-fill missing values.
data = pd.concat(dfs, ignore_index=True).ffill()

# Prophet 모델을 위한 데이터 준비 함수
def prepare_data_for_prophet(data, target_column):
    """Build a new frame with the column names used by the Prophet cells.

    The measurement date becomes ``ds``, ``target_column`` becomes ``y`` and
    the station name becomes ``location``. The row index of ``data`` is kept
    and ``data`` itself is not modified.
    """
    return pd.DataFrame({
        'ds': data['측정일시'],
        'y': data[target_column],
        'location': data['측정소명'],
    })

# 모델 학습 및 예측 함수
def train_and_predict_prophet(train_data, test_data):
    """Fit a Prophet model on ``train_data`` and predict the ``test_data`` dates.

    The model uses yearly and weekly seasonality (daily disabled) plus Korean
    public holidays.

    Returns:
        ``(model, forecast)`` where ``forecast`` is the output of
        ``model.predict`` for the ``ds`` column of ``test_data``.
    """
    prophet_model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
    )
    prophet_model.add_country_holidays(country_name='KR')
    prophet_model.fit(train_data)

    prediction_dates = test_data[['ds']]
    return prophet_model, prophet_model.predict(prediction_dates)

# 평가 함수
def evaluate_prophet(y_true, y_pred):
    """Score predictions; returns a dict with keys 'MSE', 'MAE' and 'R2'."""
    scorers = (
        ('MSE', mean_squared_error),
        ('MAE', mean_absolute_error),
        ('R2', r2_score),
    )
    return {label: scorer(y_true, y_pred) for label, scorer in scorers}

# Train and evaluate the PM10 model (train: before 2023, test: 2023 onwards).
pm10_data = prepare_data_for_prophet(data, '미세먼지농도(㎍/㎥)')
train_data_pm10 = pm10_data[pm10_data['ds'] < '2023-01-01']
# Prophet returns its forecast ordered by `ds` with a fresh index, while the
# metrics compare values by position. Sorting the hold-out rows by `ds` keeps
# y and yhat aligned whatever the row order of the source files.
test_data_pm10 = pm10_data[pm10_data['ds'] >= '2023-01-01'].sort_values('ds', kind='mergesort')

model_pm10, forecast_pm10 = train_and_predict_prophet(train_data_pm10, test_data_pm10)
results_pm10 = evaluate_prophet(test_data_pm10['y'].to_numpy(), forecast_pm10['yhat'].to_numpy())

print("PM10 Model Evaluation:")
print(pd.DataFrame([results_pm10]))

# Train and evaluate the PM2.5 model with the same split and alignment.
pm25_data = prepare_data_for_prophet(data, '초미세먼지농도(㎍/㎥)')
train_data_pm25 = pm25_data[pm25_data['ds'] < '2023-01-01']
test_data_pm25 = pm25_data[pm25_data['ds'] >= '2023-01-01'].sort_values('ds', kind='mergesort')

model_pm25, forecast_pm25 = train_and_predict_prophet(train_data_pm25, test_data_pm25)
results_pm25 = evaluate_prophet(test_data_pm25['y'].to_numpy(), forecast_pm25['yhat'].to_numpy())

print("\nPM2.5 Model Evaluation:")
print(pd.DataFrame([results_pm25]))

# Persist both fitted models.
joblib.dump(model_pm10, 'prophet_model_pm10.joblib')
joblib.dump(model_pm25, 'prophet_model_pm25.joblib')
print("\nProphet models have been saved.")

DEBUG:cmdstanpy:input tempfile: /tmp/tmpnmqrswjg/j0v9w12h.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpnmqrswjg/usjso4uj.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=83691', 'data', 'file=/tmp/tmpnmqrswjg/j0v9w12h.json', 'init=/tmp/tmpnmqrswjg/usjso4uj.json', 'output', 'file=/tmp/tmpnmqrswjg/prophet_model2lg7m2_f/prophet_model-20240918165946.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
16:59:46 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:00:16 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


PM10 Model Evaluation:
          MSE       MAE        R2
0  646.281351  15.36355  0.105491


DEBUG:cmdstanpy:input tempfile: /tmp/tmpnmqrswjg/hr25o8ru.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpnmqrswjg/djz9cvlg.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=34545', 'data', 'file=/tmp/tmpnmqrswjg/hr25o8ru.json', 'init=/tmp/tmpnmqrswjg/djz9cvlg.json', 'output', 'file=/tmp/tmpnmqrswjg/prophet_model86rfw79v/prophet_model-20240918170025.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:00:25 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:00:45 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing



PM2.5 Model Evaluation:
          MSE       MAE        R2
0  157.915408  8.924087  0.069858

Prophet models have been saved.


In [None]:
# Forecast visualisation (optional).
from prophet.plot import plot_plotly
import plotly.offline as py

# Interactive Plotly figure for the PM10 model and its forecast.
fig_pm10 = plot_plotly(model_pm10, forecast_pm10)
py.iplot(fig_pm10)

# Same for the PM2.5 model.
fig_pm25 = plot_plotly(model_pm25, forecast_pm25)
py.iplot(fig_pm25)

# LSTM

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import Callback
import joblib

class R2ScoreCallback(Callback):
    """Keras callback that reports R2 on a fixed data set after every epoch.

    Args:
        validation_data: ``(x_val, y_val)`` pair used for the R2 computation.
    """

    def __init__(self, validation_data):
        # Let the Keras base class initialise its own state first.
        super().__init__()
        self.x_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs=None):
        """Compute R2 on the stored data, record it in ``logs`` and print it."""
        y_pred = self.model.predict(self.x_val)
        r2 = r2_score(self.y_val, y_pred)
        # `logs` defaults to None; only record the metric when a dict is given.
        if logs is not None:
            logs['val_r2'] = r2
        print(f' - val_r2: {r2:.4f}')

# Load the yearly daily-average air-quality CSVs and stack them.
years = range(2019, 2024)
csv_template = '/content/drive/MyDrive/수업_실습/fine_dust/기간별_일평균_대기환경_정보_{year}년.csv'

dfs = []
for year in years:
    df = pd.read_csv(csv_template.format(year=year), encoding='cp949')
    # The date column is stored as YYYYMMDD; parse it into timestamps.
    df['측정일시'] = pd.to_datetime(df['측정일시'], format='%Y%m%d')
    dfs.append(df)

# Stack all years under a fresh index, then forward-fill missing values.
data = pd.concat(dfs, ignore_index=True).ffill()

# Model columns: NO2/O3/CO/SO2 readings followed by the PM10 and PM2.5 targets.
features = ['NO2', 'O3', 'CO', 'SO2', '미세먼지농도(㎍/㎥)', '초미세먼지농도(㎍/㎥)']
dataset = data.loc[:, features]

# Rescale every column to [0, 1]. The scaler is fitted on all rows, i.e.
# before the train/test split made further down.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit(dataset).transform(dataset)

# 시계열 데이터 생성 함수
def create_dataset(dataset, time_step=1, target_cols=slice(4, 6)):
    """Slice a 2-D array into sliding input windows and next-row targets.

    Args:
        dataset: 2-D array indexed as ``dataset[row, column]``.
        time_step: number of consecutive rows in each input window.
        target_cols: column selector for the targets; the default picks
            columns 4 and 5 (PM10 and PM2.5 in the feature order used here).

    Returns:
        ``(X, y)`` as numpy arrays, where ``X[i]`` is
        ``dataset[i:i + time_step]`` and ``y[i]`` is
        ``dataset[i + time_step, target_cols]``.
    """
    # One sample per row that has `time_step` rows before it. The previous
    # bound (len - time_step - 1) silently dropped the last valid sample.
    n_samples = len(dataset) - time_step
    dataX = [dataset[i:i + time_step, :] for i in range(n_samples)]
    dataY = [dataset[i + time_step, target_cols] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)

# Build 30-row input windows and their next-row targets.
time_step = 30
X, y = create_dataset(scaled_data, time_step)

# Train/test split: the first 80% of the windows train, the rest test.
train_size = int(len(X) * 0.8)
X_train, X_test = np.split(X, [train_size])
y_train, y_test = np.split(y, [train_size])

# LSTM 모델 생성 함수
def create_lstm_model(input_shape):
    """Build and compile a two-layer LSTM regressor with a single output.

    Args:
        input_shape: shape of one input sample, passed to ``Input``.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss).
    """
    # Imported here because this cell only imports LSTM and Dense.
    from tensorflow.keras.layers import Input

    model = Sequential([
        # An explicit Input layer replaces `input_shape=` on the first LSTM,
        # which newer Keras versions warn about.
        Input(shape=input_shape),
        LSTM(50, return_sequences=True),
        LSTM(50, return_sequences=False),
        Dense(25),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=[])
    return model

# Settings shared by both target-specific models.
lstm_input_shape = X_train.shape[1:]
fit_options = dict(epochs=50, batch_size=64, verbose=1)

# PM10 model (target column 0). The test split is also used as validation data.
model_pm10 = create_lstm_model(lstm_input_shape)
r2_callback_pm10 = R2ScoreCallback((X_test, y_test[:, 0]))
history_pm10 = model_pm10.fit(
    X_train, y_train[:, 0],
    validation_data=(X_test, y_test[:, 0]),
    callbacks=[r2_callback_pm10],
    **fit_options,
)

# PM2.5 model (target column 1).
model_pm25 = create_lstm_model(lstm_input_shape)
r2_callback_pm25 = R2ScoreCallback((X_test, y_test[:, 1]))
history_pm25 = model_pm25.fit(
    X_train, y_train[:, 1],
    validation_data=(X_test, y_test[:, 1]),
    callbacks=[r2_callback_pm25],
    **fit_options,
)

# Predictions on both splits, still in the scaled [0, 1] space.
train_predict_pm10 = model_pm10.predict(X_train)
test_predict_pm10 = model_pm10.predict(X_test)

train_predict_pm25 = model_pm25.predict(X_train)
test_predict_pm25 = model_pm25.predict(X_test)


def _unscale_column(values, column):
    """Undo the MinMax scaling of a single feature column.

    The scaler expects all of its feature columns, so ``values`` is placed in
    ``column`` of a zero matrix, inverse-transformed, and read back out.
    """
    padded = np.zeros((len(values), scaler.n_features_in_))
    padded[:, column] = np.ravel(values)
    return scaler.inverse_transform(padded)[:, column]


# Back to original units: PM10 is scaler column 4, PM2.5 is column 5.
PM10_COLUMN, PM25_COLUMN = 4, 5

train_predict_pm10 = _unscale_column(train_predict_pm10, PM10_COLUMN)
test_predict_pm10 = _unscale_column(test_predict_pm10, PM10_COLUMN)

train_predict_pm25 = _unscale_column(train_predict_pm25, PM25_COLUMN)
test_predict_pm25 = _unscale_column(test_predict_pm25, PM25_COLUMN)

y_train_inv_pm10 = _unscale_column(y_train[:, 0], PM10_COLUMN)
y_test_inv_pm10 = _unscale_column(y_test[:, 0], PM10_COLUMN)

y_train_inv_pm25 = _unscale_column(y_train[:, 1], PM25_COLUMN)
y_test_inv_pm25 = _unscale_column(y_test[:, 1], PM25_COLUMN)

# 평가
def evaluate_model(y_true, y_pred):
    """Score predictions; returns a dict with keys 'MSE', 'MAE' and 'R2'."""
    scorers = (
        ('MSE', mean_squared_error),
        ('MAE', mean_absolute_error),
        ('R2', r2_score),
    )
    return {label: scorer(y_true, y_pred) for label, scorer in scorers}

# Test-set metrics in original units.
results_pm10 = evaluate_model(y_test_inv_pm10, test_predict_pm10)
results_pm25 = evaluate_model(y_test_inv_pm25, test_predict_pm25)

for heading, scores in (
    ("PM10 Model Evaluation:", results_pm10),
    ("\nPM2.5 Model Evaluation:", results_pm25),
):
    print(heading)
    print(pd.DataFrame([scores]))

# Per-epoch training history of each model.
for heading, history in (
    ("\nPM10 Training History:", history_pm10),
    ("\nPM2.5 Training History:", history_pm25),
):
    print(heading)
    print(pd.DataFrame(history.history))

# Persist both models and the scaler needed to undo the normalisation.
for trained_model, model_path in (
    (model_pm10, 'lstm_model_pm10.h5'),
    (model_pm25, 'lstm_model_pm25.h5'),
):
    trained_model.save(model_path)
joblib.dump(scaler, 'scaler.joblib')
print("\nLSTM models and scaler have been saved.")

Epoch 1/50


  super().__init__(**kwargs)


[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
 - val_r2: 0.8839
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 50ms/step - loss: 7.4435e-04 - val_loss: 3.3146e-04 - val_r2: 0.8839
Epoch 2/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step
 - val_r2: 0.8925
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 57ms/step - loss: 3.9680e-04 - val_loss: 3.0712e-04 - val_r2: 0.8925
Epoch 3/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step
 - val_r2: 0.9035
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 49ms/step - loss: 3.4153e-04 - val_loss: 2.7566e-04 - val_r2: 0.9035
Epoch 4/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step
 - val_r2: 0.9146
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 49ms/step - loss: 3.5539e-04 - val_loss: 2.4382e-04 - val_r2: 0.9146
Epoch 5/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━

  super().__init__(**kwargs)


[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step
 - val_r2: 0.8780
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 46ms/step - loss: 0.0017 - val_loss: 8.8523e-04 - val_r2: 0.8780
Epoch 2/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step
 - val_r2: 0.8751
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 45ms/step - loss: 0.0011 - val_loss: 9.0566e-04 - val_r2: 0.8751
Epoch 3/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step
 - val_r2: 0.8826
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 43ms/step - loss: 0.0011 - val_loss: 8.5153e-04 - val_r2: 0.8826
Epoch 4/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step
 - val_r2: 0.8814
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 44ms/step - loss: 0.0011 - val_loss: 8.6005e-04 - val_r2: 0.8814
Epoch 5/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



PM10 Model Evaluation:
         MSE       MAE        R2
0  60.409923  4.749371  0.916393

PM2.5 Model Evaluation:
         MSE      MAE        R2
0  20.535419  3.12077  0.879056

PM10 Training History:
        loss  val_loss    val_r2
0   0.000554  0.000331  0.883934
1   0.000406  0.000307  0.892459
2   0.000369  0.000276  0.903473
3   0.000356  0.000244  0.914623
4   0.000333  0.000586  0.794927
5   0.000346  0.000251  0.912168
6   0.000329  0.000247  0.913424
7   0.000333  0.000245  0.914060
8   0.000334  0.000238  0.916741
9   0.000328  0.000241  0.915615
10  0.000327  0.000310  0.891516
11  0.000326  0.000262  0.908404
12  0.000336  0.000242  0.915430
13  0.000326  0.000283  0.900906
14  0.000326  0.000232  0.918828
15  0.000317  0.000243  0.914757
16  0.000314  0.000268  0.906059
17  0.000324  0.000430  0.849585
18  0.000327  0.000231  0.919093
19  0.000318  0.000243  0.914950
20  0.000312  0.000234  0.918078
21  0.000320  0.000246  0.913743
22  0.000316  0.000240  0.915953
23  0.

# Transformer

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, MultiHeadAttention, LayerNormalization, GlobalAveragePooling1D
from tensorflow.keras.callbacks import Callback
import joblib

class R2ScoreCallback(Callback):
    """Keras callback that reports the R² score on held-out data after each epoch.

    The score is computed with ``sklearn.metrics.r2_score`` from the model's
    predictions on ``x_val`` and is both printed and stored in the epoch logs
    under the key ``'val_r2'`` (so it ends up in ``History.history``).

    Args:
        validation_data: A ``(x_val, y_val)`` pair used for scoring.
    """

    def __init__(self, validation_data):
        # Let the Keras base class set up its own state before adding ours.
        super().__init__()
        self.x_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the validation inputs, then record and print R²."""
        y_pred = self.model.predict(self.x_val)
        r2 = r2_score(self.y_val, y_pred)
        # `logs` defaults to None, so only write to it when Keras supplied a dict.
        if logs is not None:
            logs['val_r2'] = r2
        print(f' - val_r2: {r2:.4f}')

# Load and preprocess the data: one CSV per year for 2019-2023
# (range(2019, 2024) excludes 2024), read with cp949 encoding.
years = range(2019, 2024)
dfs = []
for year in years:
    df = pd.read_csv(f'/content/drive/MyDrive/수업_실습/fine_dust/기간별_일평균_대기환경_정보_{year}년.csv', encoding='cp949')
    # The date column is parsed from YYYYMMDD values into datetimes.
    df['측정일시'] = pd.to_datetime(df['측정일시'], format='%Y%m%d')
    dfs.append(df)

# Stack the yearly frames into one frame with a fresh 0..n-1 index.
data = pd.concat(dfs, ignore_index=True)
# Handle missing values by forward-filling from the previous row.
# NOTE(review): the fill follows row order in the concatenated frame; if the
# files hold rows for several stations, a gap may be filled from a different
# station's row — confirm against the CSV layout.
data = data.ffill()  # fill missing values

# Feature selection: four pollutant columns followed by the two targets.
# The targets sit at column positions 4 (PM10) and 5 (PM2.5), which the
# windowing and inverse-scaling code later in this cell relies on.
features = ['NO2', 'O3', 'CO', 'SO2', '미세먼지농도(㎍/㎥)', '초미세먼지농도(㎍/㎥)']
dataset = data[features]

# Normalise every selected column to the [0, 1] range.
# NOTE(review): the scaler is fit on the whole dataset, i.e. before the
# train/test split further down, so test rows influence the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)

# 시계열 데이터 생성 함수
def create_dataset(dataset, time_step=1):
    """Build sliding-window samples for supervised time-series learning.

    Each sample pairs ``time_step`` consecutive rows (all columns) with the
    values in columns 4 and 5 (PM10 and PM2.5) of the row that immediately
    follows the window.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_columns)``.
        time_step: Number of consecutive rows in each input window.

    Returns:
        A ``(dataX, dataY)`` tuple where ``dataX`` has shape
        ``(n_samples, time_step, n_columns)`` and ``dataY`` has shape
        ``(n_samples, 2)``, with ``n_samples = n_rows - time_step``. When the
        input is too short to form a single window, both arrays are empty but
        keep their trailing dimensions.
    """
    dataset = np.asarray(dataset)
    # The last usable window ends one row before the final row, which serves
    # as its target. (The previous `- time_step - 1` bound dropped that
    # final window/target pair.)
    n_samples = len(dataset) - time_step
    # Target rows are exactly the rows from index `time_step` onwards.
    dataY = dataset[time_step:, 4:6].copy()
    if n_samples <= 0:
        empty_windows = np.empty((0, time_step, dataset.shape[1]), dtype=dataset.dtype)
        return empty_windows, dataY
    dataX = np.stack([dataset[i:i + time_step, :] for i in range(n_samples)])
    return dataX, dataY

# Build the windowed dataset: each sample is 30 consecutive rows of
# scaled_data, and the target comes from the row right after the window.
time_step = 30
X, y = create_dataset(scaled_data, time_step)

# Train/test split: the first 80% of samples (in row order, no shuffling)
# are used for training and the remaining 20% for testing.
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Transformer 모델 생성
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one attention + feed-forward encoder block to ``inputs``.

    The block runs multi-head self-attention, layer-normalises the result and
    adds ``inputs`` back as a residual. It then applies a two-layer dense
    network (``ff_dim`` ReLU units, projected back to the width of
    ``inputs``), layer-normalises that and adds the first residual sum.

    Args:
        inputs: Tensor fed to the block; its last dimension sets the output width.
        head_size: ``key_dim`` passed to ``MultiHeadAttention``.
        num_heads: Number of attention heads.
        ff_dim: Units in the hidden dense layer.
        dropout: Dropout rate passed to ``MultiHeadAttention``.

    Returns:
        A tensor whose last dimension matches that of ``inputs``.
    """
    # Attention sub-block: `inputs` serves as both query and value.
    self_attention = MultiHeadAttention(
        key_dim=head_size,
        num_heads=num_heads,
        dropout=dropout,
    )
    attended = self_attention(inputs, inputs)
    attended = LayerNormalization(epsilon=1e-6)(attended)
    attention_residual = attended + inputs

    # Feed-forward sub-block, projected back to the input width so the
    # second residual addition lines up.
    hidden = Dense(ff_dim, activation="relu")(attention_residual)
    projected = Dense(inputs.shape[-1])(hidden)
    projected = LayerNormalization(epsilon=1e-6)(projected)
    return projected + attention_residual

def build_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout=0, mlp_dropout=0):
    """Assemble a single-output regression model from stacked encoder blocks.

    Args:
        input_shape: Shape of one sample, without the batch dimension.
        head_size: ``key_dim`` for every attention layer.
        num_heads: Number of attention heads per encoder block.
        ff_dim: Hidden width of each encoder block's dense network.
        num_transformer_blocks: How many encoder blocks to stack.
        mlp_units: Iterable of layer widths for the dense head.
        dropout: Dropout rate used inside the attention layers.
        mlp_dropout: Dropout rate applied after each dense-head layer.

    Returns:
        An uncompiled Keras ``Model`` ending in ``Dense(1)``.
    """
    model_input = Input(shape=input_shape)

    # Encoder stack: every block consumes the previous block's output.
    encoded = model_input
    for _block in range(num_transformer_blocks):
        encoded = transformer_encoder(encoded, head_size, num_heads, ff_dim, dropout)

    # NOTE(review): Keras documents data_format="channels_first" as inputs of
    # shape (batch, features, steps), so pooling appears to average over the
    # last axis of the encoder output — confirm this is the intended axis.
    head = GlobalAveragePooling1D(data_format="channels_first")(encoded)

    # Dense head: one ReLU layer plus dropout per entry in `mlp_units`.
    for units in mlp_units:
        head = Dense(units, activation="relu")(head)
        head = tf.keras.layers.Dropout(mlp_dropout)(head)

    prediction = Dense(1)(head)
    return Model(model_input, prediction)

# Model hyperparameters (shared by the PM10 and PM2.5 models).
input_shape = X_train.shape[1:]  # shape of one sample, batch dimension dropped
head_size = 256  # passed to MultiHeadAttention as key_dim
num_heads = 4
ff_dim = 4  # hidden width of each encoder block's dense network
num_transformer_blocks = 4
mlp_units = [128]  # one dense-head layer of 128 units
mlp_dropout = 0.4
dropout = 0.25

# Build and train the PM10 model (target column 0 of y).
# NOTE(review): X_test/y_test serve as validation data during training and
# are reused for the final evaluation later in this cell.
model_pm10 = build_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout, mlp_dropout)
model_pm10.compile(optimizer='adam', loss='mse')
r2_callback_pm10 = R2ScoreCallback((X_test, y_test[:, 0]))
history_pm10 = model_pm10.fit(X_train, y_train[:, 0], validation_data=(X_test, y_test[:, 0]),
                              epochs=50, batch_size=64, verbose=1, callbacks=[r2_callback_pm10])

# Build and train the PM2.5 model (target column 1 of y) with the same settings.
model_pm25 = build_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout, mlp_dropout)
model_pm25.compile(optimizer='adam', loss='mse')
r2_callback_pm25 = R2ScoreCallback((X_test, y_test[:, 1]))
history_pm25 = model_pm25.fit(X_train, y_train[:, 1], validation_data=(X_test, y_test[:, 1]),
                              epochs=50, batch_size=64, verbose=1, callbacks=[r2_callback_pm25])

# Predictions on both splits; the values are still in the scaled [0, 1] space.
train_predict_pm10 = model_pm10.predict(X_train)
test_predict_pm10 = model_pm10.predict(X_test)

train_predict_pm25 = model_pm25.predict(X_train)
test_predict_pm25 = model_pm25.predict(X_test)

# Inverse scaling: bring predictions and targets back to their original units.
def _inverse_scale_column(fitted_scaler, values, column):
    """Undo the scaler for a single column.

    ``fitted_scaler.inverse_transform`` expects as many columns as the scaler
    was fit on, so ``values`` is written into ``column`` of a zero-filled
    matrix of that width, the matrix is inverse-transformed, and only
    ``column`` is returned.

    Args:
        fitted_scaler: The fitted scaler used to normalise the data.
        values: Scaled values for one column, shaped ``(n,)`` or ``(n, 1)``.
        column: Position of that column among the scaler's features.

    Returns:
        1-D array of ``n`` values in the column's original units.
    """
    flat_values = np.asarray(values).reshape(-1)
    padded = np.zeros((flat_values.shape[0], fitted_scaler.n_features_in_))
    padded[:, column] = flat_values
    return fitted_scaler.inverse_transform(padded)[:, column]


# Column positions of the two targets within the scaled feature matrix.
pm10_col, pm25_col = 4, 5

train_predict_pm10 = _inverse_scale_column(scaler, train_predict_pm10, pm10_col)
test_predict_pm10 = _inverse_scale_column(scaler, test_predict_pm10, pm10_col)

train_predict_pm25 = _inverse_scale_column(scaler, train_predict_pm25, pm25_col)
test_predict_pm25 = _inverse_scale_column(scaler, test_predict_pm25, pm25_col)

# y holds PM10 in column 0 and PM2.5 in column 1.
y_train_inv_pm10 = _inverse_scale_column(scaler, y_train[:, 0], pm10_col)
y_test_inv_pm10 = _inverse_scale_column(scaler, y_test[:, 0], pm10_col)

y_train_inv_pm25 = _inverse_scale_column(scaler, y_train[:, 1], pm25_col)
y_test_inv_pm25 = _inverse_scale_column(scaler, y_test[:, 1], pm25_col)

# 평가
def evaluate_model(y_true, y_pred):
    """Score predictions against ground truth.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values.

    Returns:
        Dict with keys ``'MSE'``, ``'MAE'`` and ``'R2'`` (in that order) holding
        the mean squared error, mean absolute error and R² score.
    """
    metric_functions = (
        ('MSE', mean_squared_error),
        ('MAE', mean_absolute_error),
        ('R2', r2_score),
    )
    return {label: score(y_true, y_pred) for label, score in metric_functions}

# Score both models on the test split, using values in their original units.
results_pm10 = evaluate_model(y_test_inv_pm10, test_predict_pm10)
results_pm25 = evaluate_model(y_test_inv_pm25, test_predict_pm25)

# Each result dict is wrapped in a one-row DataFrame for tabular printing.
print("PM10 Model Evaluation:")
print(pd.DataFrame([results_pm10]))

print("\nPM2.5 Model Evaluation:")
print(pd.DataFrame([results_pm25]))

# Print the training history: one row per epoch with the logged metrics
# (including the 'val_r2' entry added by R2ScoreCallback).
print("\nPM10 Training History:")
print(pd.DataFrame(history_pm10.history))

print("\nPM2.5 Training History:")
print(pd.DataFrame(history_pm25.history))

# Save the models and the fitted scaler to the current working directory.
model_pm10.save('transformer_model_pm10.h5')
model_pm25.save('transformer_model_pm25.h5')
joblib.dump(scaler, 'scaler_transformer.joblib')
print("\nTransformer models and scaler have been saved.")

Epoch 1/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 79ms/step
 - val_r2: 0.7781
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m341s[0m 550ms/step - loss: 0.0021 - val_loss: 6.3364e-04 - val_r2: 0.7781
Epoch 2/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 78ms/step
 - val_r2: 0.6705
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m314s[0m 537ms/step - loss: 8.8091e-04 - val_loss: 9.4088e-04 - val_r2: 0.6705
Epoch 3/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 79ms/step
 - val_r2: 0.8036
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m305s[0m 535ms/step - loss: 7.6906e-04 - val_loss: 5.6077e-04 - val_r2: 0.8036
Epoch 4/50
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 79ms/step
 - val_r2: 0.8172
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m284s[0m 498ms/step - loss: 6.9393e-04 - val_loss: 5.2195e-04 - val_r2: 0.8172
Epoch 5/50
[1m286/286[0m 