In [13]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# conda install pytorch torchvision -c pytorch 
import tensorflow as tf
import torch
from torch.utils.data import TensorDataset # 텐서데이터셋
from torch.utils.data import DataLoader # 데이터로더
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import LeavePOut

# import MLPRegressor
# XGBRegressor
# LGBMRegressor
# CatBoostRegressor
# KNeighborsRegressor
# Lasso
# Ridge
# ElasticNet
# SGDRegressor

from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import SGDRegressor


In [14]:
# data/output_df 에서 파일명에 "spring" 이 들어간 파일을 불러오는 함수 만들기
def get_spring_df(path):
    file_list = os.listdir(path)
    file_list_spring = [file for file in file_list if 'spring' in file]
    df_list = []
    for file in file_list_spring:
        df = pd.read_csv(path + '/' + file)
        df_list.append(df)
        # 지역명 컬럼 추가하여 파일명에서 추출
        df['지역명'] = file.split('_')[0]
    df = pd.concat(df_list)
    return df

# 함수 실행
spring_location_df = get_spring_df('data/output_df')

# 함수 실행
spring_weather_df = get_spring_df('data/weather')

# 파일 안에 '-' 를 0으로 변경
spring_location_df = spring_location_df.replace('-', 0)
spring_weather_df = spring_weather_df.replace('-', 0)

# spring_location_df와 spring_weather_df를 '지역명' 컬럼을 기준으로 groupby, 일시를 기준으로 정렬 merge
spring_df = pd.merge(spring_location_df, spring_weather_df, on=['지역명', '일시'], how='left').sort_values(by=['지역명', '일시']).reset_index(drop=True)
spring_df= spring_df[["일시","지역명",	"일반봄배추:면적 (ha)",	"생산량 (톤)"	,"평균기온(°C)",	"최고기온(°C)",	"최저기온(°C)",	"월합강수량(00~24h만)(mm)"	,"합계 일사량(MJ/m2)"]]
spring_df

Unnamed: 0,일시,지역명,일반봄배추:면적 (ha),생산량 (톤),평균기온(°C),최고기온(°C),최저기온(°C),월합강수량(00~24h만)(mm),합계 일사량(MJ/m2)
0,2000,busan,0,0,9.1,18.3,-2.5,48.1,495.90
1,2000,busan,0,0,13.2,21.4,2.8,65.4,539.21
2,2000,busan,0,0,17.3,29.1,10.7,72.0,607.84
3,2001,busan,0,0,9.6,22.7,-3.3,6.1,439.57
4,2001,busan,0,0,15.0,25.3,3.8,42.4,539.17
...,...,...,...,...,...,...,...,...,...
5813,2020,ulsan,6,281,12.3,25.5,2.9,73.2,633.67
5814,2020,ulsan,6,281,18.1,28.7,9.2,51.5,614.27
5815,2021,ulsan,10,503,10.5,21.9,-0.8,138.9,497.10
5816,2021,ulsan,10,503,13.8,25.7,3.1,73.9,608.97


In [15]:
#  X_train, y_train, X_test, y_test 만들기
def make_train_test(df):
    # X, y 나누기
    X = df.drop(['일시', '지역명', '생산량 (톤)'], axis=1)
    y = df['생산량 (톤)']

    # X_train, X_test, y_train, y_test 나누기
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X_train, y_train, X_test, y_test

# lstm 모델 생성
def lstm_model(X_train, y_train, X_test, y_test):
    # 데이터 정규화
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 데이터셋 생성
    X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
    X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

    # 모델 생성
    model = Sequential()
    model.add(LSTM(32, input_shape=(1, X_train_scaled.shape[2]), activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')

    # 모델 학습
    model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)

    # 예측
    y_pred = model.predict(X_test_scaled)

    # 예측값 역정규화
    y_pred = scaler.inverse_transform(y_pred)
    y_test = scaler.inverse_transform(y_test.values.reshape(-1, 1))

    # 평가
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return mse, rmse, mae, r2

# def lstm_model 함수 실행
X_train, y_train, X_test, y_test = make_train_test(spring_df)
mse, rmse, mae, r2 = lstm_model(X_train, y_train, X_test, y_test)

# 결과 출력
print('mse :', mse)
print('rmse :', rmse)
print('mae :', mae)
print('r2 :', r2)



Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-12-20 09:32:31.996807: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-12-20 09:32:31.997554: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int).