<a href="https://colab.research.google.com/github/DaeSeokSong/LSTM-PPoA/blob/main/Predict_Price_of_Agricultural.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 당년 농산물 가격 예측 LSTM 모델

Input = 1월부터 전월까지의 param(변수) 값을 하나로 묶은 array(인스턴스)

output = 당월의 해당 채소 가격

layer = 원래 해당 채소 가격

### Google Drive Mount

In [1]:
# Mount Google Drive at /content/gdrive so files stored on Drive are reachable
# from this Colab session.
from google.colab import drive
drive.mount('/content/gdrive')

# Move into the project folder on Drive and list its contents; the %cd magic
# also echoes the new working directory in the cell output.
%cd /content/gdrive/MyDrive/DeepLearning/Project/PPoA
!ls -al

Mounted at /content/gdrive
/content/gdrive/MyDrive/DeepLearning/Project/PPoA
total 16
drwx------ 2 root root  4096 Nov 24 06:07  Dataset
-rw------- 1 root root 11495 Nov 24 08:38 'Predict Price of Agricultural.ipynb'


### Import


In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from keras.utils import *
from sklearn.preprocessing import *

### Function

In [28]:
def Normalizer(targetData):
    """Min-max scale ``targetData`` into the range [0, 1].

    Each value is mapped to ``(value - min) / (max - min)``.

    Parameters
    ----------
    targetData : pandas.Series, pandas.DataFrame or numpy.ndarray
        Numeric data exposing ``.min()`` and ``.max()``.

    Returns
    -------
    Same container type as ``targetData``, holding the scaled values.
    Where the range is zero (all values identical) the result is 0.0
    instead of the NaN that a 0/0 division would produce.

    Notes
    -----
    This function shares its name with ``sklearn.preprocessing.Normalizer``,
    which the notebook's ``from sklearn.preprocessing import *`` also binds.
    Re-running the import cell after this one rebinds ``Normalizer`` to the
    scikit-learn class, so this cell must be re-run afterwards.
    """
    lowest = targetData.min()
    span = targetData.max() - lowest
    shifted = targetData - lowest

    # A zero span means every value equals the minimum, so `shifted` is all
    # zeros; dividing by 1 instead of 0 yields 0.0 rather than NaN.
    if np.ndim(span) == 0:
        safe_span = span if span != 0 else 1
    else:
        # DataFrame input: `span` holds one entry per column.
        safe_span = span.where(span != 0, 1)

    return shifted / safe_span

### Data read

In [49]:
# Load the CSV from ./Dataset, using its first column as the index.
f_name = '2006_onion.csv'
df = pd.read_csv(f'./Dataset/{f_name}', index_col=0)

# ---- Weather variables ----
# Precipitation (mean / maximum)
precipi_avg, precipi_max = df['평균월강수량(mm)'], df['최다월강수량(mm)']
# Temperature (mean / mean high / mean low)
temper_avg, temper_max, temper_min = (
    df['평균기온(℃)'],
    df['평균최고기온(℃)'],
    df['평균최저기온(℃)'],
)
# Wind speed (mean / maximum)
windSpeed_avg, windSpeed_max = df['평균풍속(m/s)'], df['최대풍속(m/s)']
# Humidity (mean / minimum)
humidity_avg, humidity_min = df['평균습도(%rh)'], df['최저습도(%rh)']
# Sunshine total / insolation total
sunshine, insolation = df['일조합'], df['일사합']

# ---- Other variables ----
# Previous-year figures: cultivated area, production (harvest),
# yield per unit area, import volume
last_cultiv_area, last_production, last_prod_unit, last_amount_import = (
    df['전년생산면적'],
    df['전년생산량'],
    df['전년생산단수'],
    df['전년수입량'],
)
# Price of the crop itself and diesel price
crops_price, diesel_price = df['가격'], df['경유가격']
# Price indices (pidx): change relative to the 2015 base
total_pidx, prod_pidx = df['총 물가지수'], df['상품']
agricul_marine_prod_pidx, indust_prod_pidx = df['농축수산물'], df['공업제품']
serv_pidx, pub_serv_pidx, per_serv_pidx = (
    df['서비스'],
    df['공공서비스'],
    df['개인서비스'],
)
house_pidx = df['집세']

# Every feature series, in a fixed order.
prams = [
    # precipitation
    precipi_avg,
    precipi_max,
    # temperature
    temper_avg,
    temper_max,
    temper_min,
    # wind speed
    windSpeed_avg,
    windSpeed_max,
    # humidity
    humidity_avg,
    humidity_min,
    # sunshine, insolation
    sunshine,
    insolation,
    # previous-year area, production, unit yield, imports
    last_cultiv_area,
    last_production,
    last_prod_unit,
    last_amount_import,
    # crop price, diesel price
    crops_price,
    diesel_price,
    # price indices
    total_pidx,
    prod_pidx,
    agricul_marine_prod_pidx,
    indust_prod_pidx,
    serv_pidx,
    pub_serv_pidx,
    per_serv_pidx,
    house_pidx,
]
df.head()

Unnamed: 0,평균월강수량(mm),최다월강수량(mm),평균기온(℃),평균최고기온(℃),평균최저기온(℃),평균풍속(m/s),최대풍속(m/s),평균습도(%rh),최저습도(%rh),일조합,일사합,가격,전년생산면적,전년생산단수,전년생산량,전년수입량,경유가격,총 물가지수,상품,농축수산물,공업제품,서비스,집세,공공서비스,개인서비스
2006-01,28.4,99.0,0.6,5.7,-3.8,1.9,24.7,62,7,153.2,253.5,460,16737,6114,1023331,2689,1157,79.306,77.13,75.848,77.59,80.83,79.137,88.025,78.424
2006-02,30.6,102.7,1.2,6.5,-3.7,2.4,31.3,59,7,163.0,317.06,521,16737,6114,1023331,987,1161,79.464,77.13,75.397,77.666,81.07,79.216,88.284,78.734
2006-03,13.4,65.9,5.9,12.2,0.0,2.6,28.1,53,4,217.7,483.36,775,16737,6114,1023331,3414,1170,79.934,77.13,75.397,77.666,81.789,79.216,88.543,79.821
2006-04,101.9,282.8,11.3,16.9,6.0,2.7,25.6,62,5,156.6,448.18,794,16737,6114,1023331,8778,1211,80.013,77.282,74.946,77.974,81.949,79.295,88.629,79.976
2006-05,158.8,333.0,17.3,22.9,12.2,2.1,24.7,68,8,180.3,544.28,584,16737,6114,1023331,1595,1251,80.169,77.511,73.818,78.663,82.029,79.374,88.716,80.054


### Data preprocessing

In [50]:
# Normalization: min-max scale each feature series and KEEP the result.
# Rebinding the loop variable alone would discard every scaled series, so
# the results are collected in a new list and `prams` is left untouched.
prams_normalized = []
for pram in prams:
    # Columns whose name contains '전년' (previous year) are passed through
    # unscaled. NOTE(review): presumably because they can be constant across
    # rows, which makes min-max scaling degenerate; confirm the intent.
    if '전년' in pram.name:
        scaled = pram
    else:
        scaled = Normalizer(pram)
    prams_normalized.append(scaled)
    print(scaled)

 2006-01    0.023981
 2006-02    0.027498
 2006-03    0.000000
 2006-04    0.141487
 2006-05    0.232454
 2006-06    0.215188
 2006-07    1.000000
 2006-08    0.179856
 2006-09    0.072582
 2006-10    0.052598
 2006-11    0.059472
 2006-12    0.014868
Name: 평균월강수량(mm), dtype: float64
 2006-01    0.028084
 2006-02    0.031223
 2006-03    0.000000
 2006-04    0.184032
 2006-05    0.226625
 2006-06    0.356440
 2006-07    1.000000
 2006-08    0.194807
 2006-09    0.175717
 2006-10    0.233582
 2006-11    0.139657
 2006-12    0.081113
Name: 최다월강수량(mm), dtype: float64
 2006-01    0.000000
 2006-02    0.023166
 2006-03    0.204633
 2006-04    0.413127
 2006-05    0.644788
 2006-06    0.799228
 2006-07    0.872587
 2006-08    1.000000
 2006-09    0.729730
 2006-10    0.625483
 2006-11    0.320463
 2006-12    0.054054
Name: 평균기온(℃), dtype: float64
 2006-01    0.000000
 2006-02    0.030888
 2006-03    0.250965
 2006-04    0.432432
 2006-05    0.664093
 2006-06    0.810811
 2006-07    0.810811
 

### Modeling

In [None]:
# Stack: one LSTM layer -> dropout -> single-unit Dense output.
model = Sequential([
    LSTM(
        1,                      # number of units in this layer
        input_shape=(50, 1),    # TODO: still a placeholder; the input shape is undecided
        return_sequences=True,  # emit an output for every step of the sequence
    ),
    Dropout(0.01),                   # dropout against overfitting, rate 0.01
    Dense(1, activation='sigmoid'),  # sigmoid activation on the output
])

# Mean squared error loss, optimized with RMSprop.
model.compile(loss='mse', optimizer='rmsprop')

### Learning

### Prediction