In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
from keras import layers
from keras.utils.vis_utils import plot_model
import matplotlib.pyplot as plt

class DNN(keras.Sequential):
  """Fully connected regression network built on ``keras.Sequential``.

  The stack is a first Dense layer that declares ``input_shape``, followed by
  ``layer - 1`` further Dense layers of the same width, and finally a
  single-unit Dense output layer created without an activation argument.
  The model is compiled before ``__init__`` returns.

  Args:
    layer: total number of hidden Dense layers. The first hidden layer is
      always added, so values below 1 still yield one hidden layer.
    units: number of neurons in every hidden layer.
    activation: activation passed to every hidden layer.
    input_shape: shape of a single input sample, given to the first layer.
    dropout_rate: accepted for interface compatibility but currently unused
      (no Dropout or BatchNormalization layers are added).
    optimizer: forwarded to ``compile``.
    loss: forwarded to ``compile``.
  """
  def __init__(self, layer, units, activation, input_shape, dropout_rate, optimizer, loss):
    super().__init__()
    hidden_kwargs = dict(units=units, activation=activation)
    # The first hidden layer is the only one that declares the input shape.
    self.add(layers.Dense(input_shape=input_shape, **hidden_kwargs))
    # Remaining hidden layers share the same width and activation.
    for _ in range(layer - 1):
      self.add(layers.Dense(**hidden_kwargs))
    # Single-value regression output.
    self.add(layers.Dense(units=1))
    self.compile(optimizer=optimizer, loss=loss)

def month_day(month):
  """Return the number of days in ``month`` (1-12) of a non-leap year.

  No year is taken into account, so February always counts 28 days.
  """
  if month == 2:
    return 28
  odd = month % 2
  # Up to July the odd months have 31 days; from August on the even ones do.
  return 30 + odd if month < 8 else 31 - odd
def day_count(month):
  """Return the number of days in the months that precede ``month``.

  Uses ``month_day`` for every month from 1 up to (not including) ``month``,
  so ``day_count(1)`` is 0 and ``day_count(3)`` is 31 + 28.
  """
  return sum(month_day(earlier) for earlier in range(1, month))

def load_data_set(data):
  """Flatten a per-day load table into parallel hour-index and load lists.

  The first column of ``data`` is skipped. Every remaining column name is
  parsed as ``'year-month-day'`` and its first 24 rows are read as the loads
  for hours 0-23 of that day. Rows whose load equals 0 are skipped.

  The hour index is
  ``(year - 2020)*8760 + 24 + day_count(month)*24 + (day - 1)*24 + hour``,
  i.e. it assumes 8760-hour years and the 28-day February of ``month_day``.

  Side effect: prints the parsed ``[year, month, day]`` for every load
  above 28000.

  Returns:
    ``(_X, _y)`` — list of hour indices and list of the matching loads.
  """
  date_columns = list(data.columns)
  del date_columns[0]  # skip the leading column
  hour_indices = []
  loads = []
  for date in date_columns:
    readings = list(data[date])
    ymd = [int(part) for part in date.split('-')]
    for hour in range(24):
      load = readings[hour]
      if load == 0:
        continue
      if load > 28000:
        print(ymd)
      hour_indices.append(
          (ymd[0] - 2020)*8760 + 24 + day_count(ymd[1])*24 + (ymd[2] - 1)*24 + hour)
      loads.append(load)
  return hour_indices, loads

def group_split(X,y,train_size,n_samples=3000):
  """Randomly split the leading samples of ``X``/``y`` into train and validation lists.

  Each considered sample is assigned independently: one draw from
  ``np.random.uniform(0, 1)`` per sample, and the sample goes to the training
  set when the draw is below ``train_size``, otherwise to the validation set.
  The split sizes are therefore only approximately proportional to
  ``train_size``. The input order is preserved inside each output list.

  Args:
    X: indexable sequence of feature rows.
    y: indexable sequence of targets aligned with ``X``.
    train_size: probability that a sample is placed in the training set.
    n_samples: maximum number of leading samples to use. Defaults to 3000,
      the cap that used to be hard-coded; pass ``None`` to use all of ``X``.

  Returns:
    ``(X_train, y_train, X_val, y_val)`` as Python lists.
  """
  # Clamp to the data that is actually available so that inputs shorter than
  # the cap no longer raise IndexError.
  limit = len(X) if n_samples is None else min(n_samples, len(X))
  X_train = []
  y_train = []
  X_val = []
  y_val = []
  for i in range(limit):
    draw = np.random.uniform(0,1)
    if draw < train_size:
      X_train.append(X[i])
      y_train.append(y[i])
    else:
      X_val.append(X[i])
      y_val.append(y[i])
  return X_train, y_train, X_val, y_val

def load_temp_set(data):
  """Read a weather table into hour indices plus two value lists.

  Column 0 holds timestamps shaped like ``'year-month-day hour:...'``;
  columns 1 and 2 are returned unchanged as lists (named temperature and
  humidity in this function). Each timestamp is converted to
  ``(year - 2020)*8760 + 24 + day_count(month)*24 + (day - 1)*24 + hour``,
  the same hour index that ``load_data_set`` produces.

  Returns:
    ``(date_sum, temp_set, humid_set)`` — three lists with one entry per row.
  """
  columns = list(data.columns)
  stamp_col = columns[0]
  temp_col = columns[1]
  humid_col = columns[2]
  stamps = list(data[stamp_col])
  temp_set = list(data[temp_col])
  humid_set = list(data[humid_col])
  date_sum = []
  for stamp in stamps:
    pieces = stamp.split(' ')
    ymd = [int(tok) for tok in pieces[0].split('-')]
    day_start = (ymd[0] - 2020)*8760 + 24 + day_count(ymd[1])*24 + (ymd[2] - 1)*24
    clock = [int(tok) for tok in pieces[1].split(':')]
    # Only the hour component of the time contributes to the index.
    date_sum.append(day_start + clock[0])
  return date_sum,temp_set,humid_set

def combine_set(x,y,date,temp,humid):
  """Join load samples and weather readings on their shared hour index.

  Args:
    x: hour indices of the load samples.
    y: load values aligned with ``x``.
    date: hour indices of the weather readings.
    temp: values aligned with ``date`` (appended first).
    humid: values aligned with ``date`` (appended second).

  Returns:
    ``(_X, _y)`` dictionaries keyed by hour index. ``_X[k]`` starts as ``[k]``
    and gains one ``temp`` and one ``humid`` value for every weather reading
    with index ``k``; ``_y[k]`` is the load. Both dictionaries also carry the
    placeholder entry ``0: '0'``.
  """
  features = {0:'0'}
  targets = {0:'0'}
  for pos, key in enumerate(x):
    features[key] = [key]
    targets[key] = y[pos]
  for pos, key in enumerate(date):
    if key not in features:
      # Weather reading without a matching load sample.
      continue
    features[key].extend((temp[pos], humid[pos]))
  return features, targets

def filtering_set(x,y):
  """Turn the dictionaries from ``combine_set`` into parallel lists.

  The entry with key 0 and every entry whose feature list has exactly one
  element (only the hour index, no appended values) are dropped. The
  remaining entries keep the iteration order of ``x``.

  Returns:
    ``(_X, _y)`` — list of feature rows and list of the matching targets.
  """
  kept_keys = [key for key in x if key != 0 and len(x[key]) != 1]
  rows = [x[key] for key in kept_keys]
  targets = [y[key] for key in kept_keys]
  return rows, targets


# Load table: load_data_set() skips its first column and parses the remaining
# column names as 'year-month-day' dates.
load_data = pd.read_csv('average_power.csv')
# Weather table, read with the cp949 codec: load_temp_set() uses column 0 as the
# timestamp and columns 1 and 2 as the values it names temperature and humidity.
load_temp = pd.read_csv('TempHumid.csv',encoding='cp949')


In [None]:
# Remove outlier values from the load data: sum every column and collect the
# positions of the columns whose sum is below 270000.
load_sum_list = [load_data.iloc[:, col].sum() for col in range(load_data.shape[1])]
strange_data_list = [col for col, total in enumerate(load_sum_list) if total < 270000]

plt.plot(load_sum_list)
print(strange_data_list)

In [None]:
# Drop the flagged columns, then plot the per-column sums of what is left.
dropped_data = load_data.drop(columns=load_data.columns[strange_data_list])

strange_data_list1 = []  # not filled in this cell
load_sum_list1 = [dropped_data.iloc[:, col].sum() for col in range(dropped_data.shape[1])]

plt.plot(load_sum_list1)

In [None]:
# --- Model hyperparameters ---
layer = 9              # number of hidden Dense layers built by DNN
units = 128            # neurons per hidden layer
activation = 'relu'
train_size = 0.75      # probability that a sample lands in the training split
input_shape= [3]       # one feature row = [hour index, temperature, humidity]
dropout_rate = 0.5     # passed to DNN, which currently adds no Dropout layer
optimizer = 'adam'
loss = 'mae'

# --- Training settings ---
batch_size = 1
epochs = 100
SEED = 42              # fixed seed so the split and the training run are repeatable

# --- Build the feature / target lists ---
# NOTE(review): this reads load_data, not the outlier-filtered dropped_data
# built in an earlier cell — confirm which table is intended.
data_X, data_y = load_data_set(load_data)

date_set,temp_set,humid_set = load_temp_set(load_temp)
print(date_set[0])
raw_X,raw_y = combine_set(data_X,data_y,date_set,temp_set,humid_set)
print(len(raw_X))
X,y = filtering_set(raw_X,raw_y)
print(len(X))
print(X[:5])  # preview only; printing every row floods the cell output

# group_split draws from NumPy's global random generator, so seed it first.
np.random.seed(SEED)
X_train, y_train, X_val, y_val = group_split(X, y, train_size)

# Seed TensorFlow's global generator before the model is created.
tf.random.set_seed(SEED)
model = DNN(layer,units,activation,input_shape,dropout_rate,optimizer,loss)
# Optional architecture diagram:
#plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)


In [None]:
# Train the model; the returned object keeps the loss values that the
# following cells display and plot.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=batch_size,
    epochs=epochs,
    verbose=1, # show a progress bar for every epoch (0 would suppress the output)
)

In [None]:
# Display the training loss values recorded by fit().
history.history['loss']

In [None]:
# Plot the training and validation loss curves and report the lowest
# validation loss that was reached.
history_df = pd.DataFrame(history.history)
loss_curves = history_df.loc[0:, ['loss', 'val_loss']]
loss_curves.plot()
best_val_loss = history_df['val_loss'].min()
print("Minimum Validation Loss: {:0.4f}".format(best_val_loss))

In [None]:
# Save the trained model to the path 'model1'.
model.save('model1')

In [None]:
# Reload the model that was saved to 'model1'.
new_model = keras.models.load_model('model1')

# Training set: targets in blue, predictions in red.
# NOTE(review): the predictions come from the in-memory `model`; `new_model`
# is loaded but not used in this cell — confirm whether the reloaded model
# was meant to be evaluated here.
predict = model.predict(X_train)
plt.plot(y_train,'b')
plt.plot(predict,'r')


In [None]:
# Validation set: targets in blue, predictions of `model` in red.
predict = model.predict(X_val)
plt.plot(y_val,'b')
plt.plot(predict,'r')