In [None]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Sequential
from keras.layers import LSTM,Dense, Activation, Dropout
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler

## Read inputs and peek at the data

In [None]:
# Load both input tables from the local data folder.
features, power = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/features.csv", "./data/power.csv")
)

In [None]:
# Preview the first rows of the feature table.
features.head()

In [None]:
# Preview the first rows of the power table.
power.head()

Select all the columns except `Timestamp`. Refer to the [documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html) for how `iloc` works and to [Stack Overflow](https://stackoverflow.com/a/56311678/15160666) for an example of how it is used.

In [None]:
# Raw values of every column except the first one.
values = features.iloc[:, 1:].to_numpy()

In [None]:
# Wrap the raw values in a DataFrame again, labelled with the matching
# column names (every column name except the first).
feature_names = features.columns[1:]
df = pd.DataFrame(data=values, columns=feature_names)

In [None]:
# Disabled: `df` is built from features.iloc[:, 1:], so the first column
# (presumably Timestamp) is not part of it and there is nothing to convert here.
#df["Timestamp"] = pd.to_datetime(df["Timestamp"]).apply(pd.Timestamp.timestamp) # pd.to_datetime(df["Timestamp"])
# Display the feature table.
df

## Make a correlation map between features and generate a heatmap of it.

In [None]:
# Pairwise correlations between the feature columns.
corrs = df.corr()

# Swap every exact 1.0 entry (the self-correlations on the diagonal, plus any
# perfectly correlated pair) for the smallest correlation, so that they do not
# pin the top of the colour scale (vmax) computed below.
corrs = corrs.replace(1.0, corrs.min().min())

# Boolean mask hiding the diagonal and the upper-right triangle. It is built
# from the matrix shape rather than from the correlation values: a value-based
# mask would leave visible any upper-triangle cell whose correlation is
# exactly 0, because 0 converts to False ("do not hide").
heatmap_mask = np.triu(np.ones_like(corrs, dtype=bool))

# Generate heatmap. With vmin=0, negative correlations get the same colour as 0.
heatmap = sns.heatmap(
    data=corrs,
    annot=True,
    vmin=0,
    vmax=corrs.max().max(),
    mask=heatmap_mask,
    annot_kws={"size": 8},
)
heatmap.set_title("Coorelation Between Features", fontdict={"fontsize": 24}, pad=16)
heatmap.set_autoscale_on(True)

Save the heatmap

In [None]:
# Resize the heatmap's figure to 84 x 42 inches and write it to disk as a PNG.
fig = heatmap.get_figure()
fig.set_size_inches(84.0, 42.0)
fig.savefig(
    "./data/heatmap2.png",
    bbox_inches="tight",
)

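The line below outputs an array of `<AxesSubplot:>` objects. The last 5 entries do not have a `title`; these are most likely the unused slots of the subplot grid, which has more cells than there are columns to plot.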

In [None]:
# Histogram (50 bins) of each column pandas can plot, on a 15 x 14 inch figure.
features.hist(figsize=(15, 14), bins=50)

## Merge csv files

In [None]:
# Join the two tables on their shared Timestamp column (pandas' default inner
# join) and keep a copy of the result on disk, without the index column.
merged = pd.merge(features, power, on="Timestamp")
merged.to_csv(
    "./data/final.csv",
    index=False,
)

In [None]:
# Display the merged table.
merged

In [None]:
# Print the column names, dtypes and non-null counts of the merged table.
merged.info() 

1) #sns heatmap ---> correlation 
1.5) # research which features are the most important.
2) 77 ---> 40-50 #sns plot ->>
3) #optimization, steepest descent algorithm, roulette (random) selection,
4) #feature reduction, 
feature: the correlations will be examined + null values will be filled in 
5) information theory (Shannon) -- i*log(i) = # in its algorithms, the logarithm of the largest / smallest (value)  
2) value --> combine in a weighted manner

In [None]:
# Correlation heatmap of the merged table, first column (Timestamp) excluded.
# The correlation matrix is plotted instead of the raw rows: an annotated
# heatmap of the raw table draws one labelled cell per row and column, and the
# 0..1 colour range only suits correlation-like values.
merged_corrs = merged.iloc[:, 1:].corr()
merged_heatmap = sns.heatmap(
    data=merged_corrs,
    annot=True,
    vmin=0,
    vmax=1,
    annot_kws={"size": 8},
)

In [None]:
# Summary statistics of the merged table.
merged.describe()

In [None]:
# Parse the Timestamp column into pandas datetimes and store it back.
parsed_timestamps = pd.to_datetime(merged["Timestamp"])
merged["Timestamp"] = parsed_timestamps

In [None]:
# Keep only the column at position 77 as a 2-D (rows, 1) array.
# NOTE(review): presumably this is the power target column; confirm the position.
merged_train = merged.iloc[:, 77:78].to_numpy()

In [None]:
# Display the extracted training column.
merged_train

In [None]:
# Fit the min-max scaler on the training column, then scale that same column
# with the fitted scaler.
ms = MinMaxScaler().fit(merged_train)
train_set_with_s = ms.transform(merged_train)

In [None]:
# Build supervised samples from the scaled series: each input is the 50 values
# that precede position `end`, and its target is the value at `end`.
window = 50
scaled_series = train_set_with_s[:, 0]
window_ends = range(window, len(scaled_series))

X_train = np.array([scaled_series[end - window:end] for end in window_ends])
y_train = np.array([scaled_series[end] for end in window_ends])

In [None]:
# Append a trailing axis of length 1 so the samples are 3-D:
# (samples, window length, 1).
n_samples, n_steps = X_train.shape[0], X_train.shape[1]
X_train = X_train.reshape((n_samples, n_steps, 1))

In [None]:
# Start an empty sequential model; the layers are added in the cells below.
# Re-running any of those cells adds its layers again, so re-run this cell first.
model = Sequential()

In [None]:
# First recurrent layer: 20 units, returns the full sequence, and declares the
# input as (window length, 1). Followed by 20% dropout.
# TODO: hyperparameter tuning (grid search); dimensionality reduction (PCA, regularization).
input_lstm = LSTM(
    units=20,
    activation="selu",
    return_sequences=True,
    input_shape=(X_train.shape[1], 1),
)
model.add(input_lstm)
model.add(Dropout(rate=0.2))

In [None]:
# Second recurrent layer: 30 units, still returning the full sequence,
# followed by 20% dropout.
second_lstm = LSTM(
    units=30,
    activation="selu",
    return_sequences=True,
)
model.add(second_lstm)
model.add(Dropout(rate=0.2))

In [None]:
# Third recurrent layer: 40 units, still returning the full sequence,
# followed by 30% dropout.
third_lstm = LSTM(
    units=40,
    activation="selu",
    return_sequences=True,
)
model.add(third_lstm)
model.add(Dropout(rate=0.3))

In [None]:
# Last recurrent layer: 50 units. return_sequences is left at its default, so
# only the final step is passed on. Followed by 30% dropout.
last_lstm = LSTM(units=50, activation="selu")
model.add(last_lstm)
model.add(Dropout(rate=0.3))

In [None]:
# Output layer: a single unit, i.e. one predicted value per sample.
output_layer = Dense(1)
model.add(output_layer)

In [None]:
# Train by minimising mean squared error with the Adam optimizer.
model.compile(
    loss="mean_squared_error",
    optimizer="adam",
)

In [None]:
# Stop training once the validation loss has not decreased for 25 epochs.
# "val_loss" is only reported when fit() is given validation data.
earlyStopping = EarlyStopping(
    patience=25,
    verbose=1,
    mode="min",
    monitor="val_loss",
)

In [None]:
# Train the network. A validation split is required here: the early-stopping
# callback monitors "val_loss", which Keras only computes when validation data
# is supplied; without it the callback has nothing to watch. Keras takes the
# validation samples from the end of the arrays, i.e. the most recent windows.
# NOTE(review): with epochs=1 the callback's patience can never be reached;
# raise epochs for a real training run.
model.fit(
    x=X_train,
    y=y_train,
    epochs=1,
    batch_size=64,
    verbose=1,
    validation_split=0.2,
    callbacks=[earlyStopping],
)

In [None]:
# Load the sample submission table.
# NOTE(review): absolute path, unlike the ./data/ paths used for the other
# CSV files; confirm it exists in the environment this notebook runs in.
df_test = pd.read_csv(filepath_or_buffer="/content/sample_submission.csv")

In [None]:
# Display the table as loaded.
df_test

In [None]:
# Drop, in place, every row that contains at least one missing value.
df_test.dropna(axis="index", inplace=True)

In [None]:
# Raw values of every column from the third one onwards.
real_pressure_value = df_test.iloc[:, 2:].to_numpy()

In [None]:
# Display the table after the rows with missing values were dropped.
df_test

In [None]:
# Convert the table to a NumPy array. np.asarray is used because np.ndarray(...)
# is the low-level constructor: its first argument is a shape, so it cannot
# turn a DataFrame into an array of its values.
# NOTE(review): every column is converted; confirm whether non-numeric columns
# should be dropped first.
df_test = np.asarray(df_test)

In [None]:
# Display the converted test data.
df_test

In [None]:

# Append a trailing axis of length 1, the same layout X_train is given before
# training. The new shape is built from dimension sizes (.shape), and the
# result is assigned because reshape returns a new array instead of changing
# the existing one.
test_values = np.asarray(df_test)
df_test = test_values.reshape((test_values.shape[0], test_values.shape[1], 1))

In [None]:
# Run the trained network on the test array.
# The training targets were min-max scaled with `ms`, so the predictions are on
# that scaled range; ms.inverse_transform maps them back to the original units.
# NOTE(review): the first layer was declared for windows of X_train.shape[1]
# steps; confirm df_test uses the same layout and scaling.
y_pred = model.predict(x=df_test)