In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import math
from sklearn.metrics import mean_squared_error

# BKK

In [None]:
# Select the province for this section; its name is the directory and,
# lower-cased, the file prefix of the training CSV.
provinces = ['BKK','Chiangmai','Khonkaen','Rayong','Saraburi','Surat']
province = provinces[0]
# date_parser=True was dropped: the argument expects a callable, has no effect
# without parse_dates, and is deprecated since pandas 2.0.
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')


In [None]:
# Keep only the model input columns: discard the timestamp and coordinates.
data_training = data_training.drop(columns=['date_time', 'lat', 'long'])
data_training.head()

In [None]:
# Load this province's cleaned series, used below as the evaluation data.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_test = pd.read_csv(province+'_clean.csv')
data_test.sample(5)

In [None]:
# Drop 'Unnamed: 0' (presumably a saved index) and the coordinates;
# 'date_time' is kept for now.
data_test = data_test.drop(columns=['Unnamed: 0', 'lat', 'long'])
data_test.head()

In [None]:
# Fit the min-max scaler on the training features and replace the DataFrame
# with its scaled ndarray. The inverse_transform call that used to follow was
# removed: its result was discarded.
scaler = MinMaxScaler()
data_training = scaler.fit_transform(data_training)
# Per-feature reciprocal of the fitted scale factor.
scale = 1/scaler.scale_


In [None]:
# Turn the scaled series into supervised samples: every input is a block of
# 60 consecutive rows (all features) and its target is column 0 of the next row.
window_ends = range(60, data_training.shape[0])
X_train = np.array([data_training[end-60:end] for end in window_ends])
y_train = np.array([data_training[end, 0] for end in window_ends])
X_train.shape

In [None]:
# Stacked LSTM regressor: 60 -> 80 -> 120 recurrent units, each layer followed
# by 20% dropout, then one linear output unit for the scaled target.
# The input shape (timesteps, features) is read from X_train instead of
# hard-coding 4 features, so it follows whatever columns preprocessing keeps.
modelLSTM = Sequential()

modelLSTM.add(LSTM(units = 60, activation = 'relu', return_sequences = True, input_shape = X_train.shape[1:]))
modelLSTM.add(Dropout(0.2))
modelLSTM.add(LSTM(units = 80, activation = 'relu', return_sequences = True))
modelLSTM.add(Dropout(0.2))
modelLSTM.add(LSTM(units = 120, activation = 'relu'))
modelLSTM.add(Dropout(0.2))

modelLSTM.add(Dense(units = 1))

In [None]:
# Print the layer stack with output shapes and parameter counts.
modelLSTM.summary()

In [None]:
# Training callbacks; all three monitor the training loss.
filepath = province+'_modelLSTM_yourself.hdf5'
# Save the model whenever the loss reaches a new minimum.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='loss',
                             verbose=0,
                             save_best_only=True,
                             mode='min')

# Stop once the loss has failed to improve for 3 epochs in a row.
earlystopping = EarlyStopping(
    monitor='loss',
    patience=3,
    min_delta=0,
    mode='auto'
)

# Cut the learning rate to 20% after 2 stagnant epochs. min_lr is the floor and
# must sit below the starting rate: the former 0.001 equals Adam's default
# learning rate, so the reduction could never take effect.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=2,
    min_lr=1e-5,
    verbose=0
)
callbacks = [checkpoint, earlystopping, reduce_lr]

In [None]:
# Train with the Adam optimizer (default settings) against mean squared error.
modelLSTM.compile(optimizer='adam', loss = 'mean_squared_error')

In [None]:
# Fit for up to 10 epochs in batches of 32; the callbacks may stop training early.
history = modelLSTM.fit(X_train, y_train, epochs=10, batch_size=32, callbacks=callbacks)

In [None]:
# Reload the unscaled training frame; data_training currently holds the scaled array.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')
data_training = data_training.drop(['date_time', 'lat', 'long'], axis = 1)
# errors='ignore' lets this cell be re-run after 'date_time' is already gone.
data_test = data_test.drop(['date_time'], axis = 1, errors='ignore')

In [None]:
# The last 60 training rows provide the history for the first test windows.
past_60_days = data_training.tail(60)

In [None]:
# Prefix the test rows with the training tail so every test row has a full window.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df = pd.concat([past_60_days, data_test], ignore_index = True)
df.head()

In [None]:
# Remove rows with missing values before scaling, then report the remaining size.
df = df.dropna()
df.shape

In [None]:
# Size of the combined frame after the NaN rows were removed.
df.shape

In [None]:
# Scale with the scaler fitted on the training data (no refit on test rows).
inputs = scaler.transform(df)
inputs

In [None]:
# Slide a 60-row window over the scaled rows; the target is column 0 of the
# row right after each window.
window_ends = range(60, inputs.shape[0])
X_test = [inputs[end-60:end] for end in window_ends]
y_test = [inputs[end, 0] for end in window_ends]

In [None]:
# Stack the window lists into arrays for Keras and show their shapes.
X_test = np.array(X_test)
y_test = np.array(y_test)
X_test.shape, y_test.shape

In [None]:
# Predict the scaled target for every test window.
y_pred = modelLSTM.predict(X_test)

In [None]:
# Map the scaled target (column 0) back to original units.
# MinMaxScaler computes x_scaled = x*scale_ + min_, so the inverse has to remove
# min_ before rescaling; the earlier plain multiplication lost that offset and
# shifted every saved prediction by the column's training minimum.
scale = 1/scaler.scale_[0]
offset = scaler.min_[0]
y_pred = (y_pred - offset)*scale
y_test = (y_test - offset)*scale

In [None]:
# Root-mean-square error between the rescaled targets and predictions.
rmse = math.sqrt(mean_squared_error(y_test, y_pred))
print(province+' RMSE =', rmse)

In [None]:
# Plot actual against predicted PM2.5 over the test period.
fig, ax = plt.subplots(figsize=(14,5))
ax.plot(y_test, color='red', label='Real')
ax.plot(y_pred, color='blue', label='Predicted')
ax.set_xlabel('Time')
ax.set_ylabel('PM2.5')
ax.legend()
plt.show()

In [None]:
import pickle

# Persist the rescaled predictions as a binary pickle named after the province.
pickle_path = 'lstm_'+province.lower()+'.pickle'
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(y_pred, pickle_file)

In [None]:
# Also save the predictions as CSV; the Submit section reads them back via column '0'.
df1 = pd.DataFrame(y_pred)
df1.to_csv('lstm_'+province.lower()+'.csv')

# Chiangmai

In [None]:
# Select the province for this section; its name is the directory and,
# lower-cased, the file prefix of the training CSV.
provinces = ['BKK','Chiangmai','Khonkaen','Rayong','Saraburi','Surat']
province = provinces[1]
# date_parser=True was dropped: the argument expects a callable, has no effect
# without parse_dates, and is deprecated since pandas 2.0.
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')


In [None]:
# Keep only the model input columns: discard the timestamp and coordinates.
data_training = data_training.drop(columns=['date_time', 'lat', 'long'])
data_training.head()

In [None]:
# Load this province's cleaned series, used below as the evaluation data.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_test = pd.read_csv(province+'_clean.csv')
data_test.sample(5)

In [None]:
# Drop 'Unnamed: 0' (presumably a saved index) and the coordinates;
# 'date_time' is kept for now.
data_test = data_test.drop(columns=['Unnamed: 0', 'lat', 'long'])
data_test.head()

In [None]:
# Fit the min-max scaler on the training features and replace the DataFrame
# with its scaled ndarray. The inverse_transform call that used to follow was
# removed: its result was discarded.
scaler = MinMaxScaler()
data_training = scaler.fit_transform(data_training)
# Per-feature reciprocal of the fitted scale factor.
scale = 1/scaler.scale_


In [None]:
# Turn the scaled series into supervised samples: every input is a block of
# 60 consecutive rows (all features) and its target is column 0 of the next row.
window_ends = range(60, data_training.shape[0])
X_train = np.array([data_training[end-60:end] for end in window_ends])
y_train = np.array([data_training[end, 0] for end in window_ends])
X_train.shape

In [None]:
# Two-layer LSTM regressor: 60 -> 80 recurrent units, each followed by 20%
# dropout, then one linear output unit for the scaled target.
# The input shape (timesteps, features) is read from X_train instead of
# hard-coding 4 features, so it follows whatever columns preprocessing keeps.
modelLSTM = Sequential()

modelLSTM.add(LSTM(units = 60, activation = 'relu', return_sequences = True, input_shape = X_train.shape[1:]))
modelLSTM.add(Dropout(0.2))
modelLSTM.add(LSTM(units = 80, activation = 'relu'))
modelLSTM.add(Dropout(0.2))

modelLSTM.add(Dense(units = 1))

In [None]:
# Print the layer stack with output shapes and parameter counts.
modelLSTM.summary()

In [None]:
# Training callbacks; all three monitor the training loss.
filepath = province+'_best_modelLSTM_yourself.hdf5'
# Save the model whenever the loss reaches a new minimum.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='loss',
                             verbose=0,
                             save_best_only=True,
                             mode='min')

# Stop once the loss has failed to improve for 3 epochs in a row.
earlystopping = EarlyStopping(
    monitor='loss',
    patience=3,
    min_delta=0,
    mode='auto'
)

# Cut the learning rate to 20% after 2 stagnant epochs. min_lr is the floor and
# must sit below the starting rate: the former 0.001 equals Adam's default
# learning rate, so the reduction could never take effect.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=2,
    min_lr=1e-5,
    verbose=0
)
callbacks = [checkpoint, earlystopping, reduce_lr]

In [None]:
# Train with the Adam optimizer (default settings) against mean squared error.
modelLSTM.compile(optimizer='adam', loss = 'mean_squared_error')

In [None]:
# Fit for up to 10 epochs in batches of 32; the callbacks may stop training early.
history = modelLSTM.fit(X_train, y_train, epochs=10, batch_size=32, callbacks=callbacks)

In [None]:
# Reload the unscaled training frame; data_training currently holds the scaled array.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')
data_training = data_training.drop(['date_time', 'lat', 'long'], axis = 1)
# errors='ignore' lets this cell be re-run after 'date_time' is already gone.
data_test = data_test.drop(['date_time'], axis = 1, errors='ignore')

In [None]:
# The last 60 training rows provide the history for the first test windows.
past_60_days = data_training.tail(60)

In [None]:
# Prefix the test rows with the training tail so every test row has a full window.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df = pd.concat([past_60_days, data_test], ignore_index = True)
df.head()

In [None]:
# Remove rows with missing values before scaling, then report the remaining size.
df = df.dropna()
df.shape

In [None]:
# Size of the combined frame after the NaN rows were removed.
df.shape

In [None]:
# Scale with the scaler fitted on the training data (no refit on test rows).
inputs = scaler.transform(df)
inputs

In [None]:
# Slide a 60-row window over the scaled rows; the target is column 0 of the
# row right after each window.
window_ends = range(60, inputs.shape[0])
X_test = [inputs[end-60:end] for end in window_ends]
y_test = [inputs[end, 0] for end in window_ends]

In [None]:
# Stack the window lists into arrays for Keras and show their shapes.
X_test = np.array(X_test)
y_test = np.array(y_test)
X_test.shape, y_test.shape

In [None]:
# Predict the scaled target for every test window.
y_pred = modelLSTM.predict(X_test)

In [None]:
# Map the scaled target (column 0) back to original units.
# MinMaxScaler computes x_scaled = x*scale_ + min_, so the inverse has to remove
# min_ before rescaling; the earlier plain multiplication lost that offset and
# shifted every saved prediction by the column's training minimum.
scale = 1/scaler.scale_[0]
offset = scaler.min_[0]
y_pred = (y_pred - offset)*scale
y_test = (y_test - offset)*scale

In [None]:
# Root-mean-square error between the rescaled targets and predictions.
rmse = math.sqrt(mean_squared_error(y_test, y_pred))
print(province+' RMSE =', rmse)

In [None]:
# Plot actual against predicted PM2.5 over the test period.
fig, ax = plt.subplots(figsize=(14,5))
ax.plot(y_test, color='red', label='Real')
ax.plot(y_pred, color='blue', label='Predicted')
ax.set_xlabel('Time')
ax.set_ylabel('PM2.5')
ax.legend()
plt.show()

In [None]:
import pickle

# Persist the rescaled predictions as a binary pickle named after the province.
pickle_path = 'lstm_'+province.lower()+'.pickle'
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(y_pred, pickle_file)

In [None]:
# Also save the predictions as CSV; the Submit section reads them back via column '0'.
df1 = pd.DataFrame(y_pred)
df1.to_csv('lstm_'+province.lower()+'.csv')

# Khonkaen

In [None]:
# Select the province for this section; its name is the directory and,
# lower-cased, the file prefix of the training CSV.
provinces = ['BKK','Chiangmai','Khonkaen','Rayong','Saraburi','Surat']
province = provinces[2]
# date_parser=True was dropped: the argument expects a callable, has no effect
# without parse_dates, and is deprecated since pandas 2.0.
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')


In [None]:
# Keep only the model input columns: discard the timestamp and coordinates.
data_training = data_training.drop(columns=['date_time', 'lat', 'long'])
data_training.head()

In [None]:
# Load this province's cleaned series, used below as the evaluation data.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_test = pd.read_csv(province+'_clean.csv')
data_test.sample(5)

In [None]:
# Drop 'Unnamed: 0' (presumably a saved index) and the coordinates;
# 'date_time' is kept for now.
data_test = data_test.drop(columns=['Unnamed: 0', 'lat', 'long'])
data_test.head()

In [None]:
# Fit the min-max scaler on the training features and replace the DataFrame
# with its scaled ndarray. The inverse_transform call that used to follow was
# removed: its result was discarded.
scaler = MinMaxScaler()
data_training = scaler.fit_transform(data_training)
# Per-feature reciprocal of the fitted scale factor.
scale = 1/scaler.scale_


In [None]:
# Turn the scaled series into supervised samples: every input is a block of
# 60 consecutive rows (all features) and its target is column 0 of the next row.
window_ends = range(60, data_training.shape[0])
X_train = np.array([data_training[end-60:end] for end in window_ends])
y_train = np.array([data_training[end, 0] for end in window_ends])
X_train.shape

In [None]:
# Single-layer LSTM regressor: 100 recurrent units with 20% dropout, then one
# linear output unit for the scaled target.
# The input shape (timesteps, features) is read from X_train instead of
# hard-coding 4 features, so it follows whatever columns preprocessing keeps.
modelLSTM = Sequential()

modelLSTM.add(LSTM(units = 100, activation = 'relu', input_shape = X_train.shape[1:]))
modelLSTM.add(Dropout(0.2))

modelLSTM.add(Dense(units = 1))

In [None]:
# Print the layer stack with output shapes and parameter counts.
modelLSTM.summary()

In [None]:
# Training callbacks; all three monitor the training loss.
filepath = province+'_best_modelLSTM_yourself.hdf5'
# Save the model whenever the loss reaches a new minimum.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='loss',
                             verbose=0,
                             save_best_only=True,
                             mode='min')

# Stop once the loss has failed to improve for 3 epochs in a row.
earlystopping = EarlyStopping(
    monitor='loss',
    patience=3,
    min_delta=0,
    mode='auto'
)

# Cut the learning rate to 20% after 2 stagnant epochs. min_lr is the floor and
# must sit below the starting rate: the former 0.001 equals Adam's default
# learning rate, so the reduction could never take effect.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=2,
    min_lr=1e-5,
    verbose=0
)
callbacks = [checkpoint, earlystopping, reduce_lr]

In [None]:
# Train with the Adam optimizer (default settings) against mean squared error.
modelLSTM.compile(optimizer='adam', loss = 'mean_squared_error')

In [None]:
# Fit for up to 10 epochs in batches of 8; the callbacks may stop training early.
history = modelLSTM.fit(X_train, y_train, epochs=10, batch_size=8, callbacks=callbacks)

In [None]:
# Reload the unscaled training frame; data_training currently holds the scaled array.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')
data_training = data_training.drop(['date_time', 'lat', 'long'], axis = 1)
# errors='ignore' lets this cell be re-run after 'date_time' is already gone.
data_test = data_test.drop(['date_time'], axis = 1, errors='ignore')

In [None]:
# 'date_time' was already dropped in the previous cell, so an unconditional
# drop here raised KeyError on a top-to-bottom run; ignore the missing label.
data_test = data_test.drop(['date_time'], axis = 1, errors='ignore')
data_test.head()

In [None]:
# The last 60 training rows provide the history for the first test windows.
past_60_days = data_training.tail(60)

In [None]:
# Prefix the test rows with the training tail so every test row has a full window.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df = pd.concat([past_60_days, data_test], ignore_index = True)
df.head()

In [None]:
# Remove rows with missing values before scaling, then report the remaining size.
df = df.dropna()
df.shape

In [None]:
# Size of the combined frame after the NaN rows were removed.
df.shape

In [None]:
# Scale with the scaler fitted on the training data (no refit on test rows).
inputs = scaler.transform(df)
inputs

In [None]:
# Slide a 60-row window over the scaled rows; the target is column 0 of the
# row right after each window.
window_ends = range(60, inputs.shape[0])
X_test = [inputs[end-60:end] for end in window_ends]
y_test = [inputs[end, 0] for end in window_ends]

In [None]:
# Stack the window lists into arrays for Keras and show their shapes.
X_test = np.array(X_test)
y_test = np.array(y_test)
X_test.shape, y_test.shape

In [None]:
# Predict the scaled target for every test window.
y_pred = modelLSTM.predict(X_test)

In [None]:
# Map the scaled target (column 0) back to original units.
# MinMaxScaler computes x_scaled = x*scale_ + min_, so the inverse has to remove
# min_ before rescaling; the earlier plain multiplication lost that offset and
# shifted every saved prediction by the column's training minimum.
scale = 1/scaler.scale_[0]
offset = scaler.min_[0]
y_pred = (y_pred - offset)*scale
y_test = (y_test - offset)*scale

In [None]:
# Root-mean-square error between the rescaled targets and predictions.
rmse = math.sqrt(mean_squared_error(y_test, y_pred))
print(province+' RMSE =', rmse)

In [None]:
# Plot actual against predicted PM2.5 over the test period.
fig, ax = plt.subplots(figsize=(14,5))
ax.plot(y_test, color='red', label='Real')
ax.plot(y_pred, color='blue', label='Predicted')
ax.set_xlabel('Time')
ax.set_ylabel('PM2.5')
ax.legend()
plt.show()

In [None]:
import pickle

# Persist the rescaled predictions as a binary pickle named after the province.
pickle_path = 'lstm_'+province.lower()+'.pickle'
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(y_pred, pickle_file)

In [None]:
# Also save the predictions as CSV; the Submit section reads them back via column '0'.
df1 = pd.DataFrame(y_pred)
df1.to_csv('lstm_'+province.lower()+'.csv')

# Rayong

In [None]:
# Select the province for this section; its name is the directory and,
# lower-cased, the file prefix of the training CSV.
provinces = ['BKK','Chiangmai','Khonkaen','Rayong','Saraburi','Surat']
province = provinces[3]
# date_parser=True was dropped: the argument expects a callable, has no effect
# without parse_dates, and is deprecated since pandas 2.0.
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')


In [None]:
# Keep only the model input columns: discard the timestamp and coordinates.
data_training = data_training.drop(columns=['date_time', 'lat', 'long'])
data_training.head()

In [None]:
# Load this province's cleaned series, used below as the evaluation data.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_test = pd.read_csv(province+'_clean.csv')
data_test.sample(5)

In [None]:
# Drop 'Unnamed: 0' (presumably a saved index) and the coordinates;
# 'date_time' is kept for now.
data_test = data_test.drop(columns=['Unnamed: 0', 'lat', 'long'])
data_test.head()

In [None]:
# Fit the min-max scaler on the training features and replace the DataFrame
# with its scaled ndarray. The inverse_transform call that used to follow was
# removed: its result was discarded.
scaler = MinMaxScaler()
data_training = scaler.fit_transform(data_training)
# Per-feature reciprocal of the fitted scale factor.
scale = 1/scaler.scale_


In [None]:
# Turn the scaled series into supervised samples: every input is a block of
# 60 consecutive rows (all features) and its target is column 0 of the next row.
window_ends = range(60, data_training.shape[0])
X_train = np.array([data_training[end-60:end] for end in window_ends])
y_train = np.array([data_training[end, 0] for end in window_ends])
X_train.shape

In [None]:
# Two-layer LSTM regressor: 60 -> 120 recurrent units, each followed by 20%
# dropout, then one linear output unit for the scaled target.
# The input shape (timesteps, features) is read from X_train instead of
# hard-coding 4 features, so it follows whatever columns preprocessing keeps.
modelLSTM = Sequential()

modelLSTM.add(LSTM(units = 60, activation = 'relu', return_sequences = True, input_shape = X_train.shape[1:]))
modelLSTM.add(Dropout(0.2))

modelLSTM.add(LSTM(units = 120, activation = 'relu'))
modelLSTM.add(Dropout(0.2))

modelLSTM.add(Dense(units = 1))

In [None]:
# Print the layer stack with output shapes and parameter counts.
modelLSTM.summary()

In [None]:
# Training callbacks; all three monitor the training loss.
filepath = province+'_best_modelLSTM_yourself.hdf5'
# Save the model whenever the loss reaches a new minimum.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='loss',
                             verbose=0,
                             save_best_only=True,
                             mode='min')

# Stop once the loss has failed to improve for 3 epochs in a row.
earlystopping = EarlyStopping(
    monitor='loss',
    patience=3,
    min_delta=0,
    mode='auto'
)

# Cut the learning rate to 20% after 2 stagnant epochs. min_lr is the floor and
# must sit below the starting rate: the former 0.001 equals Adam's default
# learning rate, so the reduction could never take effect.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=2,
    min_lr=1e-5,
    verbose=0
)
callbacks = [checkpoint, earlystopping, reduce_lr]

In [None]:
# Train with the Adam optimizer (default settings) against mean squared error.
modelLSTM.compile(optimizer='adam', loss = 'mean_squared_error')

In [None]:
# Fit for up to 10 epochs in batches of 32; the callbacks may stop training early.
history = modelLSTM.fit(X_train, y_train, epochs=10, batch_size=32, callbacks=callbacks)

In [None]:
# Reload the unscaled training frame; data_training currently holds the scaled array.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')
data_training = data_training.drop(['date_time', 'lat', 'long'], axis = 1)
# errors='ignore' lets this cell be re-run after 'date_time' is already gone.
data_test = data_test.drop(['date_time'], axis = 1, errors='ignore')

In [None]:
# Preview the test frame after 'date_time' was dropped in the previous cell.
data_test.head()

In [None]:
# The last 60 training rows provide the history for the first test windows.
past_60_days = data_training.tail(60)

In [None]:
# Prefix the test rows with the training tail so every test row has a full window.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df = pd.concat([past_60_days, data_test], ignore_index = True)
df.head()

In [None]:
# Remove rows with missing values before scaling, then report the remaining size.
df = df.dropna()
df.shape

In [None]:
# Size of the combined frame after the NaN rows were removed.
df.shape

In [None]:
# Scale with the scaler fitted on the training data (no refit on test rows).
inputs = scaler.transform(df)
inputs

In [None]:
# Slide a 60-row window over the scaled rows; the target is column 0 of the
# row right after each window.
window_ends = range(60, inputs.shape[0])
X_test = [inputs[end-60:end] for end in window_ends]
y_test = [inputs[end, 0] for end in window_ends]

In [None]:
# Stack the window lists into arrays for Keras and show their shapes.
X_test = np.array(X_test)
y_test = np.array(y_test)
X_test.shape, y_test.shape

In [None]:
# Predict the scaled target for every test window.
y_pred = modelLSTM.predict(X_test)

In [None]:
# Map the scaled target (column 0) back to original units.
# MinMaxScaler computes x_scaled = x*scale_ + min_, so the inverse has to remove
# min_ before rescaling; the earlier plain multiplication lost that offset and
# shifted every saved prediction by the column's training minimum.
scale = 1/scaler.scale_[0]
offset = scaler.min_[0]
y_pred = (y_pred - offset)*scale
y_test = (y_test - offset)*scale

In [None]:
# Root-mean-square error between the rescaled targets and predictions.
rmse = math.sqrt(mean_squared_error(y_test, y_pred))
print(province+' RMSE =', rmse)

In [None]:
# Plot actual against predicted PM2.5 over the test period.
fig, ax = plt.subplots(figsize=(14,5))
ax.plot(y_test, color='red', label='Real')
ax.plot(y_pred, color='blue', label='Predicted')
ax.set_xlabel('Time')
ax.set_ylabel('PM2.5')
ax.legend()
plt.show()

In [None]:
import pickle

# Persist the rescaled predictions as a binary pickle named after the province.
pickle_path = 'lstm_'+province.lower()+'.pickle'
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(y_pred, pickle_file)

In [None]:
# Also save the predictions as CSV; the Submit section reads them back via column '0'.
df1 = pd.DataFrame(y_pred)
df1.to_csv('lstm_'+province.lower()+'.csv')

# Saraburi

In [None]:
# Select the province for this section; its name is the directory and,
# lower-cased, the file prefix of the training CSV.
provinces = ['BKK','Chiangmai','Khonkaen','Rayong','Saraburi','Surat']
province = provinces[4]
# date_parser=True was dropped: the argument expects a callable, has no effect
# without parse_dates, and is deprecated since pandas 2.0.
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')


In [None]:
# Keep only the model input columns: discard the timestamp and coordinates.
data_training = data_training.drop(columns=['date_time', 'lat', 'long'])
data_training.head()

In [None]:
# Load this province's cleaned series, used below as the evaluation data.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_test = pd.read_csv(province+'_clean.csv')
data_test.sample(5)

In [None]:
# Drop 'Unnamed: 0' (presumably a saved index) and the coordinates;
# 'date_time' is kept for now.
data_test = data_test.drop(columns=['Unnamed: 0', 'lat', 'long'])
data_test.head()

In [None]:
# Fit the min-max scaler on the training features and replace the DataFrame
# with its scaled ndarray. The inverse_transform call that used to follow was
# removed: its result was discarded.
scaler = MinMaxScaler()
data_training = scaler.fit_transform(data_training)
# Per-feature reciprocal of the fitted scale factor.
scale = 1/scaler.scale_


In [None]:
# Turn the scaled series into supervised samples: every input is a block of
# 60 consecutive rows (all features) and its target is column 0 of the next row.
window_ends = range(60, data_training.shape[0])
X_train = np.array([data_training[end-60:end] for end in window_ends])
y_train = np.array([data_training[end, 0] for end in window_ends])
X_train.shape

In [None]:
# Single-layer LSTM regressor: 120 recurrent units with 20% dropout, then one
# linear output unit for the scaled target.
# The input shape (timesteps, features) is read from X_train instead of
# hard-coding 4 features, so it follows whatever columns preprocessing keeps.
modelLSTM = Sequential()

modelLSTM.add(LSTM(units = 120, activation = 'relu', input_shape = X_train.shape[1:]))
modelLSTM.add(Dropout(0.2))

modelLSTM.add(Dense(units = 1))

In [None]:
# Print the layer stack with output shapes and parameter counts.
modelLSTM.summary()

In [None]:
# Training callbacks; all three monitor the training loss.
filepath = province+'_best_modelLSTM_yourself.hdf5'
# Save the model whenever the loss reaches a new minimum.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='loss',
                             verbose=0,
                             save_best_only=True,
                             mode='min')

# Stop once the loss has failed to improve for 3 epochs in a row.
earlystopping = EarlyStopping(
    monitor='loss',
    patience=3,
    min_delta=0,
    mode='auto'
)

# Cut the learning rate to 20% after 2 stagnant epochs. min_lr is the floor and
# must sit below the starting rate: the former 0.001 equals Adam's default
# learning rate, so the reduction could never take effect.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=2,
    min_lr=1e-5,
    verbose=0
)
callbacks = [checkpoint, earlystopping, reduce_lr]

In [None]:
# Train with the Adam optimizer (default settings) against mean squared error.
modelLSTM.compile(optimizer='adam', loss = 'mean_squared_error')

In [None]:
# Fit for up to 10 epochs in batches of 32; the callbacks may stop training early.
history = modelLSTM.fit(X_train, y_train, epochs=10, batch_size=32, callbacks=callbacks)

In [None]:
# Reload the unscaled training frame; data_training currently holds the scaled array.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')
data_training = data_training.drop(['date_time', 'lat', 'long'], axis = 1)
# errors='ignore' lets this cell be re-run after 'date_time' is already gone.
data_test = data_test.drop(['date_time'], axis = 1, errors='ignore')

In [None]:
# The last 60 training rows provide the history for the first test windows.
past_60_days = data_training.tail(60)

In [None]:
# Prefix the test rows with the training tail so every test row has a full window.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df = pd.concat([past_60_days, data_test], ignore_index = True)
df.head()

In [None]:
# Remove rows with missing values before scaling, then report the remaining size.
df = df.dropna()
df.shape

In [None]:
# Size of the combined frame after the NaN rows were removed.
df.shape

In [None]:
# Scale with the scaler fitted on the training data (no refit on test rows).
inputs = scaler.transform(df)
inputs

In [None]:
# Slide a 60-row window over the scaled rows; the target is column 0 of the
# row right after each window.
window_ends = range(60, inputs.shape[0])
X_test = [inputs[end-60:end] for end in window_ends]
y_test = [inputs[end, 0] for end in window_ends]

In [None]:
# Stack the window lists into arrays for Keras and show their shapes.
X_test = np.array(X_test)
y_test = np.array(y_test)
X_test.shape, y_test.shape

In [None]:
# Predict the scaled target for every test window.
y_pred = modelLSTM.predict(X_test)

In [None]:
# Map the scaled target (column 0) back to original units.
# MinMaxScaler computes x_scaled = x*scale_ + min_, so the inverse has to remove
# min_ before rescaling; the earlier plain multiplication lost that offset and
# shifted every saved prediction by the column's training minimum.
scale = 1/scaler.scale_[0]
offset = scaler.min_[0]
y_pred = (y_pred - offset)*scale
y_test = (y_test - offset)*scale

In [None]:
# Root-mean-square error between the rescaled targets and predictions.
rmse = math.sqrt(mean_squared_error(y_test, y_pred))
print(province+' RMSE =', rmse)

In [None]:
# Plot actual against predicted PM2.5 over the test period.
fig, ax = plt.subplots(figsize=(14,5))
ax.plot(y_test, color='red', label='Real')
ax.plot(y_pred, color='blue', label='Predicted')
ax.set_xlabel('Time')
ax.set_ylabel('PM2.5')
ax.legend()
plt.show()

In [None]:
import pickle

# Persist the rescaled predictions as a binary pickle named after the province.
pickle_path = 'lstm_'+province.lower()+'.pickle'
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(y_pred, pickle_file)

In [None]:
# Also save the predictions as CSV; the Submit section reads them back via column '0'.
df1 = pd.DataFrame(y_pred)
df1.to_csv('lstm_'+province.lower()+'.csv')

# Surat

In [None]:
# Select the province for this section; its name is the directory and,
# lower-cased, the file prefix of the training CSV.
provinces = ['BKK','Chiangmai','Khonkaen','Rayong','Saraburi','Surat']
province = provinces[5]
# date_parser=True was dropped: the argument expects a callable, has no effect
# without parse_dates, and is deprecated since pandas 2.0.
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')


In [None]:
# Keep only the model input columns: discard the timestamp and coordinates.
data_training = data_training.drop(columns=['date_time', 'lat', 'long'])
data_training.head()

In [None]:
# Load this province's cleaned series, used below as the evaluation data.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_test = pd.read_csv(province+'_clean.csv')
data_test.sample(5)

In [None]:
# Drop 'Unnamed: 0' (presumably a saved index) and the coordinates;
# 'date_time' is kept for now.
data_test = data_test.drop(columns=['Unnamed: 0', 'lat', 'long'])
data_test.head()

In [None]:
# Fit the min-max scaler on the training features and replace the DataFrame
# with its scaled ndarray. The inverse_transform call that used to follow was
# removed: its result was discarded.
scaler = MinMaxScaler()
data_training = scaler.fit_transform(data_training)
# Per-feature reciprocal of the fitted scale factor.
scale = 1/scaler.scale_


In [None]:
# Turn the scaled series into supervised samples: every input is a block of
# 90 consecutive rows (all features) and its target is column 0 of the next row.
window_ends = range(90, data_training.shape[0])
X_train = np.array([data_training[end-90:end] for end in window_ends])
y_train = np.array([data_training[end, 0] for end in window_ends])
X_train.shape

In [None]:
# Two-layer LSTM regressor: 120 -> 240 recurrent units, each followed by 20%
# dropout, then one linear output unit for the scaled target.
# The input shape (timesteps, features) is read from X_train instead of
# hard-coding 4 features, so it follows whatever columns preprocessing keeps.
modelLSTM = Sequential()

modelLSTM.add(LSTM(units = 120, activation = 'relu', return_sequences = True, input_shape = X_train.shape[1:]))
modelLSTM.add(Dropout(0.2))
modelLSTM.add(LSTM(units = 240, activation = 'relu'))
modelLSTM.add(Dropout(0.2))

modelLSTM.add(Dense(units = 1))

In [None]:
# Print the layer stack with output shapes and parameter counts.
modelLSTM.summary()

In [None]:
# Training callbacks; all three monitor the training loss.
filepath = province+'_best_modelLSTM_yourself.hdf5'
# Save the model whenever the loss reaches a new minimum.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='loss',
                             verbose=0,
                             save_best_only=True,
                             mode='min')

# Stop once the loss has failed to improve for 3 epochs in a row.
earlystopping = EarlyStopping(
    monitor='loss',
    patience=3,
    min_delta=0,
    mode='auto'
)

# Cut the learning rate to 20% after 2 stagnant epochs. min_lr is the floor and
# must sit below the starting rate: the former 0.001 equals Adam's default
# learning rate, so the reduction could never take effect.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=2,
    min_lr=1e-5,
    verbose=0
)
callbacks = [checkpoint, earlystopping, reduce_lr]

In [None]:
# Train with the Adam optimizer (default settings) against mean squared error.
modelLSTM.compile(optimizer='adam', loss = 'mean_squared_error')

In [None]:
# Fit for up to 15 epochs in batches of 64; the callbacks may stop training early.
history = modelLSTM.fit(X_train, y_train, epochs=15, batch_size=64, callbacks=callbacks)

In [None]:
# Reload the unscaled training frame; data_training currently holds the scaled array.
# (date_parser=True removed: it expects a callable and is deprecated since pandas 2.0.)
data_training = pd.read_csv('../'+province+'/train/'+province.lower()+'_train_format.csv')
data_training = data_training.drop(['date_time', 'lat', 'long'], axis = 1)
# errors='ignore' lets this cell be re-run after 'date_time' is already gone.
data_test = data_test.drop(['date_time'], axis = 1, errors='ignore')

In [None]:
# Despite the name, this keeps the last 90 training rows, matching the
# 90-row window used for this province.
past_60_days = data_training.tail(90)

In [None]:
# Prefix the test rows with the training tail so every test row has a full window.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df = pd.concat([past_60_days, data_test], ignore_index = True)
df.head()

In [None]:
# Remove rows with missing values before scaling, then report the remaining size.
df = df.dropna()
df.shape

In [None]:
# Size of the combined frame after the NaN rows were removed.
df.shape

In [None]:
# Scale with the scaler fitted on the training data (no refit on test rows).
inputs = scaler.transform(df)
inputs

In [None]:
# Slide a 90-row window over the scaled rows; the target is column 0 of the
# row right after each window.
window_ends = range(90, inputs.shape[0])
X_test = [inputs[end-90:end] for end in window_ends]
y_test = [inputs[end, 0] for end in window_ends]

In [None]:
# Stack the window lists into arrays for Keras and show their shapes.
X_test = np.array(X_test)
y_test = np.array(y_test)
X_test.shape, y_test.shape

In [None]:
# Predict the scaled target for every test window.
y_pred = modelLSTM.predict(X_test)

In [None]:
# Map the scaled target (column 0) back to original units.
# MinMaxScaler computes x_scaled = x*scale_ + min_, so the inverse has to remove
# min_ before rescaling; the earlier plain multiplication lost that offset and
# shifted every saved prediction by the column's training minimum.
scale = 1/scaler.scale_[0]
offset = scaler.min_[0]
y_pred = (y_pred - offset)*scale
y_test = (y_test - offset)*scale

In [None]:
# Root-mean-square error between the rescaled targets and predictions.
rmse = math.sqrt(mean_squared_error(y_test, y_pred))
print(province+' RMSE =', rmse)

In [None]:
# Plot actual against predicted PM2.5 over the test period.
fig, ax = plt.subplots(figsize=(14,5))
ax.plot(y_test, color='red', label='Real')
ax.plot(y_pred, color='blue', label='Predicted')
ax.set_xlabel('Time')
ax.set_ylabel('PM2.5')
ax.legend()
plt.show()

In [None]:
import pickle

# Persist the rescaled predictions as a binary pickle named after the province.
pickle_path = 'lstm_'+province.lower()+'.pickle'
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(y_pred, pickle_file)

In [None]:
# Also save the predictions as CSV; the Submit section reads them back via column '0'.
df1 = pd.DataFrame(y_pred)
df1.to_csv('lstm_'+province.lower()+'.csv')

# Submit

In [None]:
# Per-province parameter lists. The submission loop iterates this dict in
# insertion order and only uses the keys; the values are not read anywhere in
# the visible notebook.
param_dict = {
    'BKK': [7.0, 70.0, 40.0, 100.0],
    'Chiangmai': [7.0, 50.0, 40.0, 100.0],
    'Rayong': [2.0, 50.0, 30.0, 40.0],
    'Saraburi': [3.0, 50.0, 20.0, 90.0],
    'Khonkaen': [1.0, 70.0, 30.0, 90.0],
    'Surat': [2.0, 60.0, 20.0, 30.0],
}

In [None]:
# Build the submission frame: for each province, pair the saved LSTM
# predictions with their timestamps and left-join them onto the template slots.

# The template is the same for every province, so load and parse it once
# instead of on every loop iteration.
submit = pd.read_csv('submit_time_format_2.csv')
submit['Predicted'] = pd.to_datetime(submit['Predicted'])
submit['Time'] = pd.to_datetime(submit['Time'])

province_results = []
for key in param_dict:
    # Timestamps of the rows left after dropping NaNs, renumbered from 0 so
    # they line up positionally with the prediction file.
    df1 = pd.read_csv(key+'_clean.csv')
    df1.dropna(inplace=True)
    df1.reset_index(inplace=True)

    # Predictions written by the province's section; the value column is '0'.
    df2 = pd.read_csv('lstm_'+key.lower()+'.csv')

    df3 = pd.DataFrame()
    df3['Predicted'] = pd.to_datetime(df1['date_time'])
    df3['PM2.5'] = df2['0'].copy()

    # Left join keeps every template slot; slots with no prediction get NaN.
    result = pd.merge(submit, df3, how="left", on=["Predicted"])
    result['Province'] = key
    province_results.append(result)

# Concatenate once at the end rather than growing a frame inside the loop
# (which also avoided starting from an empty placeholder frame).
ans = pd.concat(province_results)

In [None]:
# Drop rows containing any missing value, renumber the rows from 0, and write
# the PM2.5 column with that row number as the CSV index.
# Reassigning (instead of in-place reset_index, which kept adding index
# columns) keeps this cell safe to re-run.
ans = ans.dropna().reset_index(drop=True)
ans['PM2.5'].to_csv('kaggle_lstm.csv')