In [1]:
import pandas as pd
import numpy as np
from keras.models import load_model
import pickle
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_curve
from sklearn.metrics import confusion_matrix
from google.colab import drive

# Mount Google Drive; every path used below lives under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


# Prepare Test Data

In [None]:
# Test dataset: one row per image pair (img1, img2) plus temperature and
# weather columns.
test_csv = '/content/drive/MyDrive/fyp/fyp2/finalised/csv/test_v2.csv'
df = pd.read_csv(test_csv)

df

Unnamed: 0,CamId,img1,img2,temp1,temp2,temp3,weather1,weather2,weather3
0,7211,20120526_064041.jpg,20120526_091002.jpg,16.0,16.0,18.0,sunny,sunny,sunny
1,4801,20110327_084003.jpg,20110327_094004.jpg,7.0,8.0,9.0,cloudy,rainy,rainy
2,4801,20111213_111002.jpg,20111213_104048.jpg,1.0,2.0,2.0,cloudy,rainy,rainy
3,75,20141010_174256.jpg,20141010_191301.jpg,25.0,25.0,32.2,sunny,sunny,sunny
4,684,20121105_174154.jpg,20121105_221148.jpg,10.0,12.8,11.7,sunny,sunny,sunny
...,...,...,...,...,...,...,...,...,...
923,5021,20130818_094627.jpg,20130818_124628.jpg,17.0,21.0,20.0,sunny,sunny,sunny
924,11160,20130822_102323.jpg,20130822_132312.jpg,13.0,20.0,23.0,sunny,sunny,sunny
925,4679,20140831_104013.jpg,20140831_101021.jpg,13.0,13.0,14.0,rainy,rainy,rainy
926,623,20120919_221154.jpg,20120919_021147.jpg,26.7,27.2,27.2,sunny,sunny,sunny


In [None]:
# Keep only the two image filenames and the weather3 label, one-hot encode
# the label, then drop the weather3_rainy indicator so a single
# weather3_sunny column remains as the binary target.
df = (
    df[['img1', 'img2', 'weather3']]
    .pipe(pd.get_dummies, columns=['weather3'])
    .drop(columns='weather3_rainy')
)
df

Unnamed: 0,img1,img2,weather3_sunny
0,20120526_064041.jpg,20120526_091002.jpg,1
1,20110327_084003.jpg,20110327_094004.jpg,0
2,20111213_111002.jpg,20111213_104048.jpg,0
3,20141010_174256.jpg,20141010_191301.jpg,1
4,20121105_174154.jpg,20121105_221148.jpg,1
...,...,...,...
923,20130818_094627.jpg,20130818_124628.jpg,1
924,20130822_102323.jpg,20130822_132312.jpg,1
925,20140831_104013.jpg,20140831_101021.jpg,0
926,20120919_221154.jpg,20120919_021147.jpg,1


In [None]:
# Separate the binary target from the remaining (image filename) columns.
# NOTE(review): despite its name, `X_train` is derived from the test frame.
target_col = 'weather3_sunny'
y_test = df[target_col]
X_train = df.drop(columns=[target_col])

In [None]:
# Display the feature frame left after removing the target column.
X_train

Unnamed: 0,img1,img2
0,20120526_064041.jpg,20120526_091002.jpg
1,20110327_084003.jpg,20110327_094004.jpg
2,20111213_111002.jpg,20111213_104048.jpg
3,20141010_174256.jpg,20141010_191301.jpg
4,20121105_174154.jpg,20121105_221148.jpg
...,...,...
923,20130818_094627.jpg,20130818_124628.jpg
924,20130822_102323.jpg,20130822_132312.jpg
925,20140831_104013.jpg,20140831_101021.jpg
926,20120919_221154.jpg,20120919_021147.jpg


In [None]:
# Flatten the (img1, img2) pairs into a single column of full image paths,
# interleaved as pair0-img1, pair0-img2, pair1-img1, ... so that every two
# consecutive rows belong to the same pair.
# The list is built in one pass; growing the frame with
# `test_df.loc[len(test_df)] = ...` re-allocates on every insert.
path = '/content/drive/MyDrive/fyp/fyp2/image/'

test_df = pd.DataFrame({
    'img': [
        str(path) + str(name)
        for pair in zip(df['img1'], df['img2'])
        for name in pair
    ]
})

test_df

Unnamed: 0,img
0,/content/drive/MyDrive/fyp/fyp2/image/20120526...
1,/content/drive/MyDrive/fyp/fyp2/image/20120526...
2,/content/drive/MyDrive/fyp/fyp2/image/20110327...
3,/content/drive/MyDrive/fyp/fyp2/image/20110327...
4,/content/drive/MyDrive/fyp/fyp2/image/20111213...
...,...
1851,/content/drive/MyDrive/fyp/fyp2/image/20140831...
1852,/content/drive/MyDrive/fyp/fyp2/image/20120919...
1853,/content/drive/MyDrive/fyp/fyp2/image/20120919...
1854,/content/drive/MyDrive/fyp/fyp2/image/20130621...


In [None]:
from collections import deque 
import copy 

def filegenerator(df, temporal_length, temporal_stride):
  """Yield consecutive, non-overlapping windows taken from ``df.img``.

  Args:
    df: object whose ``img`` attribute can be turned into a list (here a
      DataFrame with an ``img`` column of image paths).
    temporal_length: number of entries in each yielded window.
    temporal_stride: accepted so existing callers keep working, but it is
      not used: a new window always starts right after the previous one
      ends, exactly as the original implementation behaved.

  Yields:
    collections.deque with ``temporal_length`` entries in input order.
    Trailing entries that do not fill a whole window are never yielded.
    Entries are handed over as-is (no deep copy), which is equivalent for
    immutable values such as path strings.
  """
  window = deque()

  for img in list(df.img):
    window.append(img)
    if len(window) == temporal_length:
      # Hand the filled window to the caller and start a fresh deque;
      # this replaces the former deepcopy-then-clear of a shared deque.
      yield window
      window = deque()

In [None]:
def seq_of_frames(df,length,stride):
  """Collect every window produced by ``filegenerator`` into a list.

  Args:
    df: forwarded to ``filegenerator``.
    length: forwarded to ``filegenerator`` as the window length.
    stride: forwarded to ``filegenerator`` as the stride argument.

  Returns:
    list with one element per window; each element is a one-item list
    wrapping the window's entries, i.e. ``[[entry_0, entry_1, ...]]``.
  """
  # Iterating directly ends cleanly when the generator is exhausted.
  # The previous `while` / `except Exception` loop printed an "exception"
  # message on every normal run and also hid genuine errors.
  return [[list(window)] for window in filegenerator(df, length, stride)]

In [None]:
# Group the flattened path column into sequences of two images each.
validation_data = seq_of_frames(test_df, length=2, stride=2)

An exception has occured: 


In [None]:
# Inspect the first sequence to check the nesting and the path format.
validation_data[0]

[['/content/drive/MyDrive/fyp/fyp2/image/20120526_064041.jpg',
  '/content/drive/MyDrive/fyp/fyp2/image/20120526_091002.jpg']]

In [None]:
from keras.utils import data_utils
import keras
import cv2

class DataGenerator(data_utils.Sequence):
  """Keras Sequence that reads image sequences from disk one batch at a time.

  Every element of ``data`` is indexed as ``data[i][0]`` and that value is
  iterated as a list of image paths, so each sample is expected to look
  like ``[[path_0, path_1, ...]]``.
  """

  def __init__(self,data,batch_size,dim,is_autoencoder,shuffle):
    """Store the configuration and prepare the first epoch's sample order.

    Args:
      data: indexable collection of samples (see the class docstring).
      batch_size: number of samples served per batch.
      dim: target size handed to ``cv2.resize``.
      is_autoencoder: when truthy, batches are returned as ``(X, X)``.
      shuffle: when truthy, the sample order is reshuffled each epoch.
    """
    self.dim = dim
    self.data  = data
    self.batch_size = batch_size
    self.list_IDs = np.arange(len(data))
    self.is_autoencoder = is_autoencoder
    self.shuffle = shuffle
    self.on_epoch_end()

  def on_epoch_end(self):
    """Reset (and optionally shuffle) the order in which samples are served."""
    # Work on a copy: shuffling an alias of list_IDs would permute list_IDs
    # too, and __getitem__ looks sample ids up through list_IDs.
    self.indexes = self.list_IDs.copy()
    if self.shuffle:
      np.random.shuffle(self.indexes)

  def __len__(self):
    """Number of batches, counting a final partial batch."""
    # ceil instead of floor so the last len(data) % batch_size samples are
    # not silently left out (which would leave predictions shorter than
    # the label vector).
    return int(np.ceil(len(self.data)/self.batch_size))

  def __getitem__(self, index):
    """Return batch number ``index``."""
    batch_positions = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
    list_IDs_temp = [self.list_IDs[k] for k in batch_positions]
    return self.__data_generation(list_IDs_temp)

  def __data_generation(self,list_IDs_temp):
    """Load, resize and min-max normalise the images of the given samples.

    Returns:
      ``X`` (array built from the per-sample frame lists), or ``(X, X)``
      when ``is_autoencoder`` is truthy.
    """
    X_data = []
    for i in list_IDs_temp:
      batch_samples = self.data[i][0]
      frames = []
      for img in batch_samples:
        try:
          image = cv2.imread(img)
          if image is None:
            # cv2.imread reports an unreadable file by returning None;
            # fail here with a clear reason instead of inside cv2.resize.
            raise ValueError('cv2.imread could not read the file')
          ext_img = cv2.resize(image,self.dim)
          ext_img = cv2.normalize(ext_img, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
          frames.append(ext_img)
        except Exception as e:
          print('image: ', img, 'Value error ',e)

      # Keep a sample only when every one of its frames loaded. Comparing
      # with the sample's own length (rather than a hard-coded 2) lets the
      # generator serve sequences of any length.
      # NOTE(review): a skipped sample shortens the batch, so outputs no
      # longer line up row-for-row with the labels; watch the printed
      # messages when using this for evaluation.
      if frames and len(frames) == len(batch_samples):
        X_data.append(frames)

    X = np.array(X_data)
    if self.is_autoencoder:
      return X, X
    return X

In [None]:
# Generator settings for inference: `dim` is handed to cv2.resize, and
# shuffling is off so batches are served in the order of validation_data.
params = dict(
    batch_size=32,
    dim=(320, 180),
    is_autoencoder=False,
    shuffle=False,
)
valid_gen = DataGenerator(validation_data,**params)

# Chu, Ho, and Borji (2018) + Random Forests

In [None]:
# Benchmark checkpoint (its filename records epoch 50 and a validation
# RMSE of 4.9502); print the layer summary after loading.
model1_checkpoint = '/content/drive/MyDrive/fyp/fyp2/finalised/code/benchmarking checkpoint/version4/ep_50-val_root_mean_squared_error_4.9502.hdf5'
model1 = load_model(model1_checkpoint)
model1.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed (TimeDistri (None, 2, 178, 318, 32)   896       
_________________________________________________________________
time_distributed_1 (TimeDist (None, 2, 176, 316, 32)   9248      
_________________________________________________________________
time_distributed_2 (TimeDist (None, 2, 88, 158, 32)    0         
_________________________________________________________________
time_distributed_3 (TimeDist (None, 2, 88, 158, 32)    0         
_________________________________________________________________
time_distributed_4 (TimeDist (None, 2, 86, 156, 64)    18496     
_________________________________________________________________
time_distributed_5 (TimeDist (None, 2, 84, 154, 64)    36928     
_________________________________________________________________
time_distributed_6 (TimeDist (None, 2, 42, 77, 64)     0

In [None]:
# Show the input tensor the loaded model expects.
model1.input

<KerasTensor: shape=(None, 2, 180, 320, 3) dtype=float32 (created by layer 'time_distributed_input')>

In [None]:
# Predict with model1 on the batches served by valid_gen.
result = model1.predict(valid_gen)

In [None]:
# Inspect the raw prediction array.
result

array([[[16.098368 ],
        [24.533018 ]],

       [[13.840607 ],
        [11.861488 ]],

       [[ 7.9214745],
        [11.106433 ]],

       ...,

       [[16.679596 ],
        [16.869389 ]],

       [[26.55154  ],
        [19.439745 ]],

       [[23.693432 ],
        [24.944563 ]]], dtype=float32)

In [None]:
# Turn the predictions into a two-column frame (one column per image of the
# pair), rounded to one decimal.
# Assumes `result` has shape (n_samples, 2, 1): the trailing singleton axis
# is indexed away with NumPy instead of calling a Python lambda per cell via
# DataFrame.applymap, which is deprecated in recent pandas releases.
temp = pd.DataFrame(np.asarray(result)[:, :, 0]).round(decimals=1)
temp

Unnamed: 0,0,1
0,16.1,24.5
1,13.8,11.9
2,7.9,11.1
3,33.4,37.2
4,17.8,20.7
...,...,...
923,21.8,23.1
924,22.3,24.2
925,16.7,16.9
926,26.6,19.4


In [None]:
# Summary table; the evaluation cells below append one row of metrics each.
performance = pd.DataFrame(columns = ['model','Precision', 'Recall', 'F1', 'Accuracy'])

In [None]:
# Load the saved random forest and classify the rounded predictions.
# NOTE(security): pickle.load can execute arbitrary code, so only load
# files that come from a trusted source.
# The `with` block closes the file handle, which the bare
# pickle.load(open(...)) left open.
with open('/content/drive/MyDrive/fyp/fyp2/finalised/code/classification summary/random_forest.sav', 'rb') as model_file:
  rfc = pickle.load(model_file)
y_pred_RFC = rfc.predict(temp)

print("Accuracy on test set: {:.3f}".format(rfc.score(temp, y_test)))

confusion_majority=confusion_matrix(y_test, y_pred_RFC)

print('Mjority classifier Confusion Matrix\n', confusion_majority)

print('**********************')
print('Mjority TN= ', confusion_majority[0][0])
print('Mjority FP=', confusion_majority[0][1])
print('Mjority FN= ', confusion_majority[1][0])
print('Mjority TP= ', confusion_majority[1][1])
print('**********************')

# Compute each metric once and reuse it for printing and for the summary row.
precision = precision_score(y_test, y_pred_RFC)
recall = recall_score(y_test, y_pred_RFC)
f1 = f1_score(y_test, y_pred_RFC)
accuracy = accuracy_score(y_test, y_pred_RFC)

print('Precision= {:.2f}'.format(precision))
print('Recall= {:.2f}'. format(recall))
print('F1= {:.2f}'. format(f1))
print('Accuracy= {:.2f}'. format(accuracy))

# Built-in round() works for NumPy scalars and plain Python floats alike,
# whereas the `.round(2)` method exists only on NumPy scalars.
# NOTE(review): re-running this cell appends another row for the same model.
performance.loc[len(performance)] = ['Chu, Ho, and Borji (2018) + random forest', round(precision, 2), round(recall, 2), round(f1, 2), round(accuracy, 2)]

Accuracy on test set: 0.652
Mjority classifier Confusion Matrix
 [[132  70]
 [253 473]]
**********************
Mjority TN=  132
Mjority FP= 70
Mjority FN=  253
Mjority TP=  473
**********************
Precision= 0.87
Recall= 0.65
F1= 0.75
Accuracy= 0.65


# Proposed image temperature prediction model + Random Forests

In [None]:
# Earlier checkpoint, kept for reference only (version2, epoch 47; its
# filename records a validation RMSE of 10.3211):
# model2 = load_model('/content/drive/MyDrive/fyp/fyp2/finalised/code/conceptual checkpoint/version2/ep_47-val_root_mean_squared_error_10.3211.hdf5')
# model2.summary()
# Checkpoint evaluated below (version3, epoch 45; its filename records a
# validation RMSE of 4.6507).
model2 = load_model('/content/drive/MyDrive/fyp/fyp2/finalised/code/conceptual checkpoint/version3/ep_45-val_root_mean_squared_error_4.6507.hdf5')
model2.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed (TimeDistri (None, 2, 5, 10, 512)     14714688  
_________________________________________________________________
time_distributed_1 (TimeDist (None, 2, 25600)          0         
_________________________________________________________________
lstm (LSTM)                  (None, 2, 64)             6570240   
_________________________________________________________________
dense (Dense)                (None, 2, 64)             4160      
_________________________________________________________________
dense_1 (Dense)              (None, 2, 16)             1040      
_________________________________________________________________
dense_2 (Dense)              (None, 2, 1)              17        
Total params: 21,290,145
Trainable params: 21,290,145
Non-trainable params: 0
____________________________________________

In [None]:
# Predict with model2 on the same generator.
# NOTE(review): this rebinds `result`, replacing model1's predictions.
result = model2.predict(valid_gen)

In [None]:
# Turn the predictions into a two-column frame (one column per image of the
# pair), rounded to one decimal.
# Assumes `result` has shape (n_samples, 2, 1): the trailing singleton axis
# is indexed away with NumPy instead of calling a Python lambda per cell via
# DataFrame.applymap, which is deprecated in recent pandas releases.
temp = pd.DataFrame(np.asarray(result)[:, :, 0]).round(decimals=1)
temp

Unnamed: 0,0,1
0,23.4,24.2
1,10.5,8.1
2,7.1,6.9
3,35.9,37.5
4,19.0,12.2
...,...,...
923,20.8,24.9
924,23.3,24.2
925,13.5,14.6
926,21.1,23.5


In [None]:
# Load the saved random forest and classify the rounded predictions.
# NOTE(security): pickle.load can execute arbitrary code, so only load
# files that come from a trusted source.
# The `with` block closes the file handle, which the bare
# pickle.load(open(...)) left open.
with open('/content/drive/MyDrive/fyp/fyp2/finalised/code/classification summary/random_forest.sav', 'rb') as model_file:
  rfc = pickle.load(model_file)

y_pred_RFC = rfc.predict(temp)

print("Accuracy on test set: {:.3f}".format(rfc.score(temp, y_test)))

confusion_majority=confusion_matrix(y_test, y_pred_RFC)

print('Mjority classifier Confusion Matrix\n', confusion_majority)

print('**********************')
print('Mjority TN= ', confusion_majority[0][0])
print('Mjority FP=', confusion_majority[0][1])
print('Mjority FN= ', confusion_majority[1][0])
print('Mjority TP= ', confusion_majority[1][1])
print('**********************')

# Compute each metric once and reuse it for printing and for the summary row.
precision = precision_score(y_test, y_pred_RFC)
recall = recall_score(y_test, y_pred_RFC)
f1 = f1_score(y_test, y_pred_RFC)
accuracy = accuracy_score(y_test, y_pred_RFC)

print('Precision= {:.2f}'.format(precision))
print('Recall= {:.2f}'. format(recall))
print('F1= {:.2f}'. format(f1))
print('Accuracy= {:.2f}'. format(accuracy))

# Built-in round() works for NumPy scalars and plain Python floats alike,
# whereas the `.round(2)` method exists only on NumPy scalars.
# NOTE(review): re-running this cell appends another row for the same model.
performance.loc[len(performance)] = ['proposed + random forest', round(precision, 2), round(recall, 2), round(f1, 2), round(accuracy, 2)]

Accuracy on test set: 0.616
Mjority classifier Confusion Matrix
 [[125  77]
 [279 447]]
**********************
Mjority TN=  125
Mjority FP= 77
Mjority FN=  279
Mjority TP=  447
**********************
Precision= 0.85
Recall= 0.62
F1= 0.72
Accuracy= 0.62


In [None]:
# Persist the metrics table (without the index column) to Drive.
performance.to_csv('/content/drive/MyDrive/fyp/fyp2/finalised/code/combined_model_matrix_v5.csv', encoding='utf-8', index=False)

# Evaluate Performance

In [25]:
# Reload the saved metrics table for display.
# NOTE(review): this rebinds `df`, which earlier cells use for the test
# set; re-running those cells after this one needs the test CSV reloaded.
df = pd.read_csv('/content/drive/MyDrive/fyp/fyp2/finalised/code/combined_model_matrix_v5.csv')
df

Unnamed: 0,model,Precision,Recall,F1,Accuracy
0,"Chu, Ho, and Borji (2018) + random forest",0.87,0.65,0.75,0.65
1,proposed + random forest,0.85,0.62,0.72,0.62
