## Notes

### Image
* Each input image is 128 x 128 pixels; only the first three (RGB) channels are used.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import keras as k
from PIL import Image
from tqdm.notebook import tqdm
import os
from sklearn import preprocessing
import wandb
from wandb.keras import WandbCallback
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
%matplotlib inline

# Detect accelerators with the supported device-listing API;
# tf.test.is_gpu_available() is deprecated and emits a warning pointing here.
gpu_devices = tf.config.list_physical_devices('GPU')
gpu_available = len(gpu_devices) > 0
# A visible GPU only counts as CUDA-usable when this TensorFlow build was compiled with CUDA.
is_cuda_gpu_available = gpu_available and tf.test.is_built_with_cuda()

print(gpu_available, is_cuda_gpu_available)

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
False False


In [63]:
# Open a Weights & Biases run under the given account and project.
wandb.init(
    entity="thompson_e",
    project="aircraft-project",
)

VBox(children=(Label(value=' 2.56MB of 2.56MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▂▃▃▃▃▃▃▁▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇██
epoch,▁▁▁▂▂▂▂▁▁▂▂▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇██
loss,█▇▇▇▇▇▇█▇▇▇█▇▇▇▇▇▆▆▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁
val_accuracy,▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▂▂▃▂▃▄▄▄▃▅▅▅▅▆▆▂▇█
val_loss,█▇▇▇▇▇▇█▇▇▇█████▇▇▇▇▇▆▆▆▆▆▅▄▄▄▅▃▃▂▃▃▁▇▁▁

0,1
accuracy,0.43657
best_epoch,45.0
best_val_loss,1.36479
epoch,47.0
loss,1.33921
val_accuracy,0.46192
val_loss,1.38757


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [3]:
# Expected input image dimensions, in pixels.
img_height = img_width = 128
# Training-loop settings.
batch_size = 128
epochs = 100

In [4]:
# Dataset locations, relative to the notebook's working directory.
training_path = 'Train/'
label_file = 'train.csv'
image_path = 'Train/in_images'

# Named aircraft classes; `labels_ambi` is bound to the same set object as `y_clean`.
y_clean = labels_ambi = {
    'A220', 'Boeing787', 'Boeing777', 'Boeing737', 'Boeing747',
    'C919', 'A321', 'A350', 'A330', 'ARJ21',
}
# The same classes plus the catch-all 'other-airplane' label.
y_ambi = {'other-airplane'} | y_clean

In [5]:
# When True, rows labeled 'other-airplane' (the ambiguous catch-all class) are
# dropped from the label table, leaving only the named aircraft classes.
USE_CLEAN: bool = True

In [6]:
# Read the (file name, label) table, optionally drop the catch-all class,
# and convert it to a NumPy array for positional column access.
labels = pd.read_csv(training_path + label_file)
if USE_CLEAN:
    labels = labels.loc[labels['label'] != 'other-airplane']
labels = labels.to_numpy()

labels

array([['5690_0.png', 'A220'],
       ['5690_1.png', 'A220'],
       ['5690_2.png', 'A220'],
       ...,
       ['4597_0.png', 'Boeing737'],
       ['4597_1.png', 'A321'],
       ['4597_3.png', 'Boeing737']], dtype=object)

In [83]:
class Model:
    """Small CNN image classifier wrapped around a compiled Keras model.

    The compiled network is exposed as ``self.model``.
    """

    def __init__(self, input_shape=(128, 128, 3), num_classes=10, learning_rate=0.005):
        """Build and compile the network, then print its layer summary.

        Args:
            input_shape: (height, width, channels) of a single input image.
            num_classes: Number of output classes (width of the softmax layer).
            learning_rate: Learning rate passed to the Adam optimizer.
        """
        self.input_shape = tuple(input_shape)
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.model = self.build_model()
        # summary() prints the table itself and returns None, so wrapping it
        # in print() would emit a stray "None" line.
        self.model.summary()

    def build_model(self):
        """Assemble and compile the convolutional network.

        Returns:
            A ``tf.keras.Sequential`` model compiled with Adam, sparse
            categorical cross-entropy loss and an accuracy metric.
        """
        model = tf.keras.Sequential([
            tf.keras.layers.InputLayer(self.input_shape, name = 'Input Layer'),
            # NOTE(review): this first convolution has no activation, unlike
            # the second one -- confirm that is intentional.
            tf.keras.layers.Conv2D(filters = 8, kernel_size = 9),
            tf.keras.layers.MaxPool2D(),
            tf.keras.layers.Dropout(.5),
            tf.keras.layers.Conv2D(filters = 4, kernel_size = 3, activation = 'relu'),
            tf.keras.layers.MaxPool2D(),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64,activation='relu'),
            tf.keras.layers.Dense(256,activation='relu'),
            tf.keras.layers.Dense(256,activation='relu'),
            tf.keras.layers.Dense(self.num_classes, activation='softmax'),
        ])
        # The optimizer class is spelled `Adam`; `tf.keras.optimizers.ADAM`
        # does not exist and raises AttributeError.
        opt = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        # Sparse loss: targets are integer class ids, not one-hot vectors.
        model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy', metrics=['accuracy'])

        return model

In [84]:
# Instantiate the wrapper; its constructor builds the network and prints the layer summary.
model = Model()

Model: "sequential_23"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_52 (Conv2D)          (None, 120, 120, 8)       1952      
                                                                 
 max_pooling2d_34 (MaxPoolin  (None, 60, 60, 8)        0         
 g2D)                                                            
                                                                 
 dropout_33 (Dropout)        (None, 60, 60, 8)         0         
                                                                 
 conv2d_53 (Conv2D)          (None, 58, 58, 4)         292       
                                                                 
 max_pooling2d_35 (MaxPoolin  (None, 29, 29, 4)        0         
 g2D)                                                            
                                                                 
 flatten_23 (Flatten)        (None, 3364)            

In [53]:
# Load every labeled image into one preallocated float32 array scaled to [0, 1].
# float32 halves the memory of the float64 that `uint8 / 255` produces, and
# filling a preallocated array avoids holding a Python list plus its full copy.
# An image whose size differs from (img_height, img_width) raises on assignment.
X = np.empty((len(labels), img_height, img_width, 3), dtype=np.float32)
for idx, file in enumerate(tqdm(labels[:,0])):
    # The context manager releases the file handle once the pixels are read.
    with Image.open(f'{image_path}/{file}') as image:
        # convert('RGB') drops any alpha channel and also expands grayscale or
        # palette images, which plain `[:, :, :3]` slicing cannot handle.
        X[idx] = np.asarray(image.convert('RGB'), dtype=np.float32) / 255

  0%|          | 0/20349 [00:00<?, ?it/s]

In [60]:
# Sanity check: the image array and the label table should have the same number of rows.
print(X.shape, labels.shape)

(20349, 128, 128, 3) (20349, 2)


In [77]:
# Map class-name strings to integer ids; fit_transform learns the class list
# and encodes the label column in a single call.
le = preprocessing.LabelEncoder()
y = le.fit_transform(labels[:, 1])
print(le.classes_)

['A220' 'A321' 'A330' 'A350' 'ARJ21' 'Boeing737' 'Boeing747' 'Boeing777'
 'Boeing787' 'C919']


In [78]:
# Fixed seed so the partition (and every metric computed from it) is reproducible.
split_seed = 42

# 80% of the samples go to training; the remaining 20% is divided 70/30 into
# X_test / X_val. Both splits are stratified so each subset keeps the class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, stratify=y, random_state=split_seed)
X_test, X_val, y_test, y_val = train_test_split(
    X_test, y_test, test_size=0.3, shuffle=True, stratify=y_test, random_state=split_seed)

print(X_train.shape, X_test.shape, X_val.shape)

(16279, 128, 128, 3) (2849, 128, 128, 3) (1221, 128, 128, 3)


In [85]:
# Train on the in-memory arrays. `use_multiprocessing` was dropped: it only
# applies to generator / keras.utils.Sequence inputs and newer Keras releases
# reject the argument. X_test / y_test serve as the per-epoch validation set.
# The returned History object is kept so the learning curves can be inspected later.
history = model.model.fit(
    X_train, y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
    validation_data=(X_test, y_test),
    callbacks=[WandbCallback()],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
 21/128 [===>..........................] - ETA: 35s - loss: 1.9868 - accuracy: 0.2913

KeyboardInterrupt: 

In [None]:
# Evaluate on X_val / y_val, the split that was not passed to fit().
model.model.evaluate(X_val, y_val)  

In [None]:
# Per-class scores for the X_val images, reduced to the highest-scoring class id.
preds = model.model.predict(X_val)
p = preds.argmax(axis=1)

# Number of positions where the predicted id equals the true id.
correct = int(np.count_nonzero(p == y_val))

print(correct, len(p)-correct, len(p))
print(f'Overall Accuracy: {((correct/len(p))*100):.2f}%')

In [None]:
# Confusion matrix normalized over the true labels (each row sums to 1),
# drawn with the class names on both axes.
cm = confusion_matrix(y_true=y_val, y_pred=p, normalize='true')
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=le.classes_,
)
d = disp.plot(values_format='.2f', xticks_rotation='vertical')

In [None]:
# Persist the trained network twice: once under an extension-less path and once as an .h5 file.
# NOTE(review): the "92p-accuracy" in the file name is not supported by any metric
# visible in this notebook -- confirm or rename before sharing the artifact.
# NOTE(review): the extension-less path is presumably written in TensorFlow SavedModel
# format; newer Keras versions may require a .keras or .h5 suffix -- verify against
# the installed version.
model.model.save('92p-accuracy-Overfitting')
model.model.save('92p-accuracy-Overfitting.h5')
