##### Fix the Code (Marks 30)
<pre>
 - Read the given paper carefully.
 - Separate the code into multiple cells and explain the code and each segment in Markdown. 
 - Rename and change the variable name for meaningful names.
 - Clean and remove the commented codes and print codes.
 - Get the final test accuracy and loss and print it.
</pre>

### Importing the necessary libraries

In [62]:

import tensorflow as tf

from tensorflow.keras import datasets, models, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
from PIL import Image

In [32]:
d = "G:\\ML_Exam\\ml-2023-test\\concrete\\"      # image folder directory
m =  "G:\\ML_Exam\\ml-2023-test\\trainedModel\\" # 
img_size = 64                                    # resizing to image size
nb_epochs = 10                                   # no of epochs to train the model
validation_split = 0.2                           # 20% of images from both folders are taken for validation
                                                 # other 80% are taken for training

#### Reading and normalizing and seperating the data

In [33]:
train_datagen = ImageDataGenerator(rescale=1./255,
                                   validation_split=validation_split)  # normalize the images and set validation split

train_generator = train_datagen.flow_from_directory(
    d,
    target_size=(img_size, img_size),
    class_mode= 'binary', # only two classes are there: binary
    subset='training')    # set as training data

validation_generator = train_datagen.flow_from_directory(
    d,
    target_size=(img_size, img_size),
    class_mode='binary', 
    subset='validation')  # set as validation data

Found 32000 images belonging to 2 classes.
Found 8000 images belonging to 2 classes.


### Defining the model. 
#### Since we are dealing with unstructured data (images), we will use a CNN

In [34]:
model = models.Sequential()

#convolutional layer and maxpool layer 1
model.add(Conv2D(32,(3,3),1, input_shape=(img_size,img_size,3)))
model.add(MaxPooling2D((2,2)))

#convolutional layer and maxpool layer 2
model.add(Conv2D(32,(3,3),1, activation = 'relu'))
model.add(MaxPooling2D((2,2)))

#flattens the resulting image array to 1D array
model.add(Flatten())

#Hidden layer with 32 neurones and Rectified Linear Unit activation function
model.add(Dense(32, activation='relu'))

#Output layer with single neurone, which will give 0 for Negative and 1 for Positive
#Sigmoid activation function will make sure our output lies between 0-1
model.add(Dense(1, activation='sigmoid'))

In [35]:
# Compiling the created model using following
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 13, 13, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 5, 5, 32)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 800)               0         
                                                                 
 dense (Dense)               (None, 32)                2

In [36]:
# Define the callback to save the best model based depending on validation accuracy
checkpoint_callback = ModelCheckpoint(
    filepath=m,
    monitor='val_accuracy',
    save_best_only=True,     # Save only the best model
    mode='max',              # maximize the chosen metric
    verbose=1
)

In [75]:
#Training the model with training data and validation data

history = model.fit(train_generator, validation_data=validation_generator, callbacks=[
                    checkpoint_callback], epochs=nb_epochs)

##ImportError: Could not import PIL.Image. The use of `load_img` requires PIL.

ImportError: Could not import PIL.Image. The use of `load_img` requires PIL.

In [None]:
model.save("best.h5")

In [76]:
#plotting the accuracy and validation accuracy
#can detect underfitting and any overfitting

plt.plot(history.history['accuracy'], color='teal' ,label='accuracy')
plt.plot(history.history['val_accuracy'], color='red' ,label = 'val_accuracy')

#accuracy plotted against no of epochs 
plt.xlabel('Epoch')
plt.ylabel('Accuracy')

#output is plotted inbetween 0-100%
plt.ylim([0, 1])
plt.legend(loc='lower right')

NameError: name 'history' is not defined

##### Continuing with the example best.h5

In [38]:
#loading the model

model = tf.keras.models.load_model("G:\\ML_Exam\\ml-2023-test\\example\\best.h5")

In [39]:
labels = (train_generator.class_indices)
print(labels)

{'Negative': 0, 'Positive': 1}


### Model testing with test images

In [40]:
path = './test/' #path to the test images
k = [] #array of images
names = [] #array of image names

for filename in os.listdir(path):
    p = os.path.join(path, filename)
    if 'jpg' in p:
        # print(p)
        c = cv2.imread(p) #read the image 
        c = cv2.resize(c, (img_size, img_size), interpolation=cv2.INTER_LINEAR) #resize the read image
        k.append(c) 
        names.append(filename)
k = np.array(k)

In [55]:
print(k.shape)
print(np.size(names))

(12, 64, 64, 3)
12


In [42]:
class_names =  [0,1] #class names of two categories

#### Predicting the test images

In [57]:
#storing the fact if predicted value is greater/less than 0.5
#which category the k values belong to

predicted_labels = (model.predict(k) > 0.5).astype("int32") 
print(predicted_labels.shape)
print(predicted_labels) #2D array



(12, 1)
[[0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]]


In [58]:
predicted_labels = predicted_labels.flatten()
print(predicted_labels.shape)
print(predicted_labels) # 1D array

(12,)
[0 0 0 0 0 0 1 1 1 1 1 1]


In [60]:
predicted_labels = [class_names[i] for i in predicted_labels]
print(predicted_labels) #list

[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]


In [66]:
print(names)

['n00001.jpg', 'n00002.jpg', 'n00003.jpg', 'n00004.jpg', 'n00005.jpg', 'n00006.jpg', 'p00001.jpg', 'p00002.jpg', 'p00003.jpg', 'p00004.jpg', 'p00005.jpg', 'p00006.jpg']


#### Storing the results of the predictions

In [67]:
test_arr = []
for i in names:

    #if image contains 'n', which is negatives, 0 added to test_arr
    if i[0] == 'n': 
        test_arr.append(0) 

    #if image contains 'p', which is negatives, 1 added to test_arr    
    elif i[0] == 'p' : 
        test_arr.append(1)

test_arr = np.array(test_arr)

### Calculating the accuracy and loss of the model

In [68]:
if (len(test_arr) == len(k)):
    score = model.evaluate(k, test_arr)



In [74]:
if (len(test_arr) == len(k)):
    
    score = model.evaluate(k, test_arr)

    print(labels)
    print(names)

    print('Test loss:', score[0]) 
    print('Test accuracy:', score[1]) 
    
else:
    print("Something wrong with the prediction.") 

{'Negative': 0, 'Positive': 1}
['n00001.jpg', 'n00002.jpg', 'n00003.jpg', 'n00004.jpg', 'n00005.jpg', 'n00006.jpg', 'p00001.jpg', 'p00002.jpg', 'p00003.jpg', 'p00004.jpg', 'p00005.jpg', 'p00006.jpg']
Test loss: 0.0
Test accuracy: 1.0
