In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect every file path found under the Kaggle input mount (walk order),
# then print them one per line.
input_file_paths = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
]
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline
import random
import tensorflow as tf
import os
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import layers,mixed_precision


In [4]:
# Root folder of the birds_400 dataset inside the read-only "../input/" directory.
DATA_ROOT='../input/csc4851-homework4/birds_400'

# One sub-folder per split. Building all three from the same root keeps them
# consistent (val_dir used to carry a stray double slash: '../input//csc4851-...').
train_dir=f'{DATA_ROOT}/train'
val_dir=f'{DATA_ROOT}/valid'
test_dir=f'{DATA_ROOT}/test'

In [5]:
def plot_random_image(target_dir,num_images=5):
    """
    Show up to `num_images` random images from one randomly chosen class folder.

    Args:
        target_dir: directory whose sub-directories are the class folders.
        num_images: how many images to draw (default 5). If the chosen class
            folder holds fewer files, all of them are shown instead of failing.

    Raises:
        ValueError: if there is nothing to plot (empty class folder or
            `num_images` < 1).
    """
    # Only sub-directories count as classes; stray files next to them are ignored.
    class_dirs=[
        name for name in os.listdir(target_dir)
        if os.path.isdir(os.path.join(target_dir,name))
    ]
    target_class=random.choice(class_dirs)
    target_folder=os.path.join(target_dir,target_class)

    image_files=[
        name for name in os.listdir(target_folder)
        if os.path.isfile(os.path.join(target_folder,name))
    ]
    # random.sample raises when asked for more items than exist, so clamp first.
    sample_size=min(num_images,len(image_files))
    if sample_size<1:
        raise ValueError(f'no images to plot from {target_folder!r}')
    random_image=random.sample(image_files,sample_size)

    plt.figure(figsize=(32,10))
    for i,file_name in enumerate(random_image):
        plt.subplot(1,sample_size,i+1)
        img=tf.io.read_file(os.path.join(target_folder,file_name))
        # expand_animations=False makes GIFs decode to a single 3-D frame,
        # which imshow can draw, instead of a 4-D stack of frames.
        img=tf.io.decode_image(img,expand_animations=False)
        plt.imshow(img)
        plt.title(f'{target_class}\n{img.shape}')
        plt.axis(False)

In [6]:
# Preview random images from one randomly chosen class of the training split.
plot_random_image(train_dir)

In [7]:
IMAGE_SIZE=(224,224)

# Options shared by all three splits: one-hot ('categorical') labels and
# images resized to IMAGE_SIZE.
_loader_kwargs=dict(label_mode='categorical',image_size=IMAGE_SIZE)
_load_split=tf.keras.preprocessing.image_dataset_from_directory

train_data=_load_split(train_dir,**_loader_kwargs)
class_names=train_data.class_names
num_classes=len(class_names)

val_data=_load_split(val_dir,**_loader_kwargs)

# The test split is loaded unshuffled; later cells line predictions up with
# labels purely by row position.
test_data=_load_split(test_dir,shuffle=False,**_loader_kwargs)

# Prefetching views of each split (buffer size chosen by tf.data.AUTOTUNE).
train_data_pf,val_data_pf,test_data_pf=(
    split.prefetch(buffer_size=tf.data.AUTOTUNE)
    for split in (train_data,val_data,test_data)
)

In [8]:
# Augmentation steps applied in order: random horizontal flip, then a random
# rotation (factor 0.2) with nearest-pixel fill. No Rescaling layer is used.
augmentation_steps=[
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.2,fill_mode='nearest'),
]
data_augmentation=keras.Sequential(augmentation_steps,name='Data_Augmentation_Layer')

In [21]:
# Switch Keras to the 'mixed_float16' global dtype policy before building layers.
mixed_precision.set_global_policy('mixed_float16')

inputs=layers.Input(shape=(224,224,3),name='input_layer')

# ImageNet-pretrained EfficientNetB0 without its classification head, frozen.
base_model=keras.applications.efficientnet.EfficientNetB0(
    include_top=False,
    weights="imagenet",
)
base_model.trainable=False

# Augment, then run the frozen backbone in inference mode (training=False).
x=base_model(data_augmentation(inputs),training=False)
x=layers.GlobalAveragePooling2D(name='Global_Average_Pool_2D')(x)

# Softmax head with one unit per class; its dtype is pinned to float32
# even though the global policy is mixed_float16.
num_classes=len(train_data.class_names)
outputs=layers.Dense(
    num_classes,
    activation='softmax',
    dtype=tf.float32,
    name="Output_layer",
)(x)

model=keras.Model(inputs,outputs,name="model")

In [22]:
# Categorical cross-entropy matches the one-hot labels produced by the loaders;
# Adam is used with its default settings and accuracy is tracked as the metric.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [23]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [24]:
EPOCHS=10

# Each epoch runs only 10% of the training batches (rounded down), while
# validation runs over every validation batch.
train_steps=int(0.1*len(train_data_pf))
val_steps=len(val_data_pf)

history_of_model=model.fit(
    x=train_data_pf,
    epochs=EPOCHS,
    steps_per_epoch=train_steps,
    validation_data=val_data_pf,
    validation_steps=val_steps,
)

In [25]:
# Evaluate on the prefetched test split and display what evaluate() returned
# (the model was compiled with a loss plus the 'accuracy' metric).
model_0_result=model.evaluate(test_data_pf)
model_0_result

In [26]:
# Per-class probabilities for every test image, then the index of the most
# probable class along the last axis.
y_prob = model.predict(test_data_pf)
y_pred = np.argmax(y_prob, axis=-1)

In [17]:
# Peek at the predicted class indices for test rows 400..449.
# NOTE(review): this cell's execution count (17) is out of sequence with the
# cells around it, so any saved output may come from an earlier model run.
y_pred[400:450]

In [27]:
def plot_loss_curves(history):
    '''
      Draw training vs. validation curves in one figure: loss on the left
      panel, accuracy on the right. Nothing is returned.

      history: object whose `.history` dict holds the per-epoch lists
               'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    '''
    metrics=history.history
    # Look up all four series up front so a missing key fails before any drawing.
    curves={
        key: metrics[key]
        for key in ('loss','val_loss','accuracy','val_accuracy')
    }
    # Epochs are numbered from 1 on the x axis.
    epoch_axis=range(1,len(metrics['loss'])+1)

    # (subplot position, title, y-axis label, ((series key, legend label), ...))
    panels=(
        (1,"Loss curves",'loss',
         (('loss',"training_loss"),('val_loss',"validation_loss"))),
        (2,"Accuracy curves",'Accuracy',
         (('accuracy',"training_acc"),('val_accuracy',"validation_acc"))),
    )

    plt.figure(figsize=(20,7))
    for position,panel_title,y_label,series in panels:
        plt.subplot(1,2,position)
        for key,legend_label in series:
            plt.plot(epoch_axis,curves[key],label=legend_label)
        plt.title(panel_title)
        plt.xlabel('epochs')
        plt.ylabel(y_label)
        plt.legend()

In [28]:
# Plot the loss and accuracy curves recorded while fitting the model.
plot_loss_curves(history_of_model)

In [29]:
def log_loss_cal(encoded, y_prob, x, y, eps=1e-15):
    """Mean categorical cross-entropy over rows ``x`` (inclusive) to ``y`` (exclusive).

    Args:
        encoded: per-sample target rows (one-hot in this notebook), sliceable by row.
        y_prob: per-sample predicted class probabilities, aligned row-for-row
            with ``encoded``.
        x: index of the first row in the window.
        y: index one past the last row in the window.
        eps: probabilities are clipped to ``[eps, 1.0]`` before taking the log.

    Returns:
        float: the sum of ``-target * log(prob)`` over the window divided by the
        number of rows actually present in it; ``0.0`` for an empty window.
    """
    total = 0.0
    rows_seen = 0
    for act_row, pred_row in zip(encoded[x:y], y_prob[x:y]):
        targets = np.asarray(act_row, dtype=np.float64)
        # Clipping keeps log() finite: an exact 0 probability would otherwise
        # give 0 * -inf = NaN and poison the whole sum.
        probs = np.clip(np.asarray(pred_row, dtype=np.float64), eps, 1.0)
        # Pair classes only up to the shorter row, as the element-wise zip did.
        width = min(targets.size, probs.size)
        total -= float(np.dot(targets[:width], np.log(probs[:width])))
        rows_seen += 1

    if rows_seen == 0:
        return 0.0
    # Average over the rows actually processed rather than a hard-coded 5.
    return total / rows_seen

In [30]:
import pandas as pd

# Keep only the rows of birds.csv that belong to the test split and take their
# integer class indices as ground truth.
# NOTE(review): assumes the CSV lists test rows in the same order in which
# test_data yields images - confirm before trusting the per-row comparison.
df = pd.read_csv('../input/csc4851-homework4/birds_400/birds.csv')
rslt_df = df.loc[df['data set'] == 'test']
index = rslt_df['class index']
y_actual = index.to_numpy()

# Predicted class probabilities for the test split and their arg-max class.
y_prob = model.predict(test_data_pf)
y_pred = np.argmax(y_prob, axis=-1)

In [31]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the ground-truth class indices. The width is pinned to
# num_classes (the size of the model's softmax output) instead of being
# inferred from the largest label present, so every encoded row lines up
# column-for-column with the matching row of predicted probabilities.
encoded = to_categorical(y_actual, num_classes=num_classes)

In [32]:
# Score consecutive, non-overlapping windows of 5 test rows covering rows
# 0..1999; each window gets a running integer id starting at 0.
window_starts = range(0, 2000, 5)
log_loss1 = [
    log_loss_cal(encoded, y_prob, start, start + 5)
    for start in window_starts
]
ID = list(range(len(log_loss1)))
count = len(ID)

In [33]:
# Single-column frame holding the window ids; show its first rows.
sub = pd.DataFrame(data=ID, columns=['id'])
sub.head()

In [34]:
# Single-column frame holding the per-window log loss; show its first rows.
sub1 = pd.DataFrame(data=log_loss1, columns=['birds'])
sub1.head()


In [35]:
# Put the id column and the log-loss column side by side; show the first 10 rows.
final = pd.concat(objs=[sub, sub1], axis='columns')
final.head(n=10)

In [None]:
# Write the submission table to sub3.csv without the DataFrame index column.
final.to_csv(path_or_buf='sub3.csv', index=False)

Other Models 


In [None]:
# Switch Keras to the 'mixed_float16' global dtype policy before building layers.
mixed_precision.set_global_policy('mixed_float16')

inputs=layers.Input(shape=(224,224,3),name='input_layer')

# ImageNet-pretrained ResNet50 without its classification head, frozen.
base_model=keras.applications.resnet50.ResNet50(include_top=False, weights = "imagenet")
base_model.trainable=False

x=data_augmentation(inputs)

# ResNet50's ImageNet weights expect inputs that went through
# resnet50.preprocess_input (RGB->BGR plus per-channel ImageNet mean
# subtraction). Unlike EfficientNet, ResNet50 has no preprocessing built in,
# and the datasets here yield raw 0-255 pixels, so it must be applied explicitly.
x=keras.applications.resnet50.preprocess_input(x)

# Run the frozen backbone in inference mode (training=False).
x=base_model(x,training=False)

x=layers.GlobalAveragePooling2D(name='Global_Average_Pool_2D')(x)
num_classes=len(train_data.class_names)
# Softmax head with one unit per class; its dtype is pinned to float32 even
# though the global policy is mixed_float16.
outputs=layers.Dense(num_classes,activation='softmax',dtype=tf.float32,name="Output_layer")(x)

model=keras.Model(inputs,outputs,name="model")