This notebook evaluates the trained VGG16 finetuned model against the testing set. It also provides some visual examples of the predictios.

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import models
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
from keras_preprocessing.image import ImageDataGenerator
import pandas as pd

In [None]:
%%capture
!unzip /content/drive/MyDrive/Colab_Notebooks/podcast/appledataset/images.zip -d  /content/images

In [None]:
df = pd.read_csv('/content/drive/MyDrive/Colab_Notebooks/podcast/appledataset/podcast_final.csv')

In [None]:
model = keras.models.load_model('/content/drive/MyDrive/Colab_Notebooks/podcast/VGG16_finetuned_1dense2048.h5')

In [None]:
classes_dict = {'After Shows': 0, 'Animation & Manga': 1, 'Arts': 2, 'Books': 3, 'Business': 4, 'Comedy': 5, 'Design': 6, 'Documentary': 7,
                'Education': 8, 'Fashion & Beauty': 9, 'Fiction': 10, 'Food': 11, 'Games': 12, 'Health': 13, 'Hobbies': 14, 'Interviews': 15, 
                'Kids & Family': 16, 'Music': 17, 'Nature': 18, 'News': 19, 'Non-Profit': 20, 'Personal': 21, 'Pets & Animals': 22, 'Places & Travel': 23, 
                'Politics': 24, 'Religion': 25, 'Science': 26, 'Sexuality': 27, 'Society & Culture': 28, 'Sports': 29, 'Stand-Up': 30, 'TV & Film': 31, 'Technology': 32,
                'True Crime': 33, 'Vehicles': 34}




In [None]:
from sklearn.utils import shuffle
df = shuffle(df, random_state = 42)

In [None]:
df_train = df[:28279]
df_test = df[28279:]

In [None]:
filepaths = df['filepaths'].tolist() #change the paths according to colab directory
new = []
for el in filepaths:
  path = el.split('/')[-1]
  path= '/content/images/' + path
  new.append(path)
df['filepaths'] = new

In [None]:
datagen=ImageDataGenerator(rescale=1./255,validation_split=0.25)
train_generator=datagen.flow_from_dataframe(dataframe=df_train, 
                                            directory=None, x_col="filepaths", y_col="Primary Genre", 
                                            class_mode="categorical", target_size=(224,224), batch_size=32,
                                            shuffle = True, subset='training')
validation_generator=datagen.flow_from_dataframe(dataframe=df_train, 
                                            directory=None, x_col="filepaths", y_col="Primary Genre", 
                                            class_mode="categorical", target_size=(224,224), batch_size=32,
                                            shuffle = True, subset='validation')

test_datagen=ImageDataGenerator(rescale=1./255.)
test_generator=test_datagen.flow_from_dataframe(
                                            dataframe=df_test,
                                            directory=None,
                                            x_col="filepaths",
                                            y_col='Primary Genre',
                                            batch_size=32,
                                            target_size=(224,224))

Found 21210 validated image filenames belonging to 35 classes.
Found 7069 validated image filenames belonging to 35 classes.
Found 1000 validated image filenames belonging to 35 classes.


In [None]:
test_generator.reset()

In [None]:
filenames = test_generator.filenames
nb_samples = len(filenames)

In [None]:
predict = model.predict(test_generator,steps = nb_samples/32, verbose = 1)



In [None]:
predicted_class_indices=np.argmax(predict,axis=1)

In [None]:
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

In [None]:
filenames=test_generator.filenames
results=pd.DataFrame({"Filename":filenames,
                      "Predictions":predictions, 'secondary genre':df_test['Genre IDs'].tolist(), 'real': df_test['Primary Genre'].tolist(),
                      'description': df_test['Description'].tolist()})

In [None]:
results.head(9)

Unnamed: 0,Filename,Predictions,secondary genre,real,description
0,/content/images/id124116392.jpg,Kids & Family,"['Daily News', 'Podcasts', 'News']",News,RN Breakfast daily stories separated out for e...
1,/content/images/id1495557557.jpg,Science,"['Music History', 'Podcasts', 'Music']",Music,"For over 4 decades, Bob Clearmountain has defi..."
2,/content/images/id1552114238.jpg,Religion,"['TV Reviews', 'Podcasts', 'TV & Film']",TV & Film,If you’ve never watched Little House on the Pr...
3,/content/images/id1545221046.jpg,Science,"['Entrepreneurship', 'Podcasts', 'Business']",Business,No-bullsh*t strategies to help you *CATAPULT* ...
4,/content/images/id285097604.jpg,Religion,"['Music', 'Podcasts']",Music,Presented by Mark Knight: 2 hours of the hotte...
5,/content/images/id1525926766.jpg,Religion,"['Drama', 'Podcasts', 'Fiction', 'Society & Cu...",Fiction,"The Adventures of Sam Spade, Detective was a r..."
6,/content/images/id1578559817.jpg,Sports,"['Film History', 'Podcasts', 'TV & Film', 'Tru...",TV & Film,It’s one of the wildest scandals in Hollywood ...
7,/content/images/id1551541889.jpg,Hobbies,"['Politics', 'Podcasts', 'News', 'Government']",Politics,"Washington, D.C.‘s fiercest independent report..."
8,/content/images/id1051566354.jpg,Health,"['1473', '26', '1325']",Politics,Justin Robert Young announces his intention to...


In [None]:
genre_ids= [['1311', 'News & Politics'], ['26', 'Podcasts'], ['1479', 'Social Sciences'], ['1315', 'Science & Medicine'],
            ['1324', 'Society & Culture'], ['1302', 'Personal Journals'], ['1469', 'Language Courses'], ['1304', 'Education'], 
            ['1320', 'Places & Travel'], ['1416', 'Higher Education'], ['1465', 'Professional'], ['1316', 'Sports & Recreation'],
            ['1303', 'Comedy'], ['1305', 'Kids & Family'], ['1439', 'Christianity'], ['1314', 'Religion & Spirituality'], 
            ['1444', 'Spirituality'], ['1309', 'TV & Film'], ['1462', 'History'], ['1310', 'Music'], ['1478', 'Medicine'], 
            ['1321', 'Business'], ['1412', 'Investing'], ['1420', 'Self-Help'], ['1307', 'Health'], ['1481', 'Alternative Health'], 
            ['1417', 'Fitness & Nutrition'], ['1467', 'Amateur'], ['1480', 'Software How-To'], ['1318', 'Technology'], ['1448', 'Tech News'], 
            ['1456', 'Outdoor'], ['1477', 'Natural Sciences'], ['1301', 'Arts'], ['1454', 'Automotive'], ['1323', 'Games & Hobbies'], 
            ['1438', 'Buddhism'], ['1443', 'Philosophy'], ['1401', 'Literature'], ['1402', 'Design'], ['1410', 'Careers'], ['1470', 'Training'],
            ['1413', 'Management & Marketing'], ['1306', 'Food'], ['1406', 'Visual Arts'], ['1446', 'Gadgets'], ['1468', 'Educational Technology'],
            ['1405', 'Performing Arts'], ['1460', 'Hobbies'], ['1471', 'Business News'], ['1404', 'Video Games'], ['1450', 'Podcasting'], 
            ['1473', 'National'], ['1325', 'Government & Organizations'], ['1461', 'Other Games'], ['1466', 'College & High School'], 
            ['1459', 'Fashion & Beauty'], ['1476', 'Non-Profit'], ['1415', 'K-12'], ['1455', 'Aviation'], ['1464', 'Other'], 
            ['1421', 'Sexuality'], ['1472', 'Shopping'], ['1475', 'Local'], ['1441', 'Judaism'], ['1440', 'Islam'],
            ['1474', 'Regional'], ['1463', 'Hinduism']]


In [24]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

plot_df = results[9:15]
second_genre = plot_df['secondary genre'].tolist()
names =plot_df['Predictions'].tolist()
real = plot_df['real'].tolist()
descr = plot_df['description'].tolist()
plt.figure(figsize=(35, 25))
for index, image in enumerate(plot_df['Filename'].tolist()):
      ax = plt.subplot(2, 3, index + 1)
      img = mpimg.imread(image)
      imgplot = plt.imshow(img)
      sec_gen = second_genre[index]
      if '[' in second_genre[index]:
        new_genres = []
        g_ids =sec_gen.strip('][').split(', ')
        for el in g_ids:
          el = el.strip("'")
          for genre in genre_ids:
            if el in genre:
              new_genres.append(genre[1])
        sec_gen = new_genres
      plt.title(f'predicted: {names[index]} (real: {real[index]})\n secondary genres: {str(sec_gen)}', fontdict={'fontsize': 18})
      plt.axis("off")


Output hidden; open in https://colab.research.google.com to view.

In [None]:
model.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss="categorical_crossentropy",metrics=["accuracy", tf.keras.metrics.TopKCategoricalAccuracy(k=5)]
)

In [None]:
history_eval = model.evaluate(test_generator)



In [None]:
import json
history = {'loss': history_eval[0], 'accuracy': history_eval[1], 'top_5_accuracy': history_eval[2]}
print(history)
json.dump(history, open('/content/drive/MyDrive/Colab_Notebooks/podcast/logs/VGG16_eval.json', 'w'))

{'loss': 3.0054075717926025, 'accuracy': 0.1979999989271164, 'top_5_accuracy': 0.4729999899864197}
