In [2]:
import os                          
import caffe                       
import cv2                         
import matplotlib.pyplot as plt    
import numpy as np  
import pandas as pd
import csv
from ipy_progressbar import ProgressBar
from os import listdir
from os.path import isfile, join

Set your Caffe root directory, i.e. the folder in which `models/` resides.

In [3]:
# Root of the local Caffe checkout. The cells below look for the model files
# under "models/bvlc_alexnet" relative to this directory. Adjust for your machine.
caffe_root = "/home/maurice/bin/caffe-master/"

Check whether the pre-trained model file can be found. This is only a sanity check; automatic downloading is not implemented yet.

In [4]:
# Sanity check: is the pre-trained weights file where the loading cell expects it?
# os.path.join is used (as in the loading cell) so that a caffe_root without a
# trailing slash still resolves to the right path.
alexnet_weights_path = os.path.join(caffe_root, 'models/bvlc_alexnet/bvlc_alexnet.caffemodel')
if os.path.isfile(alexnet_weights_path):
    print('CaffeNet found.')
else:
    # Automatic download is not implemented, so report that the file is missing
    # instead of claiming a download is in progress.
    print('Pre-trained model not found at ' + alexnet_weights_path +
          '. Download it (e.g. with scripts/download_model_binary.py from the Caffe repository) before loading the net.')

CaffeNet found.


Set up Caffe for GPU mode and load the net and weights.

In [13]:
# Select GPU 0 and switch Caffe to GPU mode before the net is created.
caffe.set_device(0)                      
caffe.set_mode_gpu()   
cur_net_dir = join(caffe_root, "models/bvlc_alexnet") # specify folder for your net here

model_def = join(cur_net_dir , "deploy.prototxt")  # name of your *deploy.prototxt              
model_weights = join(cur_net_dir,'bvlc_alexnet.caffemodel') # file name of caffe model                
# `net` is the module-level network object used by extract_features below.
net = caffe.Net(model_def, model_weights, caffe.TEST) # use caffe.TEST as we do not want to adjust weights




Input transformer for the images.

In [6]:
# Create the transformer for the input blob called 'data'; it is sized from the
# shape of the net's 'data' blob and used by extract_features to preprocess images.
# NOTE(review): no mean subtraction (set_mean) is configured here - confirm this is intended.
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

# Move the last axis to the front (presumably H x W x C -> C x H x W; verify against the loader).
transformer.set_transpose('data', (2,0,1))
transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2,1,0))  # swap channels from RGB to BGR

In [7]:
def extract_features(file, layer="fc7"):
    """Run one image through the network and return the activations of ``layer``.

    Parameters
    ----------
    file : str
        Path of the image, loaded with ``caffe.io.load_image``.
    layer : str, optional
        Name of the blob in ``net.blobs`` to read after the forward pass.

    Returns
    -------
    tuple
        ``(True, features)`` on success, where ``features`` is a copy of the
        first entry of the requested blob. ``(False, None)`` if the image
        cannot be read (its path is printed) or if its first three channels
        contain no non-zero pixel.
    """
    try:
        img = caffe.io.load_image(file)
    except IOError:
        # Report the unreadable file and let the caller skip it.
        print(file)
        return False, None

    # A blank image has no non-zero pixel in any of its first three channels.
    # Split once instead of once per channel.
    channels = cv2.split(img)[:3]
    non_zero_pixels = sum(cv2.countNonZero(channel) for channel in channels)
    if non_zero_pixels < 1:
        return False, None

    net.blobs["data"].data[...] = transformer.preprocess('data', img)
    net.forward()
    # Return a copy: the blob's array belongs to the net and may be overwritten
    # by the next forward pass, which would silently change earlier results.
    return True, net.blobs[layer].data[0].copy()

In [8]:
def write_features_to_file(features, instance, instance_class, output_file, index=None, write_header=False):
    """Append one feature vector as a single ';'-separated row to ``output_file``.

    The row layout is ``instance[;index];<feature values...>;class``.

    Parameters
    ----------
    features : array-like or None
        Feature vector of one instance. ``None`` (a failed extraction) is
        ignored: nothing is written, not even a header.
    instance : str
        Instance name, written to the first column ("instance").
    instance_class : str
        Class label, written to the last column ("class").
    output_file : str
        Path of the CSV file; rows are appended (``mode='a'``).
    index : int, optional
        Chunk index; when given it is written to a second column ("index").
    write_header : bool, optional
        Whether to write the column names before the row.
    """
    if features is None:
        # Without this guard an empty frame would be written, which emits a
        # bogus "instance;class" header line when write_header is True.
        return

    # A 1-D vector becomes a single column; transpose to get one row.
    df = pd.DataFrame(features).T
    df.insert(0, "instance", instance)
    if index is not None:
        df.insert(1, "index", index)
    df.insert(len(df.columns), "class", instance_class)
    df.to_csv(output_file, index=False, mode='a', sep=";", header=write_header)
    
    

Extract features from spectrograms of a single class residing in the same folder.

In [9]:
def batch_extract_class(rootdir, class_name, output_file, write_header=False, layer="fc7"):
    """Extract features for every file in ``rootdir`` and label them ``class_name``.

    Parameters
    ----------
    rootdir : str
        Folder whose entries are all passed to ``extract_features``.
    class_name : str
        Class label written for every instance of this folder.
    output_file : str
        CSV file the rows are appended to.
    write_header : bool, optional
        If True, the column header is written together with the first
        successfully extracted instance.
    layer : str, optional
        Network layer whose activations are used as features.
    """
    for spectrogram in ProgressBar(listdir(rootdir), title="Extracting features for class "+class_name):
        success, features = extract_features(join(rootdir, spectrogram), layer)
        if not success:
            # Unreadable or blank image: there are no features to write, and
            # the header must stay pending for the first real row.
            continue
        # Instance name is the file name without its last four characters
        # (assumes a three-letter extension such as ".png").
        instance_name = spectrogram[:-4]
        write_features_to_file(features, instance_name, class_name, output_file, write_header=write_header)
        write_header = False

Extract deep spectrum features from a folder with the class labels provided in a tsv file. Supports spectrogram chunks.

In [10]:
def batch_extract_folder(folder, output_file, label_file, layer="fc7", chunks=False):
    """Extract features for every file in ``folder``, taking labels from ``label_file``.

    Parameters
    ----------
    folder : str
        Folder whose entries are all passed to ``extract_features``.
    output_file : str
        CSV file the rows are appended to; the header is written together
        with the first successfully extracted instance.
    label_file : str
        File read by ``create_label_dictionary`` to map instance names to labels.
    layer : str, optional
        Network layer whose activations are used as features.
    chunks : bool, optional
        If True, file names are expected to end in a four-digit chunk index
        preceded by one separator character and followed by a four-character
        extension (e.g. "name_0001.png"); the index is written to the output.

    Raises
    ------
    KeyError
        If a successfully processed instance has no entry in the label file.
    """
    print("Extracting features for folder "+folder)
    label_dict = create_label_dictionary(label_file)
    write_header = True
    for spectrogram in ProgressBar(listdir(folder)):
        success, features = extract_features(join(folder, spectrogram), layer)
        if not success:
            # Unreadable or blank image: there are no features to write, and
            # the header must stay pending for the first real row.
            continue
        if chunks:
            index = int(spectrogram[-8:-4])
            instance_name = spectrogram[:-9]
        else:
            index = None
            instance_name = spectrogram[:-4]
        write_features_to_file(features, instance_name, label_dict[instance_name], output_file, index, write_header)
        write_header = False

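Create a Python dictionary from a tab-separated (TSV) file whose rows have the format "devel_0001.wav<TAB>class". The last four characters of the file name (e.g. ".wav") are stripped to form the key.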

In [11]:
def create_label_dictionary(label_file):
    """Read a tab-separated label file into a ``{instance_name: label}`` dict.

    Each non-empty row is expected to hold the file name in the first column
    and the label in the second. The key is the file name without its last
    four characters (e.g. "devel_0001.wav" -> "devel_0001").

    Parameters
    ----------
    label_file : str
        Path of the tab-separated label file.

    Returns
    -------
    dict
        Mapping from instance name to label.
    """
    dictionary = {}
    # The context manager closes the file, which the bare open() call never did.
    with open(label_file) as handle:
        for row in csv.reader(handle, delimiter="\t"):
            if not row:
                # csv.reader yields an empty list for blank lines; skip them
                # instead of failing with an IndexError.
                continue
            dictionary[row[0][:-4]] = row[1]
    return dictionary



Example usage for folder + label file combination. Label file has to be tsv in format: "devel_0001.wav  class"

In [12]:
# Inputs for the folder + label-file example; adjust the paths for your machine.
root_folder = "/home/maurice/Desktop/INTERSPEECH/Addressee_dist/"
color_map = "standard"
labels = "/home/maurice/Desktop/INTERSPEECH/Addressee_dist/lab/ComParE2017_Addressee_with_test.tsv"
folder = join(root_folder, color_map)

# The output file is named after the colour map of the spectrograms.
addressee_output_file = "addressee_alexnet_fc7_" + color_map + ".csv"
batch_extract_folder(folder, addressee_output_file, labels, layer="fc7")

Extracting features for folder /home/maurice/Desktop/INTERSPEECH/Addressee_dist/standard


KeyboardInterrupt: 

Example usage for the scenario where the spectrograms belonging to one class are in a specific folder. Set write_header=True only for the first extracted class if you want to append all features to a single file.

In [35]:
# One folder per class; both classes are appended to the same CSV file.
expressive_directory = "/media/storage/users/Maurice/Documents/YouTubeData/train/exp/spectrograms"
non_expressive_directory = "/media/storage/users/Maurice/Documents/YouTubeData/train/nexp/spectrograms"
train_fc6_output_file = "train_spectrograms_alexnet_fc6.csv"

# Only the first class writes the header, so the file gets exactly one header row.
batch_extract_class(expressive_directory, "1", train_fc6_output_file, write_header=True, layer="fc6")
batch_extract_class(non_expressive_directory, "0", train_fc6_output_file, write_header=False, layer="fc6")

Extracting features for class 1
1/691
2/691
3/691
4/691
5/691
6/691
7/691
8/691
9/691
10/691
11/691
12/691
13/691
14/691
15/691
16/691
17/691
18/691
19/691
20/691
21/691
22/691
23/691
24/691
25/691
26/691
27/691
28/691
29/691
30/691
31/691
32/691
33/691
34/691
35/691
36/691
37/691
38/691
39/691
40/691
41/691
42/691
43/691
44/691
45/691
46/691
47/691
48/691
49/691
50/691
51/691
52/691
53/691
54/691
55/691
56/691
57/691
58/691
59/691
60/691
61/691
62/691
63/691
64/691
65/691
66/691
67/691
68/691
69/691
70/691
71/691
72/691
73/691
74/691
75/691
76/691
77/691
78/691
79/691
80/691
81/691
82/691
83/691
84/691
85/691
86/691
87/691
88/691
89/691
90/691
91/691
92/691
93/691
94/691
95/691
96/691
97/691
98/691
99/691
100/691
101/691
102/691
103/691
104/691
105/691
106/691
107/691
108/691
109/691
110/691
111/691
112/691
113/691
114/691
115/691
116/691
117/691
118/691
119/691
120/691
121/691
122/691
123/691
124/691
125/691
126/691
127/691
128/691
129/691
130/691
131/691
132/691
133/691
134/691
135/