# **Create_Coswara_images.ipynb**

Creation of images from audio files.

In [None]:
# Mount Google Drive at /content/drive so the paths used below resolve (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

# Project root on Google Drive and the sub-folder holding the raw Coswara audio files.
folder = '/content/drive/My Drive/Colab Notebooks/coswara'
data_folder = '/content/drive/My Drive/Colab Notebooks/coswara/public_dataset'

# Report (without stopping) any expected directory that is not present.
for required_dir in (folder, data_folder):
  if not os.path.exists(required_dir):
    print(required_dir + ' does not exist')

In [None]:
# Libraries for data handling (pandas, numpy), audio processing (librosa)
# and rendering/saving images (matplotlib, cv2).
import pandas as pd
import os
import librosa
import librosa.display
import cv2
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Suppress all warnings for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries above.
warnings.simplefilter("ignore")

Read in dataframe created previously.

In [None]:
# Load the metadata table (one row per recording) written by the previous stage.
coswara_csv = os.path.join(folder, 'df_coswara.csv')
df_coswara = pd.read_csv(coswara_csv)
df_coswara.head(10)

Unnamed: 0.1,Unnamed: 0,id,age,gender,status,audio_file
0,0,iV3Db6t1T8b7c5HQY2TwxIhjbzD3,28,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...
1,1,AxuYWBN0jFVLINCBqIW5aZmGCdu1,25,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...
2,2,C5eIsssb9GSkaAgIfsHMHeR6fSh1,28,female,healthy,/content/drive/My Drive/Colab Notebooks/coswar...
3,3,YjbEAECMBIaZKyfqOvWy5DDImUb2,26,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...
4,4,aGOvk4ji0cVqIzCs1jHnzlw2UEy2,32,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...
5,6,rB5oGtrGYZR5uJUXEaDYrrredz13,23,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...
6,7,OW5RTM4WXPawz0QLpsfjsl4FqM22,33,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...
7,8,lF4uHVWCoBPlnxvapmUJ4ROtYV73,26,female,healthy,/content/drive/My Drive/Colab Notebooks/coswar...
8,9,LYJToDRYDZfpjBuNC6YkUcphr0q1,27,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...
9,11,pOZwqBg4NsVYWASmwwhXFq4UlpC2,35,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...


In [None]:
# Spot-check the full audio path stored for index labels 0 and 5.
for row_label in (0, 5):
  print(df_coswara.audio_file[row_label])

/content/drive/My Drive/Colab Notebooks/coswara/public_dataset/iV3Db6t1T8b7c5HQY2TwxIhjbzD3.wav
/content/drive/My Drive/Colab Notebooks/coswara/public_dataset/rB5oGtrGYZR5uJUXEaDYrrredz13.wav


Use code from virufy.com to convert the audio files into image files.
(I have adapted it to use my variable names, and I have not yet split the data into train and test sets, as they had done with their data.)

In [None]:
# Functions to process audio files into images (adapted from code at virufy.com)
def trim_silence(x, *args):
    """Cut leading/trailing silence from ``x`` while keeping ``pad`` samples either side.

    ``args`` must supply, in order: ``pad``, ``db_max``, ``frame_length``, ``hop_length``.
    ``db_max`` is forwarded to ``librosa.effects.trim`` as ``top_db``.

    NOTE(review): ``frame_length`` and ``hop_length`` are required but never used;
    the trim always runs with ``frame_length=256`` and ``hop_length=64``.

    Returns the slice of ``x`` between the padded trim bounds, or ``None`` (after
    printing a usage message) when fewer than four extra arguments are given.
    """
    if len(args) < 4:
        print('Please enter the following arguments: pad,db_max,frame_length,hop_length')
        return

    pad, db_max = args[0], args[1]

    # Second return value holds the [start, end] sample indices of the non-silent region.
    _, bounds = librosa.effects.trim(x, top_db=db_max, frame_length=256, hop_length=64)
    first_sample, last_sample = bounds[0], bounds[1]

    # Widen the region by `pad` on both sides, clamped to the signal limits.
    start = int(max(first_sample - pad, 0))
    end = int(min(last_sample + pad, len(x)))
    return x[start:end]

def process_cough_file(path,trim,*args):
    """Load one audio file and return a silence-trimmed clip of fixed length.

    Parameters
    ----------
    path : location of the audio file, passed to ``librosa.load``.
    trim : callable invoked as ``trim(x, pad, db_max, win_length, hop_length)``;
        it must return the trimmed signal.
    *args : optional ``sr, removeaudio, chunk, db_max`` (in that order). Unless all
        four are supplied the defaults ``48000, False, 3, 50`` are used.

    Returns
    -------
    ``-1`` if the file could not be loaded;
    ``(None, None)`` if the recording is shorter than 0.3 s or longer than 30 s;
    otherwise ``[x_pad, sr, hop_length, win_length]`` where ``x_pad`` is the clip
    cut or zero-padded to ``chunk`` seconds.

    The different return types are kept because callers tell failures apart by
    type (``int`` / ``tuple``).
    """
    if len(args) >= 4:
        sr, removeaudio, chunk, db_max = args[0], args[1], args[2], args[3]
    else:
        sr, removeaudio, chunk, db_max = 48000, False, 3, 50

    try:
        x, sr = librosa.load(path, sr=sr)
    except Exception:
        # Unreadable/corrupt file. `Exception` (not a bare except) so that
        # KeyboardInterrupt can still stop a long batch run.
        return -1

    duration = len(x) / sr
    if duration < 0.3 or duration > 30:
        return None, None
    hop_length = np.floor(0.010*sr).astype(int) #10ms
    win_length = np.floor(0.020*sr).astype(int) #20ms

    if removeaudio:
        # WARNING: permanently deletes the source audio file once it is in memory.
        os.remove(path)

    x = trim(x, 0.25*sr, db_max, win_length, hop_length)
    x = x[:int(np.floor(chunk*sr))]

    # Zero-pad to the chunk size if the trimmed clip is shorter.
    x_pad = np.zeros(int(sr*chunk))
    n_copy = min(len(x_pad), len(x))
    x_pad[:n_copy] = x[:n_copy]

    return [x_pad, sr, hop_length, win_length]

def get_melspec(sdir,audio,sr,name):
    """Render the mel spectrogram of ``audio`` and save it as ``<sdir>/<name>.png``.

    Parameters
    ----------
    sdir : directory the image is written to.
    audio : audio samples passed to ``librosa.feature.melspectrogram``.
    sr : sampling rate of ``audio``.
    name : file name (without extension) of the saved image.

    Returns
    -------
    The path of the PNG file handed to ``cv2.imwrite``.
    """
    # Mel spectrogram
    plt.ioff()  # turn interactive mode off while figures are rendered
    fig = plt.figure()
    try:
        melspec = librosa.feature.melspectrogram(y=audio, sr=sr)
        s_db = librosa.power_to_db(melspec, ref=np.max)
        librosa.display.specshow(s_db)
        fig.canvas.draw()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        img = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        # get_width_height() is (width, height); image arrays are (height, width, channels).
        img = img.reshape(fig.canvas.get_width_height()[::-1] + (3,))
    finally:
        # Always release the figure, even if rendering failed, so figures do not pile up.
        plt.close(fig=fig)
    # A crop of the plot area was tried and left disabled: img = img[80:250,80:300]

    # NOTE(review): the canvas bytes are RGB while cv2.imwrite treats 3-channel data as
    # BGR, so red and blue look swapped in the saved file - confirm whether that matters.
    savepath = os.path.join(sdir, name + '.png')
    cv2.imwrite(savepath, img)
    return savepath

def getlabel(key, dataframe, chosen):
    """Return the status value of the first row whose id column equals ``key``.

    ``chosen`` maps the roles ``'id'`` and ``'status'`` to the matching column
    names of ``dataframe``. Raises ``IndexError`` when no row matches ``key``.
    """
    id_column = chosen['id']
    status_column = chosen['status']
    matching_rows = dataframe.loc[dataframe[id_column] == key]
    labels = matching_rows[status_column].tolist()
    return labels[0]

def extract(df, chosen, savedir):
    """Create a mel-spectrogram image for every usable audio file listed in ``df``.

    Parameters
    ----------
    df : dataframe holding the id, status and audio-path columns.
    chosen : dict mapping the roles ``'id'``, ``'status'`` and ``'path'`` to column names.
    savedir : directory for the images; created (including parents) if missing.

    Returns
    -------
    dict mapping each id to ``{'DIR': <image path>, 'label': <status>}``. Files that
    ``process_cough_file`` rejects (it returns an ``int`` or a ``tuple`` for those)
    are left out. An empty ``df`` yields an empty dict.
    """
    os.makedirs(savedir, exist_ok=True)

    keys = df[chosen['id']].tolist()
    paths = df[chosen['path']].tolist()

    data = {}
    # Handle one recording at a time so only a single clip is held in memory,
    # instead of loading the whole batch before drawing the first image.
    for key, path in zip(keys, paths):
        audio_obj = process_cough_file(path, trim_silence)
        if isinstance(audio_obj, (int, tuple)):
            # -1 (load failure) or (None, None) (duration out of range): skip this file.
            continue
        audio, sr = audio_obj[0], audio_obj[1]
        data[key] = {'DIR': get_melspec(savedir, audio, sr, key),
                     'label': getlabel(key, df, chosen)}
    return data

def filter_DF(df):
    """Pick the id, status and audio-path columns of ``df`` and drop incomplete rows.

    Columns are matched on their lower-cased name: one containing ``'status'`` is the
    target, one containing ``'audio_file'`` is the path, and a column named exactly
    ``'id'`` is the identifier. If no column is named exactly ``'id'``, the last other
    column containing ``'id'`` is used instead.

    Returns
    -------
    (filtered, chosen) : ``filtered`` holds the three columns with rows containing
        missing values removed and the index reset (the old index is kept as an
        ``index`` column); ``chosen`` maps ``'id'``, ``'status'`` and ``'path'`` to
        the selected column names.

    Raises
    ------
    KeyError : if any of the three columns cannot be found.
    """
    chosen = {}
    id_is_exact = False
    for name in df.columns:
        lowered = str(name).lower()
        if 'status' in lowered:
            chosen['status'] = name  # Choosing the target
        elif 'audio_file' in lowered:
            chosen['path'] = name
        elif lowered == 'id':
            chosen['id'] = name
            id_is_exact = True
        elif 'id' in lowered and not id_is_exact:
            # Substring fallback; must never override an exact 'id' column
            # (names such as 'width' also contain 'id').
            chosen['id'] = name

    missing = [role for role in ('id', 'status', 'path') if role not in chosen]
    if missing:
        raise KeyError('Could not find a column for: ' + ', '.join(missing))

    selected = df[[chosen['id'], chosen['status'], chosen['path']]]
    return selected.dropna().reset_index(), chosen

def create_images(df):
    """Turn the audio files listed in ``df`` into images under ``<folder>/coswara_images/``.

    Selects the relevant columns with ``filter_DF`` and passes them to ``extract``,
    whose result is returned unchanged. Relies on the notebook-level ``folder`` variable.
    """
    filtered_df, column_map = filter_DF(df)
    image_dir = folder + '/coswara_images/'
    return extract(filtered_df, column_map, image_dir)

Processing the audio files into images.
Google Colab crashes when too many files are processed at once, so after a quick test on the first 5 rows I have processed the rest in batches of up to 500 rows.

In [None]:
# Trial run on the first 5 rows only.
features = create_images(df_coswara.iloc[:5])

In [None]:
# Show the id -> {'DIR': image path, 'label': status} mapping returned by the trial run.
print(features)

{'iV3Db6t1T8b7c5HQY2TwxIhjbzD3': {'DIR': '/content/drive/My Drive/Colab Notebooks/coswara/coswara_images/iV3Db6t1T8b7c5HQY2TwxIhjbzD3.png', 'label': 'healthy'}, 'AxuYWBN0jFVLINCBqIW5aZmGCdu1': {'DIR': '/content/drive/My Drive/Colab Notebooks/coswara/coswara_images/AxuYWBN0jFVLINCBqIW5aZmGCdu1.png', 'label': 'healthy'}, 'C5eIsssb9GSkaAgIfsHMHeR6fSh1': {'DIR': '/content/drive/My Drive/Colab Notebooks/coswara/coswara_images/C5eIsssb9GSkaAgIfsHMHeR6fSh1.png', 'label': 'healthy'}, 'YjbEAECMBIaZKyfqOvWy5DDImUb2': {'DIR': '/content/drive/My Drive/Colab Notebooks/coswara/coswara_images/YjbEAECMBIaZKyfqOvWy5DDImUb2.png', 'label': 'healthy'}, 'aGOvk4ji0cVqIzCs1jHnzlw2UEy2': {'DIR': '/content/drive/My Drive/Colab Notebooks/coswara/coswara_images/aGOvk4ji0cVqIzCs1jHnzlw2UEy2.png', 'label': 'healthy'}}


In [None]:
# Rows 5-499. `features` is reassigned by every batch cell.
features = create_images(df_coswara.iloc[5:500])

In [None]:
# Rows 500-999.
features = create_images(df_coswara.iloc[500:1000])

In [None]:
# Rows 1000-1499.
features = create_images(df_coswara.iloc[1000:1500])

In [None]:
# Row 1500 to the end of the dataframe.
features = create_images(df_coswara.iloc[1500:])

Create dataframe of new image files.

In [None]:
# List the generated PNG files and derive each recording id from its file name.
fnames = [fname for fname in os.listdir(folder + '/coswara_images/')
          if fname.lower().endswith('.png')]

# Build the frame in one step: DataFrame.append inside a loop copies the frame on every
# iteration and no longer exists in pandas 2.x. os.path.splitext removes only the
# extension, and no variable called `id` is created, so the builtin id() stays usable.
df_fnames = pd.DataFrame(
    [{'id': os.path.splitext(fname)[0], 'image_file': fname} for fname in fnames],
    columns=['id', 'image_file'],
)
print(df_fnames.shape)
print(df_coswara.shape)
df_fnames.head()

(1779, 2)
(1809, 6)


Unnamed: 0,id,image_file
0,iV3Db6t1T8b7c5HQY2TwxIhjbzD3,iV3Db6t1T8b7c5HQY2TwxIhjbzD3.png
1,AxuYWBN0jFVLINCBqIW5aZmGCdu1,AxuYWBN0jFVLINCBqIW5aZmGCdu1.png
2,C5eIsssb9GSkaAgIfsHMHeR6fSh1,C5eIsssb9GSkaAgIfsHMHeR6fSh1.png
3,YjbEAECMBIaZKyfqOvWy5DDImUb2,YjbEAECMBIaZKyfqOvWy5DDImUb2.png
4,aGOvk4ji0cVqIzCs1jHnzlw2UEy2,aGOvk4ji0cVqIzCs1jHnzlw2UEy2.png


Add images to df_coswara dataframe.

In [None]:
# Attach each image file name to its metadata row, matching on `id` (left join, so
# rows without an image get NaN in `image_file`).
# Make the cell safe to re-run: restore `id` as a column if it is already the index,
# and drop any `image_file` column left over from an earlier run so the join does not
# fail on overlapping column names.
meta = df_coswara if 'id' in df_coswara.columns else df_coswara.reset_index()
left = meta.drop(columns=['image_file'], errors='ignore').set_index(['id'])
right = df_fnames.set_index(['id'])
df_coswara = left.join(right)
print(df_coswara.shape)
df_coswara.head()

(1809, 6)


Unnamed: 0_level_0,Unnamed: 0,age,gender,status,audio_file,image_file
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
iV3Db6t1T8b7c5HQY2TwxIhjbzD3,0,28,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,iV3Db6t1T8b7c5HQY2TwxIhjbzD3.png
AxuYWBN0jFVLINCBqIW5aZmGCdu1,1,25,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,AxuYWBN0jFVLINCBqIW5aZmGCdu1.png
C5eIsssb9GSkaAgIfsHMHeR6fSh1,2,28,female,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,C5eIsssb9GSkaAgIfsHMHeR6fSh1.png
YjbEAECMBIaZKyfqOvWy5DDImUb2,3,26,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,YjbEAECMBIaZKyfqOvWy5DDImUb2.png
aGOvk4ji0cVqIzCs1jHnzlw2UEy2,4,32,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,aGOvk4ji0cVqIzCs1jHnzlw2UEy2.png


Remove rows where image_file does not exist.

In [None]:
# Number of rows that have no image file.
n_missing_images = df_coswara['image_file'].isna().sum()
print(n_missing_images)

30


In [None]:
# Keep only the rows for which an image file exists.
has_image = df_coswara['image_file'].notna()
df_coswara = df_coswara[has_image]
print(df_coswara.shape)
df_coswara.head()

(1779, 6)


Unnamed: 0_level_0,Unnamed: 0,age,gender,status,audio_file,image_file
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
iV3Db6t1T8b7c5HQY2TwxIhjbzD3,0,28,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,iV3Db6t1T8b7c5HQY2TwxIhjbzD3.png
AxuYWBN0jFVLINCBqIW5aZmGCdu1,1,25,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,AxuYWBN0jFVLINCBqIW5aZmGCdu1.png
C5eIsssb9GSkaAgIfsHMHeR6fSh1,2,28,female,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,C5eIsssb9GSkaAgIfsHMHeR6fSh1.png
YjbEAECMBIaZKyfqOvWy5DDImUb2,3,26,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,YjbEAECMBIaZKyfqOvWy5DDImUb2.png
aGOvk4ji0cVqIzCs1jHnzlw2UEy2,4,32,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,aGOvk4ji0cVqIzCs1jHnzlw2UEy2.png


Save the df_coswara dataframe ready to use in the next stage.

In [None]:
# Write the updated table back to Drive; the index is written as the first CSV column.
# NOTE(review): this is the same path the notebook reads df_coswara.csv from, so the
# input file is overwritten.
folder = '/content/drive/My Drive/Colab Notebooks/coswara'
coswara_csv_path = os.path.join(folder, 'df_coswara.csv')
df_coswara.to_csv(coswara_csv_path)

Check file has saved correctly.

In [None]:
# Read the file back in to confirm that it was written as expected.
saved_csv_path = os.path.join(folder, 'df_coswara.csv')
df_coswara = pd.read_csv(saved_csv_path)
df_coswara.head(10)

Unnamed: 0.1,id,Unnamed: 0,age,gender,status,audio_file,image_file
0,iV3Db6t1T8b7c5HQY2TwxIhjbzD3,0,28,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,iV3Db6t1T8b7c5HQY2TwxIhjbzD3.png
1,AxuYWBN0jFVLINCBqIW5aZmGCdu1,1,25,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,AxuYWBN0jFVLINCBqIW5aZmGCdu1.png
2,C5eIsssb9GSkaAgIfsHMHeR6fSh1,2,28,female,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,C5eIsssb9GSkaAgIfsHMHeR6fSh1.png
3,YjbEAECMBIaZKyfqOvWy5DDImUb2,3,26,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,YjbEAECMBIaZKyfqOvWy5DDImUb2.png
4,aGOvk4ji0cVqIzCs1jHnzlw2UEy2,4,32,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,aGOvk4ji0cVqIzCs1jHnzlw2UEy2.png
5,rB5oGtrGYZR5uJUXEaDYrrredz13,6,23,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,rB5oGtrGYZR5uJUXEaDYrrredz13.png
6,OW5RTM4WXPawz0QLpsfjsl4FqM22,7,33,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,OW5RTM4WXPawz0QLpsfjsl4FqM22.png
7,lF4uHVWCoBPlnxvapmUJ4ROtYV73,8,26,female,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,lF4uHVWCoBPlnxvapmUJ4ROtYV73.png
8,LYJToDRYDZfpjBuNC6YkUcphr0q1,9,27,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,LYJToDRYDZfpjBuNC6YkUcphr0q1.png
9,pOZwqBg4NsVYWASmwwhXFq4UlpC2,11,35,male,healthy,/content/drive/My Drive/Colab Notebooks/coswar...,pOZwqBg4NsVYWASmwwhXFq4UlpC2.png


In [None]:
# (rows, columns) of the reloaded dataframe.
df_coswara.shape

(1779, 7)