In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional
import pandas as pd
from torchsummary import summary
from b2aiprep.dataset import VBAIDataset
from b2aiprep.process import Audio, specgram, plot_spectrogram
import IPython.display as Ipd
import os
import torch.nn as nn
import torch.optim as optim

### Audiorecordings(from lectures)

In [10]:
dataset = VBAIDataset('../bids_with_sensitive_recordings')
# data = torch.load('./bids_with_sensitive_recordings/sub-0e2df8b3-a93f-4982-a82c-d96a5c64d153/ses-461EA3E8-4477-4F97-B091-D21F4006B2FC/audio/sub-0e2df8b3-a93f-4982-a82c-d96a5c64d153_ses-461EA3E8-4477-4F97-B091-D21F4006B2FC_Audio-Check_rec-Audio-Check-1.pt')
# print(data.keys())

In [12]:
participant_df = dataset.load_and_pivot_questionnaire('participant')
all_identities = sorted(participant_df['record_id'].to_numpy().tolist())

N = len(all_identities)

train_identities = set(all_identities[:int(0.8*N)])
val_identities = set(all_identities[int(0.8*N):int(0.9*N)])
test_identities = set(all_identities[int(0.9*N):])

print('train:', len(train_identities))
print('val:', len(val_identities))
print('test:', len(test_identities))

train: 142
val: 18
test: 18


In [13]:
qs = dataset.load_questionnaires('recordingschema')
q_dfs = []
for i, questionnaire in enumerate(qs):
    df = dataset.questionnaire_to_dataframe(questionnaire)
    df['dataframe_number'] = i
    q_dfs.append(df)
    i += 1
recordingschema_df = pd.concat(q_dfs)
recordingschema_df = pd.pivot(recordingschema_df, index='dataframe_number', columns='linkId', values='valueString')

person_session_pairs = recordingschema_df[['record_id', 'recording_session_id']].to_numpy().astype(str)
person_session_pairs = np.unique(person_session_pairs, axis=0).tolist()

print('Found {} person/session pairs'.format(len(person_session_pairs)))

Found 204 person/session pairs


In [14]:
class MyAudioDataset(torch.utils.data.Dataset):
	def __init__(self, identities, dataset, person_session_pairs, segment_size=3):
		self.segment_size = segment_size
		
		# get age and airway stenosis classification for all subjects
		participant_df = dataset.load_and_pivot_questionnaire('participant')
		age_df = participant_df[['record_id', 'age']].to_numpy()
		airway_stenosis_df = participant_df[['record_id', 'airway_stenosis']].to_numpy()
        
		age_dict = {}
		for person_id, age in age_df:
			age_dict[str(person_id)] = float(age)
		airway_stenosis_dict = {}
		for person_id, airway_stenosis in airway_stenosis_df:
			airway_stenosis_dict[str(person_id)] = float(airway_stenosis)

		# get all prolonged vowel audios
		self.audio_files = []
		self.age = []
		self.airway_stenosis = []
        
		for person_id, session_id in person_session_pairs:
			if person_id not in identities:
				continue
			vowel_audios = [str(x) for x in dataset.find_audio(person_id, session_id) if str(x).endswith('-Prolonged-vowel.wav')]
			self.audio_files += vowel_audios
			self.age += [age_dict[person_id]]*len(vowel_audios)
			self.airway_stenosis += [airway_stenosis_dict[person_id]]*len(vowel_audios)

	def __len__(self):
		return len(self.audio_files)

	def __getitem__(self, idx):
		audio = Audio.from_file(self.audio_files[idx])
		audio = audio.to_16khz().signal.squeeze()
		# get middle K seconds if audio is too long, pad with zeros if it is too short
		if audio.size(0) > self.segment_size*16000:
			d = (audio.size(0)-self.segment_size*16000)//2
			audio = audio[d:d+self.segment_size*16000]
		else:
			audio = torch.nn.functional.pad(audio, (0,self.segment_size*16000-audio.size(0)), mode='constant', value=0)
		return {'signal': audio, 'age': self.age[idx], 'airway_stenosis': self.airway_stenosis[idx]}

### Demographics

In [16]:
dg = pd.read_csv('../demographics.csv')
dg.head(1)

Unnamed: 0.1,Unnamed: 0,record_id,demographics_session_id,demographics_duration,demographics_completed_by___1,demographics_completed_by___2,demographics_completed_by___3,state_province,country,gender_identity,...,household_count,spouse_partner_sig_other,children,parent,grandparent,other_live_with,others_household_specify,transportation_yn,primary_transportation,q_generic_demographics_complete
0,0,8d5dc52b-e8aa-42e7-ae54-8f05c4667d39,B176636C-3330-4AB4-93A9-1E2305506407,173.0,True,False,False,Tennessee,USA,Female gender identity,...,4.0,No,Yes,Yes,No,No,,Yes,Personal vehicle,Complete


In [35]:
X_train = dg[["record_id", "demographics_session_id"]]
Y_income = dg [["household_income_usa", "household_income_ca", "household_count"]]

Train = dg[["record_id", "demographics_session_id", "household_income_usa", "household_income_ca", "household_count"]]
Train.head(3)


Unnamed: 0,record_id,demographics_session_id,household_income_usa,household_income_ca,household_count
0,8d5dc52b-e8aa-42e7-ae54-8f05c4667d39,B176636C-3330-4AB4-93A9-1E2305506407,"$15,000 to $29,999",,4.0
1,1b07b18b-26f9-405b-a466-29442306a7fe,8F8E68BB-E68C-4EA5-B71A-17D7AAE915C2,,"$150,000 to $199,999",4.0
2,e5db3e0c-6589-4a15-a5e7-8a95e4ed34a5,B94FE4BC-79FF-46A1-86CC-628E2D77874E,,"$50,000 to $99,999",1.0


In [18]:
print("Shape before filtering: ",Y_income.shape)
pre_Y_train = Y_income[(pd.notna(Y_income['household_income_usa']) | pd.notna(Y_income['household_income_ca'])) &
                                   (~((Y_income['household_income_usa'] == 'Prefer not to answer') | 
                                      (Y_income['household_income_ca'] == 'Prefer not to answer')))]
print("Shape after filtering: ",pre_Y_train.shape)


Shape before filtering:  (179, 3)
Shape after filtering:  (115, 3)


In [19]:
#label data: 0 - poverty, 1 - lower, 2 - middle, 3 - upper
Y_train = pd.DataFrame()

for index, row in pre_Y_train.iterrows():
    if pd.notna(pre_Y_train.loc[index, "household_income_usa"]) and pre_Y_train.loc[index, "household_count"] >= 3: # USD; HH >= 3
        income = pre_Y_train.loc[index, "household_income_usa"]
        if income in ['< $15,000', '$15,000 to $29,999']:
            Y_train.at[index, "SES"] = 0
        elif income in ['$30,000 to $$49,999']:
            Y_train.at[index, "SES"] = 1
        elif income in ['$50,000 to $99,999', '$100,000 to $149,999', '$150,000 to $199,999']:
            Y_train.at[index, "SES"] = 2
        elif income in ['$200,000 to $249,999', '>$250,000']:
            Y_train.at[index, "SES"] = 3
        elif income in ['Prefer not to answer']:
            continue
        else:
            print(income)
            raise ValueError("Wrong value for household_income_usa")
    
    elif pd.notna(pre_Y_train.loc[index, "household_income_usa"]): # USD; HH < 3
        income = pre_Y_train.loc[index, "household_income_usa"]
        if income in ['< $15,000']:
            Y_train.at[index, "SES"] = 0
        elif income in ['$15,000 to $29,999', '$30,000 to $$49,999']:
            Y_train.at[index, "SES"] = 1
        elif income in ['$50,000 to $99,999']:
            Y_train.at[index, "SES"] = 2
        elif income in ['$100,000 to $149,999', '$150,000 to $199,999', '$200,000 to $249,999', '>$250,000']:
            Y_train.at[index, "SES"] = 3
        elif income in ['Prefer not to answer']:
            continue
        else:
            print(income)
            raise ValueError("Wrong value for household_income_usa")
        
    elif pd.notna(pre_Y_train.loc[index, "household_income_ca"]):  # CA; HH >= 3
        income = pre_Y_train.loc[index, "household_income_ca"]
        if income in ['< $15,000', '$15,000 to $29,999']:
            Y_train.at[index, "SES"] = 0
        elif income in ['$30,000 to $$49,999']:
            Y_train.at[index, "SES"] = 1
        elif income in ['$50,000 to $99,999', '$100,000 to $149,999']:
            Y_train.at[index, "SES"] = 2
        elif income in ['$150,000 to $199,999', '$200,000 to $249,999', '>$250,000']:
            Y_train.at[index, "SES"] = 3
        elif income in ['Prefer not to answer']:
            continue
        else:
            print(income)
            raise ValueError("Wrong value for household_income_ca")
        
    else:
        print(index)
        

Y_train_encoded = torch.nn.functional.one_hot(torch.tensor(Y_train["SES"].values, dtype=torch.int64))      
print("Encoded labels: ", Y_train_encoded[0:3])
print("Shape: ", Y_train_encoded.shape)
            

Encoded labels:  tensor([[1, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 0]])
Shape:  torch.Size([115, 4])


### Audiofiles

In [23]:
class RainbowAudioDataset(torch.utils.data.Dataset):
	def __init__(self, data, labels):
		# self.segment_size = segment_size
		self.data = data
		self.labels = labels
		
		# get location for every recording of rainbow passage
		for index, row in data.iterrows():
			subject = "sub-"+row['record_id']
			session = "ses-"+row['demographics_session_id']
			location = str("../bids_with_sensitive_recordings/" + subject + "/" + session + '/audio/'+subject+"_"+session+"_Rainbow-Passage_rec-Rainbow-Passage.wav")
			if os.path.exists(location):
				self.data.at[index, "audio_location"] = location
			else:
				# print all patients without Rainbow passage recording   
				# print(location)
				# data.at[index, "audio_location"] = None
				self.data = self.data.drop(index = index)
				self.labels = torch.cat((self.labels[:index], self.labels[index + 1:]), dim=0)
		self.data.reset_index(drop=True, inplace=True)

	def __len__(self):
		return len(self.data)

	def __getitem__(self, idx):
		audio = Audio.from_file(self.data.at[idx, "audio_location"])
		audio = audio.to_16khz().signal.squeeze()
		d = (audio.size(0)-282947)//2
		audio = audio[d:d+282947]
		return audio, self.label[idx]

		# # get middle K seconds if audio is too long, pad with zeros if it is too short
		# if audio.size(0) > self.segment_size*16000:
		# 	d = (audio.size(0)-self.segment_size*16000)//2
		# 	audio = audio[d:d+self.segment_size*16000]
		# else:
		# 	audio = torch.nn.functional.pad(audio, (0,self.segment_size*16000-audio.size(0)), mode='constant', value=0)

	def analyze_length(self):
		total_length = 0
		min_val = 10e9
		max_val = 0
		for idx in range(len(self.data)):
			audio = Audio.from_file(self.data.at[idx, "audio_location"])
			audio = audio.to_16khz().signal.squeeze()
			length = audio.size(0)  # Number of samples in the audio
			min_val = min_val if min_val < length else length
			max_val = max_val if max_val > length else length
			total_length += length
		average_length = total_length / len(self.data)
		return average_length, min_val, max_val


torch.Size([115, 4])

In [25]:


train_dataloader = torch.utils.data.DataLoader(dataset[:int(0.8*N)], batch_size=8, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(dataset[int(0.8*N):int(0.9*N)], batch_size=8, shuffle=False)
test_dataloader = torch.utils.data.DataLoader(dataset[int(0.9*N):], batch_size=8, shuffle=False)

dataset = RainbowAudioDataset(X_train, Y_train_encoded)
avg, min_val, max_val = dataset.analyze_length()
print(avg, min_val, max_val)

470006.49707602337 282947 2278528


In [26]:
N = len(dataset)

train_dataloader = torch.utils.data.DataLoader(dataset[:int(0.8*N)], batch_size=8, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(dataset[int(0.8*N):int(0.9*N)], batch_size=8, shuffle=False)
test_dataloader = torch.utils.data.DataLoader(dataset[int(0.9*N):], batch_size=8, shuffle=False)

InvalidIndexError: slice(None, 136, None)