In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
import librosa
import matplotlib.pyplot as plt
import os
import soundfile as sf
import math
from tqdm import tqdm

In [2]:
# Load the compiled metadata table into the notebook-wide DataFrame `df`.
df = pd.read_csv(
    filepath_or_buffer='/home/abrol/ARYAN_MT22019/CoughVid/public_dataset/metadata_compiled.csv',
)

In [3]:
# Show the column labels of the freshly loaded table (the numbered *_1/*_2/*_3
# columns listed here are the ones merged by the cells below).
df.columns

Index(['uuid', 'datetime', 'cough_detected', 'latitude', 'longitude', 'age',
       'gender', 'respiratory_condition', 'fever_muscle_pain', 'status',
       'quality_1', 'cough_type_1', 'dyspnea_1', 'wheezing_1', 'stridor_1',
       'choking_1', 'congestion_1', 'nothing_1', 'diagnosis_1', 'severity_1',
       'quality_2', 'cough_type_2', 'dyspnea_2', 'wheezing_2', 'stridor_2',
       'choking_2', 'congestion_2', 'nothing_2', 'diagnosis_2', 'severity_2',
       'quality_3', 'dyspnea_3', 'wheezing_3', 'stridor_3', 'choking_3',
       'congestion_3', 'nothing_3', 'cough_type_3', 'diagnosis_3',
       'severity_3'],
      dtype='object')

In [27]:
def merge_one_hot(row, columns=('severity_1', 'severity_2', 'severity_3')):
    """Return the first non-missing value among ``columns`` of ``row``.

    Written for ``df.apply(merge_one_hot, axis=1)``: the numbered
    ``severity_*`` columns are scanned in order and the first populated one
    wins. Despite the name, no one-hot decoding is performed.

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series`` or ``dict``)
        One record, indexable by column label.
    columns : sequence of str, optional
        Labels to scan, in priority order. Defaults to the three severity
        columns.

    Returns
    -------
    object
        The first value for which ``pd.notna`` is true, or ``float('nan')``
        when every listed column is missing.

    Raises
    ------
    KeyError
        If ``row`` lacks one of the labels reached during the scan.
    """
    # Only the labels are needed, so iterate over them directly rather than
    # slicing a DataFrame on every row (no hidden dependency on a global).
    for col in columns:
        value = row[col]
        if pd.notna(value):
            return value
    return float('nan')  # every candidate column was missing for this row

# Collapse severity_1..3 into a single 'severity' column, one row at a time.
df['severity'] = df.apply(merge_one_hot, axis='columns')

In [29]:
# Distinct values of the merged 'severity' column, as a quick sanity check.
pd.unique(df['severity'])

array([nan, 'pseudocough', 'mild', 'severe', 'unknown'], dtype=object)

In [33]:
def merge_one_hot_cough_type(row, columns=('cough_type_1', 'cough_type_2', 'cough_type_3')):
    """Return the first non-missing cough-type value found in ``row``.

    Written for ``df.apply(merge_one_hot_cough_type, axis=1)``: the numbered
    ``cough_type_*`` columns are scanned in order and the first populated one
    is returned.

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series`` or ``dict``)
        One record, indexable by column label.
    columns : sequence of str, optional
        Labels to scan, in priority order. Defaults to the three cough-type
        columns.

    Returns
    -------
    object
        The first value for which ``pd.notna`` is true, or ``float('nan')``
        when every listed column is missing.
    """
    # Iterate over the labels themselves; building a DataFrame slice per row
    # only to read its column names is unnecessary work.
    for col in columns:
        value = row[col]
        if pd.notna(value):
            return value
    return float('nan')  # no populated cough_type_* entry for this row

# Collapse cough_type_1..3 into a single 'cough_type' column, row by row.
df['cough_type'] = df.apply(merge_one_hot_cough_type, axis='columns')

In [34]:
# Distinct values of the merged 'cough_type' column, as a quick sanity check.
pd.unique(df['cough_type'])

array([nan, 'dry', 'unknown', 'wet'], dtype=object)

In [42]:
def merge_one_hot_dyspnea(row, columns=('dyspnea_1', 'dyspnea_2', 'dyspnea_3')):
    """Return the first non-missing dyspnea value found in ``row``.

    Written for ``df.apply(merge_one_hot_dyspnea, axis=1)``. A stored ``False``
    counts as a real value: only entries rejected by ``pd.notna`` are skipped.

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series`` or ``dict``)
        One record, indexable by column label.
    columns : sequence of str, optional
        Labels to scan, in priority order. Defaults to the three dyspnea
        columns.

    Returns
    -------
    object
        The first value for which ``pd.notna`` is true, or ``float('nan')``
        when every listed column is missing.
    """
    # The labels are all that is needed here, so no DataFrame is sliced and
    # the function does not depend on a notebook-global frame.
    for col in columns:
        value = row[col]
        if pd.notna(value):
            return value
    return float('nan')  # no populated dyspnea_* entry for this row

# Collapse dyspnea_1..3 into a single 'dyspnea' column, row by row.
df['dyspnea'] = df.apply(merge_one_hot_dyspnea, axis='columns')
# Distinct merged values, displayed as the cell output.
pd.unique(df['dyspnea'])

array([nan, False, True], dtype=object)

In [43]:
# Distinct values present in the raw 'quality_1' column.
pd.unique(df['quality_1'])

array([nan, 'ok', 'poor', 'good', 'no_cough'], dtype=object)

In [39]:
def merge_one_hot_wheezing(row, columns=('wheezing_1', 'wheezing_2', 'wheezing_3')):
    """Return the first non-missing wheezing value found in ``row``.

    Written for ``df.apply(merge_one_hot_wheezing, axis=1)``. A stored
    ``False`` counts as a real value: only entries rejected by ``pd.notna``
    are skipped.

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series`` or ``dict``)
        One record, indexable by column label.
    columns : sequence of str, optional
        Labels to scan, in priority order. Defaults to the three wheezing
        columns.

    Returns
    -------
    object
        The first value for which ``pd.notna`` is true, or ``float('nan')``
        when every listed column is missing.
    """
    # Walk the labels directly instead of slicing a DataFrame for each row.
    for col in columns:
        value = row[col]
        if pd.notna(value):
            return value
    return float('nan')  # no populated wheezing_* entry for this row

# Collapse wheezing_1..3 into a single 'wheezing' column, row by row.
df['wheezing'] = df.apply(merge_one_hot_wheezing, axis='columns')
# Distinct merged values, displayed as the cell output.
pd.unique(df['wheezing'])

array([nan, False, True], dtype=object)

In [41]:
def merge_one_hot_stridor(row, columns=('stridor_1', 'stridor_2', 'stridor_3')):
    """Return the first non-missing stridor value found in ``row``.

    Written for ``df.apply(merge_one_hot_stridor, axis=1)``. A stored
    ``False`` counts as a real value: only entries rejected by ``pd.notna``
    are skipped.

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series`` or ``dict``)
        One record, indexable by column label.
    columns : sequence of str, optional
        Labels to scan, in priority order. Defaults to the three stridor
        columns.

    Returns
    -------
    object
        The first value for which ``pd.notna`` is true, or ``float('nan')``
        when every listed column is missing.
    """
    # Walk the labels directly instead of slicing a DataFrame for each row.
    for col in columns:
        value = row[col]
        if pd.notna(value):
            return value
    return float('nan')  # no populated stridor_* entry for this row

# Collapse stridor_1..3 into a single 'stridor' column, row by row.
df['stridor'] = df.apply(merge_one_hot_stridor, axis='columns')
# Distinct merged values, displayed as the cell output.
pd.unique(df['stridor'])

array([nan, False, True], dtype=object)

In [44]:
def merge_one_hot_quality(row, columns=('quality_1', 'quality_2', 'quality_3')):
    """Return the first non-missing quality value found in ``row``.

    Written for ``df.apply(merge_one_hot_quality, axis=1)``: the numbered
    ``quality_*`` columns are scanned in order and the first populated one is
    returned.

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series`` or ``dict``)
        One record, indexable by column label.
    columns : sequence of str, optional
        Labels to scan, in priority order. Defaults to the three quality
        columns.

    Returns
    -------
    object
        The first value for which ``pd.notna`` is true, or ``float('nan')``
        when every listed column is missing.
    """
    # Walk the labels directly instead of slicing a DataFrame for each row.
    for col in columns:
        value = row[col]
        if pd.notna(value):
            return value
    return float('nan')  # no populated quality_* entry for this row

# Collapse quality_1..3 into a single 'quality' column, row by row.
df['quality'] = df.apply(merge_one_hot_quality, axis='columns')
# Distinct merged values, displayed as the cell output.
pd.unique(df['quality'])

array([nan, 'good', 'ok', 'poor', 'no_cough'], dtype=object)

In [45]:
def merge_one_hot_chocking(row, columns=('choking_1', 'choking_2', 'choking_3')):
    """Return the first non-missing choking value found in ``row``.

    Written for ``df.apply(merge_one_hot_chocking, axis=1)``. A stored
    ``False`` counts as a real value: only entries rejected by ``pd.notna``
    are skipped. (The function name keeps its existing spelling so current
    callers continue to work.)

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series`` or ``dict``)
        One record, indexable by column label.
    columns : sequence of str, optional
        Labels to scan, in priority order. Defaults to the three choking
        columns.

    Returns
    -------
    object
        The first value for which ``pd.notna`` is true, or ``float('nan')``
        when every listed column is missing.
    """
    # Walk the labels directly instead of slicing a DataFrame for each row.
    for col in columns:
        value = row[col]
        if pd.notna(value):
            return value
    return float('nan')  # no populated choking_* entry for this row

# Collapse choking_1..3 into a single 'choking' column, row by row.
df['choking'] = df.apply(merge_one_hot_chocking, axis='columns')
# Distinct merged values, displayed as the cell output.
pd.unique(df['choking'])

array([nan, False, True], dtype=object)

In [46]:
def merge_one_hot_congestion(row, columns=('congestion_1', 'congestion_2', 'congestion_3')):
    """Return the first non-missing congestion value found in ``row``.

    Written for ``df.apply(merge_one_hot_congestion, axis=1)``. A stored
    ``False`` counts as a real value: only entries rejected by ``pd.notna``
    are skipped.

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series`` or ``dict``)
        One record, indexable by column label.
    columns : sequence of str, optional
        Labels to scan, in priority order. Defaults to the three congestion
        columns.

    Returns
    -------
    object
        The first value for which ``pd.notna`` is true, or ``float('nan')``
        when every listed column is missing.
    """
    # Walk the labels directly instead of slicing a DataFrame for each row.
    for col in columns:
        value = row[col]
        if pd.notna(value):
            return value
    return float('nan')  # no populated congestion_* entry for this row

# Collapse congestion_1..3 into a single 'congestion' column, row by row.
df['congestion'] = df.apply(merge_one_hot_congestion, axis='columns')
# Distinct merged values, displayed as the cell output.
pd.unique(df['congestion'])

array([nan, False, True], dtype=object)

In [47]:
def merge_one_hot_nothing(row, columns=('nothing_1', 'nothing_2', 'nothing_3')):
    """Return the first non-missing ``nothing_*`` value found in ``row``.

    Written for ``df.apply(merge_one_hot_nothing, axis=1)``. A stored
    ``False`` counts as a real value: only entries rejected by ``pd.notna``
    are skipped.

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series`` or ``dict``)
        One record, indexable by column label.
    columns : sequence of str, optional
        Labels to scan, in priority order. Defaults to the three
        ``nothing_*`` columns.

    Returns
    -------
    object
        The first value for which ``pd.notna`` is true, or ``float('nan')``
        when every listed column is missing.
    """
    # Walk the labels directly instead of slicing a DataFrame for each row.
    for col in columns:
        value = row[col]
        if pd.notna(value):
            return value
    return float('nan')  # no populated nothing_* entry for this row

# Collapse nothing_1..3 into a single 'nothing' column, row by row.
df['nothing'] = df.apply(merge_one_hot_nothing, axis='columns')
# Distinct merged values, displayed as the cell output.
pd.unique(df['nothing'])

array([nan, True, False], dtype=object)

In [48]:
def merge_one_hot_diagnosis(row, columns=('diagnosis_1', 'diagnosis_2', 'diagnosis_3')):
    """Return the first non-missing diagnosis value found in ``row``.

    Written for ``df.apply(merge_one_hot_diagnosis, axis=1)``: the numbered
    ``diagnosis_*`` columns are scanned in order and the first populated one
    is returned.

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series`` or ``dict``)
        One record, indexable by column label.
    columns : sequence of str, optional
        Labels to scan, in priority order. Defaults to the three diagnosis
        columns.

    Returns
    -------
    object
        The first value for which ``pd.notna`` is true, or ``float('nan')``
        when every listed column is missing.
    """
    # Walk the labels directly instead of slicing a DataFrame for each row.
    for col in columns:
        value = row[col]
        if pd.notna(value):
            return value
    return float('nan')  # no populated diagnosis_* entry for this row

# Collapse diagnosis_1..3 into a single 'diagnosis' column, row by row.
df['diagnosis'] = df.apply(merge_one_hot_diagnosis, axis='columns')
# Distinct merged values, displayed as the cell output.
pd.unique(df['diagnosis'])

array([nan, 'healthy_cough', 'lower_infection', 'upper_infection',
       'obstructive_disease', 'COVID-19'], dtype=object)

In [50]:
# Re-inspect the column labels: the merged columns assigned by the cells above
# should now appear after the original numbered ones.
df.columns

Index(['uuid', 'datetime', 'cough_detected', 'latitude', 'longitude', 'age',
       'gender', 'respiratory_condition', 'fever_muscle_pain', 'status',
       'quality_1', 'cough_type_1', 'dyspnea_1', 'wheezing_1', 'stridor_1',
       'choking_1', 'congestion_1', 'nothing_1', 'diagnosis_1', 'severity_1',
       'quality_2', 'cough_type_2', 'dyspnea_2', 'wheezing_2', 'stridor_2',
       'choking_2', 'congestion_2', 'nothing_2', 'diagnosis_2', 'severity_2',
       'quality_3', 'dyspnea_3', 'wheezing_3', 'stridor_3', 'choking_3',
       'congestion_3', 'nothing_3', 'cough_type_3', 'diagnosis_3',
       'severity_3', 'severity', 'cough_type', 'dyspnea', 'wheezing',
       'stridor', 'quality', 'choking', 'congestion', 'nothing', 'diagnosis'],
      dtype='object')

In [51]:
# Persist the table, including the merged columns, to a CSV in the working
# directory; index=True writes the row index as the first column.
df.to_csv(
    path_or_buf='all_clubed.csv',
    index=True,
)