In [None]:
# Mount Google Drive at /content/gdrive using the google.colab helper.
# NOTE(review): the later cells read and write under /kaggle/... rather than the
# mounted drive — confirm whether this cell is still needed for this runtime.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=False)

In [None]:
# Install the third-party packages used by this notebook.
# NOTE(review): only ctranslate2 is version-pinned; the other packages resolve to
# whatever release is current at install time.
!pip install spafe
!pip install praat-parselmouth
!pip install textstat
!pip install pocketsphinx
!pip install ctranslate2==4.4.0

In [None]:
# Install WhisperX directly from its GitHub repository (no tag or commit is pinned).
!pip install git+https://github.com/m-bain/whisperx.git

In [None]:
# Clone the SpeechCARE repository into the current working directory; the next
# cells chdir into its "codes/acoustic parameters" folder and import process_file.
!git clone https://github.com/NeuroTechAnalytics/SpeechCARE.git

In [None]:
import os
import pandas as pd

# Switch the working directory to the cloned repo's feature-extraction folder;
# relative paths used later (e.g. the output CSVs) are resolved against it.
# NOTE(review): assumes the repository was cloned under /kaggle/working — confirm.
os.chdir("/kaggle/working/SpeechCARE/codes/acoustic parameters")

In [None]:
from process_file import process_file, process_file_model

In [None]:
import whisperx
import gc
import torch

# Limit PyTorch to a single intra-op CPU thread.
torch.set_num_threads(1)

# Silero VAD model plus its helper utilities, fetched through torch.hub.
# force_reload=True skips the local hub cache and downloads the repo on every run.
vad_model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_vad',
                              force_reload=True)

# Prefer the GPU, but fall back to the CPU instead of failing inside load_model
# when the runtime has no CUDA device.
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cpu":
    print("CUDA is not available; loading WhisperX on the CPU (transcription will be slow).")
batch_size = 8 # reduce if low on GPU mem
compute_type = "float32" # change to "int8" if low on GPU mem (may reduce accuracy)

# Load the WhisperX "large-v3" model that is later passed to process_file_model.
transcription_model = whisperx.load_model("large-v3", device, compute_type=compute_type)

In [None]:
# Feature-group labels. The spellings are runtime values that add_group_names
# matches against its own label list, so they are reproduced verbatim.
_FLUENCY = 'Fluency of speech'
_RHYTHM = 'Rhythimc structure'
_DYNAMICS = 'Speech Production Dynamics'

# Maps a timing/fluency feature column name to the group it is reported under.
# Keys are looked up with the raw CSV column names, so they must stay exactly
# as written here (including their current spellings).
gp_maps = {
    # --- pause-related measures ---
    'count_pause_segments': _FLUENCY,
    'hesitation_rate': _FLUENCY,
    'pasue_speech_ratio': _FLUENCY,
    'pause_lengths_avg': _FLUENCY,
    'pause_speech_duration_ratio': _FLUENCY,
    'pause_to_syllable': _FLUENCY,
    'pause_to_tokens': _FLUENCY,
    'pause_totallength_ratio': _FLUENCY,
    'vocalic_interval': _RHYTHM,
    'pause_length': _FLUENCY,
    # --- speech-related measures ---
    'mean_words_in_utterance': _DYNAMICS,
    'syllable_count': _FLUENCY,
    'num_words_to_num_pauses': _RHYTHM,
    'articulation_rate': _RHYTHM,
    'average_num_of_speech_segments': _RHYTHM,
    'num_of_sylablles': _RHYTHM,
    'percentage_phonation_time': _RHYTHM,
    'phonation_to_syllable': _RHYTHM,
    'speech_rate_syllable': _RHYTHM,
    'speech_rate_words': _RHYTHM,
    'syllabic_interval_duration': _FLUENCY,
    'token_duration': _RHYTHM,
    'transformed_phonation_rate': _RHYTHM,
    'vowel_duration': _RHYTHM,
    'phonation_time': _DYNAMICS,
    'tlt': _DYNAMICS,
    'count_tokens': _DYNAMICS,
    'mean_length_sentence': _DYNAMICS,
    # --- numbered features: regularity_0 .. regularity_9, then PVI_0 and PVI_1 ---
    **{f'regularity_{k}': _RHYTHM for k in range(10)},
    **{f'PVI_{k}': _RHYTHM for k in range(2)},
    'mean_inter_syllabic_pauses': _RHYTHM,
}

In [None]:
def add_group_names(output_file, gp_names):
    """Load the feature CSV and put a feature-group level on top of its columns.

    The CSV is expected to have a fixed positional layout:

    * columns 0-136 are timing/fluency features. Their group comes from
      ``gp_names`` (looked up by the raw column name), except for names that
      contain ``relative_sentence_duration`` or ``voiceProb``, which are assigned
      to fixed groups. These column names are upper-cased in the result.
    * columns 137-6860 are labelled purely by position (see ``positional_groups``);
      the last two columns form the ``"Info"`` group.

    Args:
        output_file: Path of the CSV to read.
        gp_names: Mapping from timing/fluency column name to group name.

    Returns:
        A DataFrame whose columns are a two-level MultiIndex
        ``(group name, column name)``, with the groups ordered as in
        ``desired_order``.

    Raises:
        ValueError: If the CSV does not have exactly the expected number of
            columns. The positional labels would otherwise be applied to the
            wrong columns without any error.
        KeyError: If a timing/fluency column has no entry in ``gp_names``.
    """
    # Number of leading columns whose group is resolved by name instead of position.
    timing_stop = 137
    # (group name, first column, one-past-last column) for the remaining columns.
    positional_groups = [
        ("Frequency Parameters", 137, 377),
        ("Cepstral Coefficients and Spectral Features", 377, 6137),
        ("Voice Quality", 6137, 6379),
        ("Loudness and intensity of the sound", 6379, 6667),
        ("Speech Signal Complexity", 6667, 6859),
        ("Info", 6859, 6861),
    ]
    expected_columns = positional_groups[-1][2]

    train_df = pd.read_csv(output_file)
    columns = list(train_df.columns)

    # Fail loudly on a layout mismatch: with too few columns the positional
    # labels would silently land on the wrong features.
    if len(columns) != expected_columns:
        raise ValueError(
            f"{output_file} has {len(columns)} columns but the feature-group "
            f"layout expects exactly {expected_columns}"
        )

    group_names = []
    for position, column in enumerate(columns[:timing_stop]):
        if "relative_sentence_duration" in column:
            group = 'Speech Production Dynamics'
        elif "voiceProb" in column:
            group = 'Fluency of speech'
        else:
            try:
                group = gp_names[column]
            except KeyError:
                raise KeyError(
                    f"no feature group is defined for column {column!r} "
                    f"(position {position})"
                ) from None
        group_names.append(group)

    for group, start, stop in positional_groups:
        group_names.extend([group] * (stop - start))

    # Only the timing/fluency column names are upper-cased.
    columns[:timing_stop] = [column.upper() for column in columns[:timing_stop]]

    train_df.columns = pd.MultiIndex.from_tuples(list(zip(group_names, columns)))

    desired_order = [
        "Frequency Parameters", "Cepstral Coefficients and Spectral Features", "Voice Quality",
        "Loudness and intensity of the sound", "Speech Signal Complexity",
        'Rhythimc structure', 'Fluency of speech', 'Speech Production Dynamics', "Info"
    ]

    # Reorder whole groups; the column order inside each group is kept.
    return train_df.reindex(desired_order, level=0, axis=1)

In [None]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import torch
import csv


def calculate_all_features(input_dir, output_file, label):
    """Extract features for every new ``.wav`` file in ``input_dir`` and append them to a CSV.

    The function is resumable: if ``output_file`` already exists, its header is
    reused and every file already listed in its ``filename`` column is skipped.

    For each remaining ``.wav`` file the result of ``process_file_model`` is
    merged with the result of ``process_file`` and the ``filename`` and ``label``
    columns are added last. The key order of the first successfully processed
    file defines the CSV column order.

    If extraction raises for a file, the error and the file name are printed and
    a row containing only ``filename`` and ``label`` is written. Failures that
    happen before any header is known are held back until the first successful
    file has defined the header; if no file succeeds they are not written, so a
    later run tries them again.

    Args:
        input_dir: Directory that is scanned (non-recursively) for ``.wav`` files.
        output_file: CSV path that rows are appended to; created if missing.
        label: Value stored in the ``label`` column of every row written.

    Returns:
        None. The result is the content appended to ``output_file``.
    """
    previously_calculated = set()
    fieldnames = None
    # A zero-byte file (left behind by a run that wrote nothing) has no header to
    # reuse and would make pd.read_csv raise, so treat it like a missing file.
    if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
        existing_df = pd.read_csv(output_file)
        # Reuse the header even when there are no data rows yet, so that it is
        # never written a second time.
        fieldnames = list(existing_df.columns)
        previously_calculated = set(existing_df["filename"].tolist())

    # Rows for files that failed before the CSV header was known.
    deferred_failures = []

    # newline="" is what the csv module asks for; the context manager closes the
    # file even if writing raises.
    with open(output_file, "a", newline="") as csvfile:
        writer = None
        if fieldnames is not None:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # Let tqdm drive the loop so skipped entries advance the bar as well.
        for file in tqdm(os.listdir(input_dir), position=0, leave=True):
            if file in previously_calculated or not file.endswith('.wav'):
                continue

            file_path = os.path.join(input_dir, file)
            extraction_failed = False
            try:
                acoustic_features = process_file(file_path)
                features = process_file_model(file_path, vad_model, utils, transcription_model)
                features.update(acoustic_features)
            except Exception as e:
                # Best effort: report the problem and keep going with the other files.
                print(e)
                print(file)
                features = {}
                extraction_failed = True
            features['filename'] = file
            features['label'] = label

            if writer is None:
                if extraction_failed:
                    # A failed row only has filename/label; using it as the header
                    # would make every later complete row fail to write.
                    deferred_failures.append(features)
                    continue
                writer = csv.DictWriter(csvfile, fieldnames=list(features.keys()))
                writer.writeheader()
                writer.writerows(deferred_failures)
                deferred_failures = []

            writer.writerow(features)
            # Push the row to disk right away so an interrupted run can resume.
            csvfile.flush()

    if deferred_failures:
        print(f"No file was processed successfully; {len(deferred_failures)} "
              f"failed file(s) were not written to {output_file}")

In [None]:
# Input folder whose .wav files are processed by calculate_all_features in a later cell.
directory_path_ad = "/kaggle/input/aaaaaa"

In [None]:
import nltk
# Download NLTK's 'punkt_tab' resource.
# NOTE(review): presumably required by the text processing inside the imported
# feature extractors — confirm, since nothing in this notebook uses nltk directly.
nltk.download('punkt_tab')

In [None]:
# Raw per-file feature table. Rows are appended to it, and files already listed
# there are skipped, so re-running this cell continues from the existing file.
output_file = "out8.csv"
# Extract features for the .wav files in the input folder; every row gets label 1.
calculate_all_features(directory_path_ad, output_file, 1)
# Re-read the raw table and add the feature-group level to its columns.
out_df = add_group_names(output_file, gp_maps)
# index=True also writes the row index as the first column of feature.csv.
out_df.to_csv("feature.csv", index=True)

In [None]:
# Sanity check: (number of rows, number of columns) of the grouped feature table.
out_df.shape