# API Multithreading

This notebook is to speed up the process of API calls being made, by using a threadpool

## Imports

In [12]:
import concurrent.futures
import pandas as pd
import requests
import os
import json

## Dallin's imports 
import google.cloud.texttospeech as tts
import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile
import numpy as np
import random
import string


## API Function

#### Contants

In [2]:
voice_list = [('en-US-Neural2-A', 'MALE'),
 ('en-US-Neural2-C', 'FEMALE'),
 ('en-US-Neural2-D', 'MALE'),
 ('en-US-Neural2-E', 'FEMALE'),
 ('en-US-Neural2-F', 'FEMALE'),
 ('en-US-Neural2-G', 'FEMALE'),
 ('en-US-Neural2-H', 'FEMALE'),
 ('en-US-Neural2-I', 'MALE'),
 ('en-US-Neural2-J', 'MALE'),
 ('en-US-News-K', 'FEMALE'),
 ('en-US-News-L', 'FEMALE'),
 ('en-US-News-M', 'MALE'),
 ('en-US-News-N', 'MALE'),
 ('en-US-Standard-A', 'MALE'),
 ('en-US-Standard-B', 'MALE'),
 ('en-US-Standard-C', 'FEMALE'),
 ('en-US-Standard-D', 'MALE'),
 ('en-US-Standard-E', 'FEMALE'),
 ('en-US-Standard-F', 'FEMALE'),
 ('en-US-Standard-G', 'FEMALE'),
 ('en-US-Standard-H', 'FEMALE'),
 ('en-US-Standard-I', 'MALE'),
 ('en-US-Standard-J', 'MALE'),
 ('en-US-Studio-M', 'MALE'),
 ('en-US-Studio-O', 'FEMALE'),
 ('en-US-Wavenet-A', 'MALE'),
 ('en-US-Wavenet-B', 'MALE'),
 ('en-US-Wavenet-C', 'FEMALE'),
 ('en-US-Wavenet-D', 'MALE'),
 ('en-US-Wavenet-E', 'FEMALE'),
 ('en-US-Wavenet-F', 'FEMALE'),
 ('en-US-Wavenet-G', 'FEMALE'),
 ('en-US-Wavenet-H', 'FEMALE'),
 ('en-US-Wavenet-I', 'MALE'),
 ('en-US-Wavenet-J', 'MALE')]

profile_id_list = ['wearable-class-device'##	Smart watches and other wearables, like Apple Watch, Wear OS watch
,'handset-class-device'##	Smartphones, like Google Pixel, Samsung Galaxy, Apple iPhone
,'headphone-class-device'##	Earbuds or headphones for audio playback, like Sennheiser headphones
,'small-bluetooth-speaker-class-device'#	Small home speakers, like Google Home Mini
,'medium-bluetooth-speaker-class-device'#	Smart home speakers, like Google Home
,'large-home-entertainment-class-device'#	Home entertainment systems or smart TVs, like Google Home Max, LG TV
,'large-automotive-class-device'#	Car speakers
,'telephony-class-application'#	Interactive Voice Response (IVR) systems
]

In [37]:
def random_settings_text_to_wav( text: str, idx: int, label: bool, voice_list,profile_list):
    """Mock function that doesn't make API call, just returns filename."""
    voice = random.choice(voice_list)
    voice_name = voice[0]
    gender = voice[1]
    speaking_rate = random.uniform(0.8,1.2)
    pitch = random.uniform(-5,5)
    profile = random.choice(profile_list)
    language_code = "-".join(voice_name.split("-")[:2])
    code = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10)) 
    filename = f"{language_code}_{code}_{str(idx)}_{str(label)}.wav"
    return filename

#### Read in the dataframe

In [38]:
df = pd.read_csv('labeled_augmented.csv')
print(df.shape)
print(df.head())

(472674, 3)
   Unnamed: 0                            0      1
0         131        worst hook in history  False
1         181      man that silence on axe  False
2         212  you better call your mom od  False
3         213        u need her breastfeed  False
4         331        remember me last game  False


#### Rename dataframe columns

In [39]:
print(df.columns)
df = df.rename(columns={'Unnamed: 0': 'idx', '0': 'Text', '1': 'Flag'})
print(df.columns)
print(df.head())

Index(['Unnamed: 0', '0', '1'], dtype='object')
Index(['idx', 'Text', 'Flag'], dtype='object')
   idx                         Text   Flag
0  131        worst hook in history  False
1  181      man that silence on axe  False
2  212  you better call your mom od  False
3  213        u need her breastfeed  False
4  331        remember me last game  False


### Check getting the filename

In [40]:
test = df.iloc[0]

filename = random_settings_text_to_wav( test['Text'], 0, test['Flag'], voice_list,profile_id_list)
print(filename)

en-US_JQXLXV4AQO_0_False.wav


#### Helper Functions

In [48]:
def get_filename(index, df_row):
    """This function takes in a single row of the dataframe, and 
          outputs a dict of {index: filename}"""
    filename = random_settings_text_to_wav(df_row['Text'], index, df_row['Flag'], voice_list,profile_id_list)
    return {index: filename}

def thread_process_dataframe(df, num_threads=4):
    """This function takes in a df & number of threads, and appends the filename from the api to the df"""
    filename_dict = {}
    error_dict = []
    
    # Create a thread pool 
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        # Submit a job for each row of the threadpool
        jobs_list = {executor.submit(get_filename, index, row): row for index, row in df.iterrows()}
        
        # Wait for all jobs to finish
        for job in concurrent.futures.as_completed(jobs_list):
            try:
                # get result of the job
                result = job.result()

                # update filename dict
                filename_dict.update(result)
            except Exception as e:
                # Save the dict if we have an exception
                error_filename = f'filename_dict_error_{os.getpid()}.json'
                with open(error_filename, 'w') as file:
                    json.dump(filename_dict, file)
                    error_dict.append(error_filename)
    # Next step:
    #  if a filename_dict_error exists, read the last one

    # Once all iterations done, add filenames to the dataframe
    df['Filename'] = df.index.map(lambda x: filename_dict[x])
    
    # Next step:
    #   Delete the fict json files
    
    return df

In [45]:
# Number of workers for multithreading
NUM_WORKERS = 4

test_df = df.iloc[0:4]
print(test_df)
print('--------------------------------------------------------------')
test_df = thread_process_dataframe(test_df, NUM_WORKERS)
print(test_df)

   idx                         Text   Flag
0  131        worst hook in history  False
1  181      man that silence on axe  False
2  212  you better call your mom od  False
3  213        u need her breastfeed  False
--------------------------------------------------------------
   idx                         Text   Flag                      Filename
0  131        worst hook in history  False  en-US_C5VH2DYTTZ_0_False.wav
1  181      man that silence on axe  False  en-US_EI2TJFJPXV_1_False.wav
2  212  you better call your mom od  False  en-US_GKVSI44UAJ_2_False.wav
3  213        u need her breastfeed  False  en-US_DQ48D9643G_3_False.wav


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Filename'] = df.index.map(lambda x: filename_dict[x])
