In [1]:
import requests # HTTP client; used below for the AssemblyAI POST/GET calls
from IPython.display import JSON # Rich JSON display object for notebook output
import pandas as pd # Tabular data handling (not used in the cells visible here)
import numpy as np # Numerical arrays/math (not used in the cells visible here)
import time # Used for time.sleep() between polling requests
import os # Operating-system utilities (not used in the cells visible here)

#Visualizations
from wordcloud import WordCloud, STOPWORDS # Word-cloud generator plus its stop-word collection (not used in the cells visible here)
import plotly.express as px # High-level Plotly API for interactive charts (not used in the cells visible here)

#Panel/hvplot
import panel as pn # Provides the widgets/panes used below (FileDownload, Audio)
pn.extension() # Loads Panel's notebook extension so Panel objects can render; must run before any widget is displayed
import param # Parameter-declaration library (not used directly in the cells visible here)
import hvplot.pandas # Imported for its side effect; presumably registers the .hvplot accessor on pandas objects - confirm against hvplot docs

#Others
import pickle # Serializes the transcript response to disk and reads it back
from io import StringIO # In-memory text buffer; holds the transcript text for the download widget


In [7]:
# API key
# The key is read from a local api_key.py module so it never appears in the notebook itself.
# NOTE(review): make sure api_key.py is excluded from version control.
from api_key import API_KEY # Sent as the "authorization" header on every API request below

In [11]:
# Submitting Files for Transcription
# `requests` is already imported in the first cell, so it is not re-imported here.
endpoint = "https://api.assemblyai.com/v2/transcript"

# Request body: the audio to transcribe plus the optional analyses to run on it.
# NOTE: the name `json` is kept for compatibility with the rest of the notebook,
# but it would shadow the stdlib `json` module if that were ever imported.
json = {
    "audio_url": "https://github.com/KevinArce/AudioAI/blob/master/Harry%20Styles%20-%20As%20It%20Was.mp3?raw=true",
    "auto_highlights": True,
    "sentiment_analysis": True,
    "auto_chapters": True,
    "iab_categories": True,
}
headers = {
    "authorization": API_KEY,
    "content-type": "application/json"
}

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.post(endpoint, json=json, headers=headers, timeout=30)
# Fail here with a clear HTTP error (e.g. 401 for a bad key) instead of a
# confusing KeyError on response.json()["id"] in the next cell.
response.raise_for_status()
print(response.json())

{'id': 'ovrftaqrb5-8daf-4da8-a6d4-f059e32eb0f5', 'language_model': 'assemblyai_default', 'acoustic_model': 'assemblyai_default', 'language_code': 'en_us', 'status': 'queued', 'audio_url': 'https://github.com/KevinArce/AudioAI/blob/master/Harry%20Styles%20-%20As%20It%20Was.mp3?raw=true', 'text': None, 'words': None, 'utterances': None, 'confidence': None, 'audio_duration': None, 'punctuate': True, 'format_text': True, 'dual_channel': None, 'webhook_url': None, 'webhook_status_code': None, 'webhook_auth': False, 'webhook_auth_header_name': None, 'speed_boost': False, 'auto_highlights_result': None, 'auto_highlights': True, 'audio_start_from': None, 'audio_end_at': None, 'word_boost': [], 'boost_param': None, 'filter_profanity': False, 'redact_pii': False, 'redact_pii_audio': False, 'redact_pii_audio_quality': None, 'redact_pii_policies': None, 'redact_pii_sub': None, 'speaker_labels': False, 'content_safety': False, 'iab_categories': True, 'content_safety_labels': {}, 'iab_categories_res

In [12]:
# Build the per-transcript URL from the id returned by the submission request.
result_endpoint = f"{endpoint}/" + response.json()["id"]
headers_auth = {
    "authorization": API_KEY, # Same API key header as the submission request
}
transcript_response = requests.get(result_endpoint, headers=headers_auth, timeout=30)
transcript_response.raise_for_status() # Surface HTTP failures instead of a KeyError on "status"
print(transcript_response.json())

# A freshly submitted job reports "queued" before it becomes "processing"
# (see the submission output above), so both states mean "not finished yet".
# Waiting only on "processing" would exit immediately for a queued job and
# leave later cells working with an empty transcript.
while transcript_response.json()["status"] in ("queued", "processing"):
    time.sleep(1) # Pause between polls to avoid hammering the API
    transcript_response = requests.get(result_endpoint, headers=headers_auth, timeout=30)
    transcript_response.raise_for_status()
    print(transcript_response.json())

# Stop here if the service reports a failed job, rather than letting later
# cells fail on missing transcript fields.
if transcript_response.json()["status"] == "error":
    raise RuntimeError(
        "Transcription failed: " + str(transcript_response.json().get("error"))
    )

{'id': 'ovrftaqrb5-8daf-4da8-a6d4-f059e32eb0f5', 'language_model': 'assemblyai_default', 'acoustic_model': 'assemblyai_default', 'language_code': 'en_us', 'status': 'completed', 'audio_url': 'https://github.com/KevinArce/AudioAI/blob/master/Harry%20Styles%20-%20As%20It%20Was.mp3?raw=true', 'text': "Come on, Harry. We wanna say goodnight to you. You always knew. Why don't we leave it? There's nothing to say and everything gets in the way. You know it's not the same as it was in this world. It just loved the same as it was you.", 'words': [{'text': 'Come', 'start': 310, 'end': 474, 'confidence': 0.97301, 'speaker': None}, {'text': 'on,', 'start': 512, 'end': 714, 'confidence': 0.999, 'speaker': None}, {'text': 'Harry.', 'start': 752, 'end': 1054, 'confidence': 0.9, 'speaker': None}, {'text': 'We', 'start': 1102, 'end': 1350, 'confidence': 0.89978, 'speaker': None}, {'text': 'wanna', 'start': 1400, 'end': 1686, 'confidence': 0.80924, 'speaker': None}, {'text': 'say', 'start': 1748, 'end':

In [13]:
# Wrap the final transcript payload in IPython's JSON display object; as the
# last expression of the cell it becomes the cell's output.
JSON(transcript_response.json())

<IPython.core.display.JSON object>

In [14]:
# Save pickle
# Cache the finished transcript on disk so later cells can work from the file
# instead of calling the API again.
with open('speech_data.pkl', 'wb') as f:
    transcript_snapshot = transcript_response.json().copy() # Shallow copy of the decoded response dict
    f.write(pickle.dumps(transcript_snapshot)) # Serialize to bytes, then write them to speech_data.pkl

In [15]:
# Load data pickle
# Read back the transcript cached by the save cell. Only unpickle files you
# created yourself: unpickling untrusted data can execute arbitrary code.
with open('speech_data.pkl', 'rb') as f:
    pickled_bytes = f.read()
    data = pickle.loads(pickled_bytes) # `data` is the transcript response dict used by the rest of the notebook

In [16]:
# Put the transcript text into an in-memory file-like object so it can be
# offered as a download without writing a file to disk.
transcript_text = data["text"]
buffer = StringIO()
buffer.write(transcript_text)
# Rewind so that a reader starts at the first character; seek() returns the
# new position (0), which is what the cell displays.
buffer.seek(0)

0

In [23]:
# Download button that serves the in-memory transcript buffer as "transcript.txt".
# Fixes: the keyword was misspelled `botton_type` (the parameter is `button_type`),
# and a stray `from fileinput import filename` auto-import was removed because
# `filename` here is only a keyword argument, not the fileinput function.
transcript_download = pn.widgets.FileDownload(
    buffer,
    filename="transcript.txt", # Name suggested to the browser for the downloaded file
    button_type="primary", # Button style
)
transcript_download # Last expression, so the widget is displayed as the cell output



In [32]:
# Same audio file that was submitted for transcription, embedded as a player.
audio_url = "https://github.com/KevinArce/AudioAI/blob/master/Harry%20Styles%20-%20As%20It%20Was.mp3?raw=true"

# Options passed to the Audio pane. `time` is presumably the playback position
# in seconds - confirm against the Panel Audio pane documentation.
audio_options = {"name": 'Audio', "time": 147}
audio_play = pn.pane.Audio(audio_url, **audio_options)
audio_play # Last expression, so the player is displayed as the cell output

In [33]:
# Sentiment-analysis section of the cached transcript response; it was
# requested with "sentiment_analysis": True in the submission cell.
# NOTE(review): the structure of each entry is not shown here - inspect before use.
sentiment = data["sentiment_analysis_results"]