In [1]:
import requests # This is the library we use to make HTTP requests
from IPython.display import JSON # This is the library we use to display JSON nicely in the notebook
import pandas as pd # This is the library we use to display data nicely in the notebook
import numpy as np # This is the library we use to do math
import time # This is the library we use to get the current time (for timestamps)
import os # This is the library we use to get the current working directory

#Visualizations
from wordcloud import WordCloud, STOPWORDS # This is the library we use to generate word clouds from text data, and to filter out common words
import plotly.express as px # This is the library we use to generate interactive visualizations (like the map) from data

#Panel/hvplot 
import panel as pn # panel is a library that allows us to create interactive visualizations 
pn.extension() # pn.extension() is a command we use to enable the interactive features of the library 
import param # param is a library that allows us to create interactive visualizations
import hvplot.pandas # hvplot is a library that allows us to create interactive visualizations

#Others
import pickle # This is the library we use to save and load data from files
from io import StringIO # This is the library we use to read data from strings (like the CSV data we get from the API)


In [2]:
# API key
from api_key import API_KEY # This is the file we created to store our API key in a variable called API_KEY

In [3]:
# Submitting Files for Transcription
# `requests` is already imported in the first cell, so it is not re-imported here.
endpoint = "https://api.assemblyai.com/v2/transcript"

# Request body: the audio to transcribe plus the optional analyses to run on it.
# NOTE(review): this name would shadow the stdlib `json` module if that were ever
# imported in this notebook; it is kept unchanged because it is notebook-level state.
json = {
    "audio_url": "https://github.com/KevinArce/AudioAI/blob/master/Harry%20Styles%20-%20As%20It%20Was.mp3?raw=true",
    "auto_highlights": True,
    "sentiment_analysis": True,
    "auto_chapters": True,
    "iab_categories": True,
}
headers = {
    "authorization": API_KEY,
    "content-type": "application/json"
}

# Without a timeout, requests waits indefinitely if the server never answers.
response = requests.post(endpoint, json=json, headers=headers, timeout=30)

# Stop here on a 4xx/5xx reply (for example a rejected API key) instead of
# failing later with a KeyError when the next cell reads response.json()["id"].
response.raise_for_status()
print(response.json())

{'id': 'ovb72i0fwz-ef7f-4638-b3fc-ffeecb5630f6', 'language_model': 'assemblyai_default', 'acoustic_model': 'assemblyai_default', 'language_code': 'en_us', 'status': 'queued', 'audio_url': 'https://github.com/KevinArce/AudioAI/blob/master/Harry%20Styles%20-%20As%20It%20Was.mp3?raw=true', 'text': None, 'words': None, 'utterances': None, 'confidence': None, 'audio_duration': None, 'punctuate': True, 'format_text': True, 'dual_channel': None, 'webhook_url': None, 'webhook_status_code': None, 'webhook_auth': False, 'webhook_auth_header_name': None, 'speed_boost': False, 'auto_highlights_result': None, 'auto_highlights': True, 'audio_start_from': None, 'audio_end_at': None, 'word_boost': [], 'boost_param': None, 'filter_profanity': False, 'redact_pii': False, 'redact_pii_audio': False, 'redact_pii_audio_quality': None, 'redact_pii_policies': None, 'redact_pii_sub': None, 'speaker_labels': False, 'content_safety': False, 'iab_categories': True, 'content_safety_labels': {}, 'iab_categories_res

In [4]:
# Poll the transcript resource created by the POST above until the job is finished.
result_endpoint = f"{endpoint}/{response.json()['id']}"
headers_auth = {
    "authorization": API_KEY,  # the GET only needs the API key, no content type
}

transcript_response = requests.get(result_endpoint, headers=headers_auth, timeout=30)
print(transcript_response.json())

# A freshly submitted job reports "queued" before it reports "processing"
# (the POST reply printed above shows 'status': 'queued'), so both states
# mean "not finished yet". Waiting only on "processing" can end the loop
# immediately and leave the transcript text empty.
while transcript_response.json()["status"] in ("queued", "processing"):
    time.sleep(1)  # pause between polls
    transcript_response = requests.get(result_endpoint, headers=headers_auth, timeout=30)
    print(transcript_response.json())

# Surface a failed transcription here rather than letting later cells
# operate on a payload that has no text or sentiment results.
final_payload = transcript_response.json()
if final_payload["status"] == "error":
    raise RuntimeError(f"Transcription failed: {final_payload.get('error')}")

{'id': 'ovb72i0fwz-ef7f-4638-b3fc-ffeecb5630f6', 'language_model': 'assemblyai_default', 'acoustic_model': 'assemblyai_default', 'language_code': 'en_us', 'status': 'processing', 'audio_url': 'https://github.com/KevinArce/AudioAI/blob/master/Harry%20Styles%20-%20As%20It%20Was.mp3?raw=true', 'text': None, 'words': None, 'utterances': None, 'confidence': None, 'audio_duration': None, 'punctuate': True, 'format_text': True, 'dual_channel': None, 'webhook_url': None, 'webhook_status_code': None, 'webhook_auth': False, 'webhook_auth_header_name': None, 'speed_boost': False, 'auto_highlights_result': None, 'auto_highlights': True, 'audio_start_from': None, 'audio_end_at': None, 'word_boost': [], 'boost_param': None, 'filter_profanity': False, 'redact_pii': False, 'redact_pii_audio': False, 'redact_pii_audio_quality': None, 'redact_pii_policies': None, 'redact_pii_sub': None, 'speaker_labels': False, 'content_safety': False, 'iab_categories': True, 'content_safety_labels': {}, 'iab_categories

In [5]:
# Render the last polled response with IPython's JSON display object.
transcript_payload = transcript_response.json()
JSON(transcript_payload)

<IPython.core.display.JSON object>

In [6]:
# Save pickle
# Write a copy of the parsed response to speech_data.pkl in the working directory.
with open('speech_data.pkl', 'wb') as pickle_out:
    speech_snapshot = transcript_response.json().copy()
    pickle.dump(speech_snapshot, pickle_out)

In [7]:
# Load data pickle
# Read speech_data.pkl back into `data`; the following cells index into it.
with open('speech_data.pkl', 'rb') as pickle_in:
    data = pickle.load(pickle_in)

In [8]:
# Build an in-memory text buffer that holds the transcript text.
buffer = StringIO()
transcript_text = data["text"]
buffer.write(transcript_text)
# Rewind so that a reader starts from the first character.
buffer.seek(0)

0

In [9]:
# Download button that serves the in-memory transcript buffer as transcript.txt.
# The keyword is `button_type`; the earlier spelling `botton_type` is not a
# FileDownload parameter, so the "primary" style was never applied.
transcript_download = pn.widgets.FileDownload(
    buffer,
    filename="transcript.txt",
    button_type="primary",
)
transcript_download  # last expression, so the notebook renders the widget



In [10]:
# Address of the audio file handed to the player below.
audio_url = "https://github.com/KevinArce/AudioAI/blob/master/Harry%20Styles%20-%20As%20It%20Was.mp3?raw=true"

# Audio pane for that URL.
# NOTE(review): `time=147` presumably sets the playback position in seconds -
# confirm against the Panel Audio pane documentation.
audio_play = pn.pane.Audio(
    audio_url,
    time=147,
    name='Audio',
)
audio_play

In [11]:
# Pull the "sentiment_analysis_results" entry out of the reloaded transcript data;
# the next cell turns it into a DataFrame.
sentiment = data["sentiment_analysis_results"]

In [13]:
# Tabulate the sentiment results and show the table as the cell output.
sentiment_df = pd.DataFrame(data=sentiment)
sentiment_df

Unnamed: 0,text,start,end,sentiment,confidence,speaker
0,"Come on, Harry.",310,1054,NEUTRAL,0.697529,
1,We wanna say goodnight to you.,1102,3320,NEUTRAL,0.539464,
2,You always knew.,20790,21826,NEUTRAL,0.582425,
3,Why don't we leave it?,21888,23182,NEUTRAL,0.684918,
4,There's nothing to say and everything gets in ...,23256,29400,NEGATIVE,0.857522,
5,You know it's not the same as it was in this w...,44230,50500,NEGATIVE,0.774149,
6,It just loved the same as it was you.,51550,167080,POSITIVE,0.675335,


In [24]:
# Count how many rows carry each sentiment label.
# Despite the variable name, value_counts() yields a Series indexed by label.
sentiment_labels = sentiment_df['sentiment']
sentiment_df_grouped = sentiment_labels.value_counts()
sentiment_df_grouped

NEUTRAL     4
NEGATIVE    2
POSITIVE    1
Name: sentiment, dtype: int64

In [25]:
# Bar plot
# One bar per sentiment label, drawn from the counts computed above.
sentiment_plot = sentiment_df_grouped.hvplot(
    title="Sentiment Analysis",
    kind="bar",
)
# Wrap the plot in a Panel row so it renders as the cell output.
pn.Row(sentiment_plot)

In [27]:
def _rows_with_sentiment(label):
    """Return the text and sentiment columns of the rows whose sentiment equals ``label``."""
    return sentiment_df[sentiment_df["sentiment"] == label][["text", "sentiment"]]


def _as_table(frame):
    """Wrap ``frame`` in a Panel DataFrame widget with the shared table layout."""
    return pn.widgets.DataFrame(frame, autosize_mode='fit_columns', width=700, height=300)


# One frame per sentiment label.
positive_df = _rows_with_sentiment("POSITIVE")
negative_df = _rows_with_sentiment("NEGATIVE")
neutral_df = _rows_with_sentiment("NEUTRAL")

# First tab shows the overview bar plot; the remaining tabs list the sentences per label.
sentiment_tabs = pn.Tabs(
    ('Sentiment overview', sentiment_plot),
    ('Positive', _as_table(positive_df)),
    ('Negative', _as_table(negative_df)),
    ('Neutral', _as_table(neutral_df)),
)
sentiment_tabs

In [28]:
# Build a separate set object from wordcloud's STOPWORDS collection.
stopwords = set(STOPWORDS)

In [29]:
# Full transcript text taken from the reloaded transcript data.
transcript = data["text"]

In [30]:
# Split the transcript on whitespace, then lowercase every token.
# Punctuation stays attached to the tokens.
transcript_tokens = str(transcript).split()
transcript_lower = list(map(str.lower, transcript_tokens))
transcript_lower

['come',
 'on,',
 'harry.',
 'we',
 'wanna',
 'say',
 'goodnight',
 'to',
 'you.',
 'you',
 'always',
 'knew.',
 'why',
 "don't",
 'we',
 'leave',
 'it?',
 "there's",
 'nothing',
 'to',
 'say',
 'and',
 'everything',
 'gets',
 'in',
 'the',
 'way.',
 'you',
 'know',
 "it's",
 'not',
 'the',
 'same',
 'as',
 'it',
 'was',
 'in',
 'this',
 'world.',
 'it',
 'just',
 'loved',
 'the',
 'same',
 'as',
 'it',
 'was',
 'you.']

In [31]:
# Glue the lowercase tokens back together into one space-separated string.
word_separator = ' '
all_words = word_separator.join(transcript_lower)
all_words

"come on, harry. we wanna say goodnight to you. you always knew. why don't we leave it? there's nothing to say and everything gets in the way. you know it's not the same as it was in this world. it just loved the same as it was you."