In [2]:
#Analysis Libraries
import requests
#import wordcloud
from IPython.display import JSON
import pandas as pd
import numpy as np
import time

#Visual Libraries
#from wordcloud import WordCloud, STOPWORDS
import plotly.express as px
%matplotlib inline
import matplotlib.pyplot as plt

#Dashboard Library - Panel/Hvplot (holoviz)
import panel as pn
pn.extension()
import param
import hvplot.pandas

#Additional Libraries
import pickle
from io import StringIO

#Import API
from Keys import Api

#Image
from PIL import Image
from os import path
import os

In [3]:
"""
The following script is to send request and save data for use later
"""

'\nThe following script is to send request and save data for use later\n'

In [5]:
"""
Making a request to Assembly AI
"""
#Submitting my audio file to AssemblyAI API for transcription

endpoint = "https://api.assemblyai.com/v2/transcript"

headers = {
    "authorization": Api.api_key,
    "content-type": "application/json"
}

#Adding auto highlight for detecting important words/phrases
#Adding content-safety for harmful or negative words
#Adding iab_categories for topic detection / description per chapter
#Adding sentiment_analysis for overall review (positive/negative/neutral) for each sentence
#Adding auto_chapters for determining the chapters of the audio file

#Fox Send
fox_json = {
    "audio_url": "https://github.com/DominicTheAnalyst/Fox-CNN-Analysis/blob/main/FOX_Analysis_FBI_Raid.mp3?raw=true",
    "auto_highlights": True,
    "content_safety": True,
    "iab_categories": True,
    "sentiment_analysis": True,
    "auto_chapters": True,
}
fox_response = requests.post(endpoint, json=fox_json, headers=headers)

#CNN Send
cnn_json = {
    "audio_url": "https://github.com/DominicTheAnalyst/Fox-CNN-Analysis/blob/main/CNN_Analysis_FBI_Raid.mp3?raw=true",
    "auto_highlights": True,
    "content_safety": True,
    "iab_categories": True,
    "sentiment_analysis": True,
    "auto_chapters": True,
}
cnn_response = requests.post(endpoint, json=cnn_json, headers=headers)

print("FOX Response JSON: \n\n" + str(fox_response.json()) + "\n\nCNN Response JSON: \n\n" + str(cnn_response.json()))

FOX Response JSON: 

{'id': 'rsc9t0sa1d-e520-45d6-8720-97e068162ddb', 'language_model': 'assemblyai_default', 'acoustic_model': 'assemblyai_default', 'language_code': 'en_us', 'status': 'queued', 'audio_url': 'https://github.com/DominicTheAnalyst/Fox-CNN-Analysis/blob/main/FOX_Analysis_FBI_Raid.mp3?raw=true', 'text': None, 'words': None, 'utterances': None, 'confidence': None, 'audio_duration': None, 'punctuate': True, 'format_text': True, 'dual_channel': None, 'webhook_url': None, 'webhook_status_code': None, 'webhook_auth': False, 'webhook_auth_header_name': None, 'speed_boost': False, 'auto_highlights_result': None, 'auto_highlights': True, 'audio_start_from': None, 'audio_end_at': None, 'word_boost': [], 'boost_param': None, 'filter_profanity': False, 'redact_pii': False, 'redact_pii_audio': False, 'redact_pii_audio_quality': None, 'redact_pii_policies': None, 'redact_pii_sub': None, 'speaker_labels': False, 'content_safety': True, 'iab_categories': True, 'content_safety_labels': {

In [6]:
"""
Get data from request once requirements are met
"""
#While loop to get request only when all satisfactions are complete
fox_result_endpoint = endpoint + "/" + fox_response.json()["id"]
cnn_result_endpoint = endpoint + "/" + cnn_response.json()["id"]
headers_auth = {
    "authorization": Api.api_key,
}

#Fox Fetch
while fox_response.json()['status'] != 'completed':
    fox_response = requests.get(fox_result_endpoint, headers=headers_auth)
    time.sleep(3)

fox_transcript_response = requests.get(fox_result_endpoint, headers=headers_auth)

#CNN Fetch
while cnn_response.json()['status'] != 'completed':
    cnn_response = requests.get(cnn_result_endpoint, headers=headers_auth)
    time.sleep(3)

cnn_transcript_response = requests.get(cnn_result_endpoint, headers=headers_auth)

print("FOX Transcript JSON: \n\n" + str(JSON(fox_transcript_response.json())) + "\n\nCNN Transcript JSON: \n\n" + str(JSON(cnn_transcript_response.json())))

FOX Transcript JSON: 

<IPython.core.display.JSON object>

CNN Transcript JSON: 

<IPython.core.display.JSON object>


In [7]:
"""
Saving data as pickle to be used repeatedly without making multiple requests over different sessions then unpickling to get the data
"""
#Save Pickle

#Save pickle for Fox
with open('fox_speech_data.pkl', 'wb') as f:
    pickle.dump(fox_transcript_response.json().copy(), f)

#Save pickle for CNN
with open('cnn_speech_data.pkl', 'wb') as f:
    pickle.dump(cnn_transcript_response.json().copy(), f)

#Load Pickle

#Load pickle for Fox
with open('fox_speech_data.pkl', 'rb') as f:
    fox_data = pickle.load(f)

#Load pickle for Cnn
with open('cnn_speech_data.pkl', 'rb') as f:
    cnn_data = pickle.load(f)

In [8]:
"""
The following scripts are used to make items for interactive dashboards
"""

'\nThe following scripts are used to make items for interactive dashboards\n'

In [9]:
"""
Making transcript download button
"""
#Download transcript widget for fox
fox_buffer = StringIO()
fox_buffer.write(fox_data["text"])
fox_buffer.seek(0)

fox_transcript_download = pn.widgets.FileDownload(file=fox_buffer, filename="fox_transcript.txt",
                                                  button_type='success')

#Download transcript widget for cnn
cnn_buffer = StringIO()
cnn_buffer.write(cnn_data["text"])
cnn_buffer.seek(0)

cnn_transcript_download = pn.widgets.FileDownload(file=cnn_buffer, filename="cnn_transcript.txt",
                                                  button_type='success')
print("FOX Transcript: \n\n")
fox_transcript_download
print("\n\nCNN Transcript: \n\n")
cnn_transcript_download

FOX Transcript: 




CNN Transcript: 




In [10]:
"""
Making audio play button
"""
#Audio Play for Fox
fox_audio_url = "https://github.com/DominicTheAnalyst/Fox-CNN-Analysis/blob/main/FOX_Analysis_FBI_Raid.mp3?raw=true"
fox_audio_play = pn.pane.Audio(fox_audio_url, name='FOX Audio', time=0)

#Audio Play for Fox
cnn_audio_url = "https://github.com/DominicTheAnalyst/Fox-CNN-Analysis/blob/main/CNN_Analysis_FBI_Raid.mp3?raw=true"
cnn_audio_play = pn.pane.Audio(cnn_audio_url, name='CNN Audio', time=0)

print("FOX Play Button: \n\n")
fox_audio_play
print("\n\nCNN Play Button: \n\n")
cnn_audio_play

FOX Play Button: 




CNN Play Button: 




In [11]:
"""
Making sentiment plot
"""
#FOX Dataframes
fox_sentiment = fox_data["sentiment_analysis_results"]

fox_sentiment_df = pd.DataFrame(fox_sentiment)
fox_sentiment_df = fox_sentiment_df.assign(dataset='fox')

#CNN Dataframes
cnn_sentiment = cnn_data["sentiment_analysis_results"]

cnn_sentiment_df = pd.DataFrame(cnn_sentiment)
cnn_sentiment_df = cnn_sentiment_df.assign(dataset='cnn')

#Combine dataframes
both_sentiment_df = pd.concat([fox_sentiment_df, cnn_sentiment_df])
both_sentiment_df_grouped = both_sentiment_df.groupby(['dataset'])['sentiment'].value_counts().reset_index(
    name='Sentiment Counts')

#Bar plot
sentiment_plot = both_sentiment_df_grouped.hvplot(x='sentiment', y='Sentiment Counts', by='dataset',
                                                  title='Sentences by Sentiment Category', legend='top', kind='bar',
                                                  color='Source', cmap=['blue', 'red'])
pn.Row(sentiment_plot)

positive_df = both_sentiment_df[both_sentiment_df["sentiment"] == "POSITIVE"][["text", "sentiment", 'dataset']]
negative_df = both_sentiment_df[both_sentiment_df["sentiment"] == "NEGATIVE"][["text", "sentiment", 'dataset']]
neutral_df = both_sentiment_df[both_sentiment_df["sentiment"] == "NEUTRAL"][["text", "sentiment", 'dataset']]

sentiment_tabs = pn.Tabs(('Sentiment Overview', sentiment_plot),
                         ('Positive', pn.widgets.DataFrame(positive_df, autosize_mode='fit_columns', width=1000,
                                                           height=300)),
                         ('Negative', pn.widgets.DataFrame(negative_df, autosize_mode='fit_columns', width=1000,
                                                           height=300)),
                         ('Neutral',
                          pn.widgets.DataFrame(neutral_df, autosize_mode='fit_columns', width=1000, height=300))
                         )
sentiment_tabs

In [12]:
"""
Making wordclouds

#FOX WordCloud


#FOX text
fox_transcript = fox_data["text"]
fox_transcript_lower = [item.lower() for item in str(fox_transcript).split()]
fox_all_words = ' '.join(fox_transcript_lower)

#Read mask image
fox_dir = os.getcwd() + '/Pics'
fox_mask = np.array(Image.open(path.join(fox_dir, "fox.jpg")))

#Set stopwords
fox_stopwords = set(STOPWORDS)

#Word cloud plot
fox_wordcloud = WordCloud(background_color='black', max_words=2000, mask=fox_mask, stopwords=fox_stopwords,
                          contour_width=25, contour_color='white', colormap='Reds_r', width=1000, height=1000,
                          collocations=False)

#Generate cloud
fox_wordcloud.generate(fox_all_words)

#Store to file
fox_wordcloud.to_file(path.join(fox_dir, "fox1.jpg"))

#Show
fox_plot = plt.imshow(fox_wordcloud, interpolation='bilinear')
print("FOX WordCloud: \n")
fox_plot


#CNN WordCloud

#CNN text
cnn_transcript = cnn_data["text"]
cnn_transcript_lower = [item.lower() for item in str(cnn_transcript).split()]
cnn_all_words = ' '.join(cnn_transcript_lower)

#Read mask image
cnn_dir = os.getcwd() + '/Pics'
cnn_mask = np.array(Image.open(path.join(cnn_dir, "cnn.jpg")))

#Set stopwords
cnn_stopwords = set(STOPWORDS)

#Word cloud plot
cnn_wordcloud = WordCloud(background_color='black', max_words=2000, mask=cnn_mask, stopwords=cnn_stopwords,
                          contour_width=25, contour_color='white', colormap='Blues_r', width=1000, height=1000,
                          collocations=False)

#Generate cloud
cnn_wordcloud.generate(cnn_all_words)

#Store to file
cnn_wordcloud.to_file(path.join(cnn_dir, "cnn1.jpg"))

#Show
cnn_plot = plt.imshow(cnn_wordcloud, aspect='equal', interpolation='bilinear', alpha=0.9)
cnn_plot
print("CNN WordCloud: \n")
cnn_plot"""

'\nMaking wordclouds\n\n#FOX WordCloud\n\n\n#FOX text\nfox_transcript = fox_data["text"]\nfox_transcript_lower = [item.lower() for item in str(fox_transcript).split()]\nfox_all_words = \' \'.join(fox_transcript_lower)\n\n#Read mask image\nfox_dir = os.getcwd() + \'/Pics\'\nfox_mask = np.array(Image.open(path.join(fox_dir, "fox.jpg")))\n\n#Set stopwords\nfox_stopwords = set(STOPWORDS)\n\n#Word cloud plot\nfox_wordcloud = WordCloud(background_color=\'black\', max_words=2000, mask=fox_mask, stopwords=fox_stopwords,\n                          contour_width=25, contour_color=\'white\', colormap=\'Reds_r\', width=1000, height=1000,\n                          collocations=False)\n\n#Generate cloud\nfox_wordcloud.generate(fox_all_words)\n\n#Store to file\nfox_wordcloud.to_file(path.join(fox_dir, "fox1.jpg"))\n\n#Show\nfox_plot = plt.imshow(fox_wordcloud, interpolation=\'bilinear\')\nprint("FOX WordCloud: \n")\nfox_plot\n\n\n#CNN WordCloud\n\n#CNN text\ncnn_transcript = cnn_data["text"]\ncnn_tr

In [13]:
"""
Controller for words in wordclouds


#FOX Controller
class FoxController(param.Parameterized):
    fox_word_slider = param.Integer(50, bounds=(50,700), step= 50)

controller1 = FoxController()

@pn.depends(controller1.param.fox_word_slider, watch=True)
def update_fox_wordcloud(num_words):
    #Word cloud plot
    fox_word_cloud = WordCloud(background_color='black', max_words = 2000, mask= fox_mask, stopwords= fox_stopwords, contour_width= 25, contour_color= 'white', colormap= 'Reds_r', width= 1000, height= 1000, collocations=False).generate(fox_all_words)

    #Store to file
    fox_word_cloud.to_file(path.join(fox_dir, "fox1.jpg"))

    #Show
    fox_word_cloud_plot = px.imshow(fox_word_cloud)
    fox_word_cloud_plot.update_xaxes(showticklabels=False)
    fox_word_cloud_plot.update_yaxes(showticklabels=False)
    return fox_word_cloud_plot

#CNN Controller
class CnnController(param.Parameterized):
    cnn_word_slider = param.Integer(50, bounds=(50,700), step= 50)

controller2 = CnnController()

@pn.depends(controller2.param.cnn_word_slider, watch=True)
def update_cnn_wordcloud(num_words):
    #Word Cloud plot
    cnn_word_cloud = WordCloud(background_color='black', max_words = 2000, mask= cnn_mask, stopwords= cnn_stopwords, contour_width= 25, contour_color= 'white', colormap= 'Blues_r', width= 1000, height= 1000, collocations=False).generate(cnn_all_words)

    #Store to file
    cnn_word_cloud.to_file(path.join(cnn_dir, "cnn1.jpg"))

    #Show
    cnn_word_cloud_plot = px.imshow(cnn_word_cloud)
    cnn_word_cloud_plot.update_xaxes(showticklabels=False)
    cnn_word_cloud_plot.update_yaxes(showticklabels=False)
    return cnn_word_cloud_plot"""

'\nController for words in wordclouds\n\n\n#FOX Controller\nclass FoxController(param.Parameterized):\n    fox_word_slider = param.Integer(50, bounds=(50,700), step= 50)\n\ncontroller1 = FoxController()\n\n@pn.depends(controller1.param.fox_word_slider, watch=True)\ndef update_fox_wordcloud(num_words):\n    #Word cloud plot\n    fox_word_cloud = WordCloud(background_color=\'black\', max_words = 2000, mask= fox_mask, stopwords= fox_stopwords, contour_width= 25, contour_color= \'white\', colormap= \'Reds_r\', width= 1000, height= 1000, collocations=False).generate(fox_all_words)\n\n    #Store to file\n    fox_word_cloud.to_file(path.join(fox_dir, "fox1.jpg"))\n\n    #Show\n    fox_word_cloud_plot = px.imshow(fox_word_cloud)\n    fox_word_cloud_plot.update_xaxes(showticklabels=False)\n    fox_word_cloud_plot.update_yaxes(showticklabels=False)\n    return fox_word_cloud_plot\n\n#CNN Controller\nclass CnnController(param.Parameterized):\n    cnn_word_slider = param.Integer(50, bounds=(50,700

In [14]:
"""
Create autochapters
"""
#FOX auto chapters

#Get fox chapter headers and summary
fox_chapters = fox_data["chapters"]
fox_chapter_summary = pn.widgets.StaticText(value=fox_chapters[0]['summary'], width=1000, height_policy='fit')

#Make play start button and set audio to play in time frame
fox_button = pn.widgets.Button(name=str(int(fox_chapters[0]['start'] / 1000)), button_type='primary')
fox_chapter_audio = pn.pane.Audio(fox_audio_url, name='Audio', time=round(fox_chapters[0]['start'] / 1000))
print("FOX Chapter Audio:  \n\n")
fox_chapter_audio

#CNN auto chapters

#Get CNN chapter headers and summary
cnn_chapters = cnn_data["chapters"]
cnn_chapter_summary = pn.widgets.StaticText(value=cnn_chapters[0]['summary'], width=1000, height_policy='fit')

#Make play start button and set audio to play in time frame
cnn_button = pn.widgets.Button(name=str(int(cnn_chapters[0]['start'] / 1000)), button_type='primary')
cnn_chapter_audio = pn.pane.Audio(cnn_audio_url, name='Audio', time=round(cnn_chapters[0]['start'] / 1000))
print("CNN Chapter Audio: \n\n")
cnn_chapter_audio

FOX Chapter Audio:  


CNN Chapter Audio: 




In [15]:
#Class to make button usable with audio player
class ButtonAudio():
    """Button + audio player pair, one for FOX and one for CNN, tied to one start time.

    `start_time` is divided by 1000 wherever it is used as a player position
    (presumably milliseconds converted to seconds - confirm against the API data).
    Clicking a button moves the matching player to that position.

    Attributes:
        start_time: the value passed to the constructor.
        fox_button / fox_chapter_audio: controls bound to `fox_audio_url`.
        cnn_button / cnn_chapter_audio: controls bound to `cnn_audio_url`.
    """

    def __init__(self, start_time):
        self.start_time = start_time

        #FOX
        self.fox_button, self.fox_chapter_audio = self._make_controls(
            fox_audio_url, self.fox_move_audio_head)

        #CNN
        self.cnn_button, self.cnn_chapter_audio = self._make_controls(
            cnn_audio_url, self.cnn_move_audio_head)

    def _make_controls(self, audio_url, click_handler):
        """Build one (button, audio pane) pair and register `click_handler` on the button."""
        position = self.start_time / 1000
        #The label shows the truncated position; the player starts at the rounded one
        jump_button = pn.widgets.Button(name=str(int(position)), button_type='primary', width=60)
        player = pn.pane.Audio(audio_url, name='Audio', time=round(position))
        jump_button.on_click(click_handler)
        return jump_button, player

    def fox_move_audio_head(self, event):
        """Click callback: move the FOX player to this instance's start position."""
        self.fox_chapter_audio.time = self.start_time / 1000

    def cnn_move_audio_head(self, event):
        """Click callback: move the CNN player to this instance's start position."""
        self.cnn_chapter_audio.time = self.start_time / 1000

In [16]:
"""
Create layout for audio and descriptions
"""
#Create chapter summary headers
fox_chapters_layout = pn.Column(pn.pane.Markdown("### FOX Audio Auto Chapter Summary"))
cnn_chapters_layout = pn.Column(pn.pane.Markdown("### CNN Audio Auto Chapter Summary"))

#FOX layout
for fox_chapter in fox_chapters:
    fox_chapter_summary = pn.widgets.StaticText(value=fox_chapter['summary'], width=1000, height_policy='fit')
    fox_button_audio = ButtonAudio(fox_chapter['start'])
    fox_button = fox_button_audio.fox_button
    fox_chapter_audio = fox_button_audio.fox_chapter_audio
    fox_chapters_layout.append(
        pn.Row(pn.Column(fox_button), pn.Column(fox_chapter_audio), pn.Column(fox_chapter_summary)))

#CNN layout
for cnn_chapter in cnn_chapters:
    cnn_chapter_summary = pn.widgets.StaticText(value=cnn_chapter['summary'], width=1000, height_policy='fit')
    cnn_button_audio = ButtonAudio(cnn_chapter['start'])
    cnn_button = cnn_button_audio.cnn_button
    cnn_chapter_audio = cnn_button_audio.cnn_chapter_audio
    cnn_chapters_layout.append(
        pn.Row(pn.Column(cnn_button), pn.Column(cnn_chapter_audio), pn.Column(cnn_chapter_summary)))

layout = fox_chapters_layout + cnn_chapters_layout
layout

In [17]:
"""
Making highlights scatter plot
"""
#FOX df
fox_highlights = fox_data['auto_highlights_result']['results']
fox_highlights_df = pd.DataFrame(fox_highlights)
fox_highlights_df = fox_highlights_df.assign(dataset='fox')

#CNN df
cnn_highlights = cnn_data['auto_highlights_result']['results']
cnn_highlights_df = pd.DataFrame(cnn_highlights)
cnn_highlights_df = cnn_highlights_df.assign(dataset='cnn')

#Concat into one
grouped_highlights_df = pd.concat([fox_highlights_df, cnn_highlights_df])

#Group by
grouped_highlights_df = grouped_highlights_df.groupby(['dataset', 'count', 'rank'])['text'].apply(
    ', \n'.join).reset_index()
grouped_highlights_df

#Scatter plot
highlights_plot = grouped_highlights_df.hvplot.points(x='count', y='rank', padding=0.4, hover_cols='all',
                                                      width=2000, height=600, size=100,
                                                      title='Freq of Auto Highlighted Words', cmap=['red', 'blue'],
                                                      by='dataset') * \
                  grouped_highlights_df.hvplot.labels(x='count', y='rank', text='text', text_baseline='top',
                                                      hover=False)
pn.Row(highlights_plot)

In [18]:
"""
Creating dashboard
"""
#Dashboard
dashboard = pn.template.FastListTemplate(
    title='FOX vs CNN Audio Content Explorer on Trump FBI Raid 2022',
    sidebar=[
        pn.pane.Markdown('# Explore Audio Content'),
        pn.pane.Markdown(
            '### This app analyzes the content of your audio file, including sentiment, wordcloud, automatic content summary and highlights using AssemblyAI API.'),
        pn.pane.Markdown(
            '### This example is based on the audio content of FOX and CNN reaction of Trump being raided by the FBI.'),
        pn.pane.Markdown(
            '### [Link to Fox video!](https://www.foxnews.com/media/fbi-raid-on-trumps-mar-a-lago-celebrated-leftists-beautiful-raid)    [Link to CNN video!](https://www.youtube.com/watch?v=62Z4MnFIifY&t=1s)'),
        pn.pane.Markdown('### Download Audio Transcripts:'),
        fox_transcript_download,
        cnn_transcript_download
    ],
    main=[
        pn.Row(
            pn.Column(sentiment_tabs)
        ),
        pn.Row(
            pn.Column(
                pn.pane.Markdown('# FOX WordCloud on Speech Content'),
                #pn.Row(controller1.param.fox_word_slider),
                #pn.Row(update_fox_wordcloud, title="FOX WordCloud on Speech Content")
            ),
            pn.Column(
                pn.pane.Markdown('# CNN WordCloud on Speech Content'),
                #pn.Row(controller2.param.cnn_word_slider),
                #pn.Row(update_cnn_wordcloud, title="CNN WordCloud on Speech Content")
            )
        ),
        pn.Row(layout),
        pn.Row(highlights_plot, title='Automatic Highlights')
    ],
    accent_base_color="#667b92",
    header_background="#e3e0cf"
)
dashboard.show()



Launching server at http://localhost:59009


<bokeh.server.server.Server at 0x2950adb2b00>