In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import requests 
from bs4 import BeautifulSoup as soup

header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0'} 


# Getting The Dataframe Setup

In [56]:
url = 'https://www.presidency.ucsb.edu/documents/app-categories/spoken-addresses-and-remarks/presidential/inaugural-addresses?items_per_page=60'
raw = requests.get(url,headers=header)


In [57]:
bsObj = soup(raw.content,'html.parser') # Parse the html
links = bsObj.find_all('div', class_="field-title") 
names = bsObj.find_all('div', class_="col-sm-4 margin-top")




In [58]:
president_names = [name.p.a.text for name in names]

speech_links = [title.find('a')['href'] for title in links]



In [59]:
# add second list
url = 'https://www.presidency.ucsb.edu/documents/app-categories/spoken-addresses-and-remarks/presidential/inaugural-addresses?items_per_page=60&page=1'
raw = requests.get(url,headers=header)



In [60]:
bsObj = soup(raw.content,'html.parser') 
links = bsObj.find_all('div', class_="field-title") 
names = bsObj.find_all('div', class_="col-sm-4 margin-top")




In [61]:
president_names.extend([name.p.a.text for name in names])

speech_links.extend([title.find('a')['href'] for title in links])



In [152]:
data = {'Name': president_names,
        'Link': speech_links}

# Create DataFrame
df = pd.DataFrame(data)

df


Unnamed: 0,Name,Link
0,Donald J. Trump (2nd Term),/documents/inaugural-address-54
1,"Joseph R. Biden, Jr.",/documents/inaugural-address-53
2,Donald J. Trump (1st Term),/documents/inaugural-address-14
3,Barack Obama,/documents/inaugural-address-15
4,Barack Obama,/documents/inaugural-address-5
...,...,...
58,Thomas Jefferson,/documents/inaugural-address-20
59,Thomas Jefferson,/documents/inaugural-address-19
60,John Adams,/documents/inaugural-address-18
61,George Washington,/documents/inaugural-address-17


# Each President Speech

In [66]:
base_url = 'https://www.presidency.ucsb.edu/'


In [150]:
#method to search

unifying_words = ["together", "unity", "united", "one nation", "one people", "common good", "shared purpose", "cooperation", "reconciliation", "harmony", "bridge divides", "come together", "join hands", "heal", "bond", "mutual respect", "compromise", "bipartisan", "consensus", "all americans", "fellow citizens", "national spirit", "collective effort", "civic spirit", "shared prosperity", "common destiny", "national renewal", "rebuilding trust", "strength in diversity", "enduring values", "our shared history", "bridging differences", "fostering dialogue", "shared responsibility", "working side by side", "democratic principles", "collective resilience", "moral courage", "upholding our traditions", "healing divisions", "inclusive leadership", "neighborly love", "faith in each other", "building for future generations", "honoring our past", "a more perfect union", "promise of America", "shared sacrifice", "rising together", "responsibility to one another", "mutual understanding", "forging a new path", "hand in hand", "strengthening our democracy", "joining forces", "seeking harmony", "one future", "unity of purpose", "guiding principles", "national stability", "rekindling hope"]

polarizing_words = ["silent majority", "real americans", "true patriots", "taking back", "saving", "making America great again", "ideological battle", "radical", "corrupt", "enemy", "betrayal", "stolen", "attack", "disgrace", "destroy", "defeat", "overthrow", "rigged", "illegitimate", "danger", "threat", "invasion", "catastrophe", "collapse", "disaster", "crime wave", "carnage", "poisoned", "crisis", "war on", "taking away", "crushing", "oppressors", "false narratives", "treasonous", "swamp", "deep state", "elite class", "failed policies", "tyranny", "deception", "conspiracy", "subversion", "internal sabotage", "recklessness", "failure", "incompetence", "betrayal of trust", "disgraceful actions", "puppet masters", "dark forces", "the great betrayal", "traitorous", "anti-American", "illegitimate rulers", "selling out our country", "globalist agenda", "failed leadership", "the great deception", "two-faced politicians", "hypocrites", "reckoning day", "stolen future", "collapsing system", "deep-rooted rot", "false leaders", "bureaucratic swamp", "anti-democratic forces", "dismantling our freedoms", "forced submission", "creeping tyranny", "puppet class", "those who seek to control us", "abandoned values", "destabilization", "surrendering sovereignty", "political fraud", "rotten to the core", "weak leadership", "orchestrated chaos", "national decline", "ceding our power", "hidden agenda", "unwavering resolve", "history is on our side", "holding the line", "a turning point", "staying vigilant", "rising tide", "the people's mandate", "course correction", "true defenders", "restoring what was lost", "holding power accountable", "demanding justice", "aliens", "criminals", "rapists"]


def get_unifying_words(text):
    total = 0
    for phrase in unifying_words:
        count = text.count(phrase)
        total += count
            
    return total

def get_polarizing_words(text):
    total = 0
    for phrase in polarizing_words:
        count = text.count(phrase)
        total += count
            
    return total

def get_total_word_count(text):
    total = 0
    words = text.strip().split(' ') # Split the text into words
    #print(words)
    for word in words:
        if word != '':
            total += 1
    return total



In [142]:
unifying_words_count = []
polarizing_words_count = []
total_word_count = []

for end in df['Link'].values.tolist():
    url = base_url + end
    print(url)
    raw = requests.get(url,headers=header)
    bsObj = soup(raw.content,'html.parser') 
    
    speech = bsObj.find_all(class_="field-docs-content") 
    
    #replace all new lines and get rid of empty spaces and make sure every thing is lower case before putting it into a string
    speech_text = ("".join([s.get_text().replace('\n', ' ') for s in speech])).lower() 
    speech_text = speech_text.replace("(applause.)", "") # gets rid of applause pauses
    unifying_words_count.append(get_unifying_words(speech_text))
    polarizing_words_count.append(get_polarizing_words(speech_text))
    total_word_count.append(get_total_word_count(speech_text))
    #print(speech_text)
    #print(get_unifying_words(speech_text))
    #print(get_polarizing_words(speech_text))
    #df

https://www.presidency.ucsb.edu//documents/inaugural-address-54
https://www.presidency.ucsb.edu//documents/inaugural-address-53
https://www.presidency.ucsb.edu//documents/inaugural-address-14
https://www.presidency.ucsb.edu//documents/inaugural-address-15
https://www.presidency.ucsb.edu//documents/inaugural-address-5
https://www.presidency.ucsb.edu//documents/inaugural-address-13
https://www.presidency.ucsb.edu//documents/inaugural-address-52
https://www.presidency.ucsb.edu//documents/inaugural-address-12
https://www.presidency.ucsb.edu//documents/inaugural-address-51
https://www.presidency.ucsb.edu//documents/inaugural-address
https://www.presidency.ucsb.edu//documents/inaugural-address-10
https://www.presidency.ucsb.edu//documents/inaugural-address-11
https://www.presidency.ucsb.edu//documents/inaugural-address-0
https://www.presidency.ucsb.edu//documents/oath-office-and-second-inaugural-address
https://www.presidency.ucsb.edu//documents/inaugural-address-1
https://www.presidency.ucs

In [143]:
df['Unifying Words Count'] = unifying_words_count 
df['Polarizing Words Count'] = polarizing_words_count 
df['Total Words Count']  = total_word_count 


In [144]:
df

Unnamed: 0,Name,Link,Unifying Words Count,Polarizing Words Count,Total Words Count
0,Donald J. Trump (2nd Term),/documents/inaugural-address-54,20,23,2935
1,"Joseph R. Biden, Jr.",/documents/inaugural-address-53,31,6,2532
2,Donald J. Trump (1st Term),/documents/inaugural-address-14,12,4,1455
3,Barack Obama,/documents/inaugural-address-15,15,5,2090
4,Barack Obama,/documents/inaugural-address-5,13,15,2391
...,...,...,...,...,...
58,Thomas Jefferson,/documents/inaugural-address-20,4,3,2157
59,Thomas Jefferson,/documents/inaugural-address-19,2,1,1717
60,John Adams,/documents/inaugural-address-18,4,7,2319
61,George Washington,/documents/inaugural-address-17,2,0,135


# Adding Political Party Affiliation

In [None]:
# insert political party for every president speech 