# Data structuring

In [868]:
# Loading packages

import requests
from bs4 import BeautifulSoup
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import re
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer


In [869]:
# Load df from disk

df = pd.read_csv("speeches/data.csv")

In [870]:
#removing the index coloumn 
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Date,Speech,Type,Party
0,0,Donald Trump,"February 05, 2019","\r\n \r\n \r\nMadam Speaker, Mr. Vic...",State of the Union,Republican
1,1,Donald Trump,"January 30, 2018","\r\n \r\n Mr. Speaker, Mr. Vice Pres...",State of the Union,Republican
2,2,Donald Trump,"January 20, 2017","\r\n \r\n Chief Justice Roberts, Pre...",Inaugural Address,Republican
3,3,Barack Obama,"January 12, 2016","\r\n \r\n Mr. Speaker, Mr. Vice Pres...",State of the Union,Democrat
4,4,Barack Obama,"January 20, 2015","\r\n \r\n Mr. Speaker, Mr. Vice Pres...",State of the Union,Democrat


In [871]:
len(df)

250

In [872]:
df["Speech"] = df["Speech"].str.replace("transcript-inner", "").str.replace(u'\xa0', u' ')

In [873]:
print(df['Speech'].loc[9])


      
      Mr. Speaker, Mr. Vice President, members of Congress, distinguished guests, and fellow Americans:

      Tonight I want to begin by congratulating the men and women of the 112th Congress, as well as your new Speaker, John Boehner.  (Applause.)  And as we mark this occasion, we’re also mindful of the empty chair in this chamber, and we pray for the health of our colleague -- and our friend -– Gabby Giffords.  (Applause.)

      It’s no secret that those of us here tonight have had our differences over the last two years.  The debates have been contentious; we have fought fiercely for our beliefs.  And that’s a good thing.  That’s what a robust democracy demands.  That’s what helps set us apart as a nation.

      But there’s a reason the tragedy in Tucson gave us pause. Amid all the noise and passion and rancor of our public debate, Tucson reminded us that no matter who we are or where we come from, each of us is a part of something greater -– something more conseq

In [874]:
# drops party orientation that isn't Rep or Dem
df2 = df.drop(df.index[172:250])
#df2.drop('Unnamed: 0', axis = 1)
df2.tail()

Unnamed: 0.1,Unnamed: 0,Name,Date,Speech,Type,Party
167,167,Franklin Pierce,"December 02, 1856",Fellow-Citizens of the Senate and of the House...,State of the Union,Democrat
168,168,Franklin Pierce,"December 31, 1855",Fellow-Citizens of the Senate and of the House...,State of the Union,Democrat
169,169,Franklin Pierce,"December 04, 1854",Fellow-Citizens of the Senate and of the House...,State of the Union,Democrat
170,170,Franklin Pierce,"December 05, 1853",Fellow-Citizens of the Senate and of the House...,State of the Union,Democrat
171,171,Franklin Pierce,"March 04, 1853",My Countrymen: It a relief to feel that no hea...,Inaugural Address,Democrat


In [875]:
# Creates party dummy
Party_dummy = pd.get_dummies(df2['Party'], drop_first=True)
df2['Republican'] = Party_dummy
print(len(df2))

172


In [876]:
#Extracting year alone from the Date column
df2["year"] = df2["Date"].apply(lambda x : int(x.split(",")[2])  if len(x.split(","))==3 else int(x.split(",")[1]) )
df2.head()

Unnamed: 0.1,Unnamed: 0,Name,Date,Speech,Type,Party,Republican,year
0,0,Donald Trump,"February 05, 2019","\r\n \r\n \r\nMadam Speaker, Mr. Vic...",State of the Union,Republican,1,2019
1,1,Donald Trump,"January 30, 2018","\r\n \r\n Mr. Speaker, Mr. Vice Pres...",State of the Union,Republican,1,2018
2,2,Donald Trump,"January 20, 2017","\r\n \r\n Chief Justice Roberts, Pre...",Inaugural Address,Republican,1,2017
3,3,Barack Obama,"January 12, 2016","\r\n \r\n Mr. Speaker, Mr. Vice Pres...",State of the Union,Democrat,0,2016
4,4,Barack Obama,"January 20, 2015","\r\n \r\n Mr. Speaker, Mr. Vice Pres...",State of the Union,Democrat,0,2015


# Cleaning Data 

- Removing \r \n etc. 
- Removing upper case etc. 
- Punctuation as they will not have any predictive power. 
- Removing possessive pronoun terminations
- Stemming and Lemmatization


In [877]:
# \r and \n
df2['Speech_Parsed_1'] = df2['Speech'].str.replace("\r", " ")
df2['Speech_Parsed_1'] = df2['Speech_Parsed_1'].str.replace("\n", " ")
df2['Speech_Parsed_1'] = df2['Speech_Parsed_1'].str.replace("    ", " ")

In [878]:
# Lowercasing the text
df2['Speech_Parsed_2'] = df2['Speech_Parsed_1'].str.lower()

In [879]:
punctuation_signs = list("?:!.,;")
df2['Speech_Parsed_3'] = df2['Speech_Parsed_2']

for punct_sign in punctuation_signs:
    df2['Speech_Parsed_3'] = df2['Speech_Parsed_3'].str.replace(punct_sign, '')

In [880]:
df2['Speech_Parsed_4'] = df2['Speech_Parsed_3'].str.replace("'s", "")

In [881]:
# Downloading punkt and wordnet from NLTK
nltk.download('punkt')
#print("------------------------------------------------------------")
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ester\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ester\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [882]:
# Saving the lemmatizer into an object
wordnet_lemmatizer = WordNetLemmatizer()

nrows = len(df2)
lemmatized_text_list = []

for row in range(0, nrows):
    
    # Create an empty list containing lemmatized words
    lemmatized_list = []
    
    # Save the text and its words into an object
    text = df2.loc[row]['Speech_Parsed_4']
    text_words = text.split(" ")

    # Iterate through every word to lemmatize
    for word in text_words:
        lemmatized_list.append(wordnet_lemmatizer.lemmatize(word, pos="v"))
        
    # Join the list
    lemmatized_text = " ".join(lemmatized_list)
    
    # Append to the list containing the texts
    lemmatized_text_list.append(lemmatized_text)
    
df2['Speech_Parsed_5'] = lemmatized_text_list

In [883]:
# Downloading the stop words list
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ester\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [884]:
# Loading the stop words in english
stop_words = list(stopwords.words('english'))
stop_words[0:10]

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're"]

In [885]:
example = "me eating a meal"
word = "me"

# The regular expression is:
regex = r"\b" + word + r"\b"  # we need to build it like that to work properly

re.sub(regex, "StopWord", example)

'StopWord eating a meal'

In [886]:
df2['Speech_Parsed_6'] = df2['Speech_Parsed_5']

for stop_word in stop_words:

    regex_stopword = r"\b" + stop_word + r"\b"
    df2['Speech_Parsed_6'] = df2['Speech_Parsed_6'].str.replace(regex_stopword, '')

In [887]:
df2.head(1)

Unnamed: 0.1,Unnamed: 0,Name,Date,Speech,Type,Party,Republican,year,Speech_Parsed_1,Speech_Parsed_2,Speech_Parsed_3,Speech_Parsed_4,Speech_Parsed_5,Speech_Parsed_6
0,0,Donald Trump,"February 05, 2019","\r\n \r\n \r\nMadam Speaker, Mr. Vic...",State of the Union,Republican,1,2019,"Madam Speaker, Mr. Vice President, Membe...","madam speaker, mr. vice president, membe...",madam speaker mr vice president members ...,madam speaker mr vice president members ...,madam speaker mr vice president members ...,madam speaker mr vice president members ...


# Creating variables 

In [888]:
#Number of words in each speech
df2["word_count"] = df2["Speech_Parsed_3"].apply(lambda x : len(x))
df2["some_word_count"] = df2["Speech_Parsed_6"].apply(lambda x : len(x))

#Number of unique words in each speech
df2["unique_word"] = df2["Speech_Parsed_3"].apply(lambda x : len(set(x.lower().split()) ) )
df2["some_unique_word"] = df2["Speech_Parsed_6"].apply(lambda x : len(set(x.lower().split()) ) )

#Number of unique words ratio in each speech
df2["unique_word_ratio"] = df2.apply(lambda x : x["unique_word"]/x["word_count"] ,axis=1)
df2["some_unique_word_ratio"] = df2.apply(lambda x : x["some_unique_word"]/x["some_word_count"] ,axis=1)

In [889]:
print('Max word count', max(df2['word_count']))
print('Min word count', min(df2['word_count']))

Max word count 164834
Min word count 2951


In [890]:
# initialize tokenizer
#import nltk
#nltk.download('punkt')

from nltk import sent_tokenize, word_tokenize
#tokenizer = sent_tokenize

# Download positive and negative sentiment lexicons, split into words and convert to set representation. 
negative = set(requests.get('http://ptrckprry.com/course/ssd/data/negative-words.txt').text.split(';\n')[-1].split('\n'))
positive = set(requests.get('http://ptrckprry.com/course/ssd/data/positive-words.txt').text.split(';\n')[-1].split('\n'))
print(len(negative),len(positive))

4784 2007


In [891]:
# define function
def sent_preproc(string):
    return sent_tokenize(string.lower())

def word_preproc(string):
    return word_tokenize(string.lower())


In [892]:
# apply function
sentences = df2.Speech.apply(sent_preproc)

words = df2.Speech.apply(word_preproc)

some_words=df2.Speech_Parsed_6.apply(word_preproc)


In [893]:
# Defines function for counting number of matches with pos/neg words
def count_dictionary(tokenized_doc,dictionary):
    return len([word for word in tokenized_doc if word in dictionary])

In [894]:
df2['positive_words'] = some_words.apply(count_dictionary,dictionary=positive)
df2['negative_words'] = some_words.apply(count_dictionary,dictionary=negative)

# positive and negative words to total words ration
df2["positive_words_ratio"] = df2['positive_words']/df2['some_word_count']
df2["negative_words_ratio"] = df2['negative_words']/df2['some_word_count']

In [895]:
df2["words"]=words
df2["sentences"]=sentences

def count_word(array, word):
    return array.count(word)

df2['we_count'] = df2.words.apply(lambda a: count_word(a, 'we'))
df2['our_count'] = df2.words.apply(lambda a: count_word(a, 'our'))
df2['us_count'] = df2.words.apply(lambda a: count_word(a, 'us'))
df2['america_count'] = df2.words.apply(lambda a: count_word(a, 'america'))
df2['immigrants_count'] = df2.words.apply(lambda a: count_word(a, 'immigrants'))
df2['black_count'] = df2.words.apply(lambda a: count_word(a, 'black'))
df2['war_count'] = df2.words.apply(lambda a: count_word(a, 'war'))
df2['them_count'] = df2.words.apply(lambda a: count_word(a, 'them'))
df2['i_count'] = df2.words.apply(lambda a: count_word(a, 'i'))
df2['united_count'] = df2.words.apply(lambda a: count_word(a, 'united'))
df2['men_count'] = df2.words.apply(lambda a: count_word(a, 'men'))
df2['women_count'] = df2.words.apply(lambda a: count_word(a, 'women'))
df2['gender_count'] = df2.words.apply(lambda a: count_word(a, 'gender'))
df2['job_count'] = df2.words.apply(lambda a: count_word(a, 'job'))
df2['health_count'] = df2.words.apply(lambda a: count_word(a, 'health'))
df2['china_count'] = df2.words.apply(lambda a: count_word(a, 'china'))
df2['climate_count'] = df2.words.apply(lambda a: count_word(a, 'climate'))
df2['moon_count'] = df2.words.apply(lambda a: count_word(a, 'moon'))


In [896]:
df_ia['we_count']=df2['we_count']
df_ia['our_count']=df2['our_count']
df_ia['us_count']=df2['us_count']
df_ia['america_count']=df2['america_count']
df_ia['immigrants_count']=df2['immigrants_count']
df_ia['them_count']=df2['them_count']
df_ia['i_count']=df2['i_count']
df_ia['job_count']=df2['job_count']
df_ia['health_count']=df2['health_count']
df_ia['united_count']=df2['united_count']
df_ia['men_count']=df2['men_count']
df_ia['women_count']=df2['women_count']
df_ia['war_count']=df2['war_count']
df_ia['china_count']=df2['china_count']
df_ia['climate_count']=df2['climate_count']
df_ia['moon_count']=df2['moon_count']


df_sotu['we_count']=df2['we_count']
df_sotu['our_count']=df2['our_count']
df_sotu['us_count']=df2['us_count']
df_sotu['america_count']=df2['america_count']
df_sotu['immigrants_count']=df2['immigrants_count']
df_sotu['them_count']=df2['them_count']
df_sotu['i_count']=df2['i_count']
df_sotu['job_count']=df2['job_count']
df_sotu['health_count']=df2['health_count']
df_sotu['united_count']=df2['united_count']
df_sotu['men_count']=df2['men_count']
df_sotu['women_count']=df2['women_count']
df_sotu['war_count']=df2['war_count']
df_sotu['china_count']=df2['china_count']
df_sotu['climate_count']=df2['climate_count']
df_sotu['moon_count']=df2['moon_count']


In [897]:
df_sotu=df2.copy()
df_ia=df2.copy()
df_rep=df2.copy()
df_demo=df2.copy()

In [899]:
indexNames = df_ia[df_ia['Type'] == 'State of the Union'].index
df_ia.drop(indexNames , inplace=True)
df_ia.to_csv("df_ia.csv", index=False)

indexNames = df_sotu[df_sotu['Type'] == 'Inaugural Address'].index
df_sotu.drop(indexNames , inplace=True)
df_sotu.to_csv("df_sotu.csv", index=False)


indexNames = df_rep[df_rep['Party'] == 'Democrat'].index
df_rep.drop(indexNames , inplace=True)
df_rep.to_csv("df_rep.csv", index=False)

indexNames = df_demo[df_demo['Party'] == 'Republican'].index
df_demo.drop(indexNames , inplace=True)
df_demo.to_csv("df_demo.csv", index=False)

df2.to_csv("df2.csv", index=False)

In [900]:
df_demo.head(1)

Unnamed: 0.1,Unnamed: 0,Name,Date,Speech,Type,Party,Republican,year,Speech_Parsed_1,Speech_Parsed_2,...,i_count,united_count,men_count,women_count,gender_count,job_count,health_count,china_count,climate_count,moon_count
3,3,Barack Obama,"January 12, 2016","\r\n \r\n Mr. Speaker, Mr. Vice Pres...",State of the Union,Democrat,0,2016,"Mr. Speaker, Mr. Vice President, Members o...","mr. speaker, mr. vice president, members o...",...,69,4,0,0,0,11,5,1,4,1


 # Plots  

In [901]:
# state of the union dataset
df_speech_des = df2.groupby('Type').describe().head()

In [835]:
df_speech_des.head()

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Republican,Republican,...,unique_word_ratio,unique_word_ratio,some_unique_word_ratio,some_unique_word_ratio,some_unique_word_ratio,some_unique_word_ratio,some_unique_word_ratio,some_unique_word_ratio,some_unique_word_ratio,some_unique_word_ratio
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Inaugural Address,39.0,83.692308,48.086082,2.0,52.0,73.0,122.5,171.0,39.0,0.589744,...,0.069684,0.0898,39.0,0.07357,0.009711,0.053012,0.067399,0.073429,0.079542,0.099951
State of the Union,133.0,86.030075,50.451888,0.0,40.0,89.0,129.0,170.0,133.0,0.646617,...,0.048634,0.060499,133.0,0.046431,0.009205,0.024806,0.039565,0.046136,0.054211,0.066113


In [836]:
#df_speech_des['word_count']
#with open('mytable.tex','w') as tf:
 #   tf.write(df_by_speech.to_latex())

# Text analysis

## Sentiment analysis using VADER

In [59]:
#!pip install vaderSentiment

In [60]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [61]:
# using polarity_scores() to obtain the polarity indices for given sentence
def sentiment_analyzer_scores(sentence):
    score = analyzer.polarity_scores(sentence)
    print("{:-<40} {}".format(sentence, str(score)))

In [62]:
# THIS TAKES A LONG TIME RUNNING, SO DON'T RUN IT JUST FOR FUN

# This doesn't work as well as the code below, but we might have
# to find a way to do a function that applies the VADER to
# every single row... if that isn't what we are doing below.

# applying the sentiment_analyzer function
vader_sent = df2['Speech'].apply(sentiment_analyzer_scores)


      
      
Madam Speaker, Mr. Vice President, Members of Congress, the First Lady of the United States, and my fellow Americans:

We meet tonight at a moment of unlimited potential. As we begin a new Congress, I stand here ready to work with you to achieve historic breakthroughs for all Americans.

Millions of our fellow citizens are watching us now, gathered in this great chamber, hoping that we will govern not as two parties but as one Nation.

The agenda I will lay out this evening is not a Republican agenda or a Democrat agenda. It is the agenda of the American people.

Many of us campaigned on the same core promises: to defend American jobs and demand fair trade for American workers; to rebuild and revitalize our Nation’s infrastructure; to reduce the price of healthcare and prescription drugs; to create an immigration system that is safe, lawful, modern, and secure; and to pursue a foreign policy that puts America’s interests first.

There is a new opportunity in


      
      Mr. Speaker, Mr. Vice President, Members of Congress, the First Lady of the United States, and my fellow Americans:

Less than 1 year has passed since I first stood at this podium, in this majestic chamber, to speak on behalf of the American People—and to address their concerns, their hopes, and their dreams. That night, our new Administration had already taken swift action. A new tide of optimism was already sweeping across our land.

Each day since, we have gone forward with a clear vision and a righteous mission—to make America great again for all Americans.

Over the last year, we have made incredible progress and achieved extraordinary success. We have faced challenges we expected, and others we could never have imagined. We have shared in the heights of victory and the pains of hardship. We endured floods and fires and storms. But through it all, we have seen the beauty of America’s soul, and the steel in America’s spine.

Each test has forged new American

     {'neg': 0.072, 'neu': 0.711, 'pos': 0.217, 'compound': 0.9998}

      
      Mr. Speaker, Mr. Vice President, Members of Congress, my fellow Americans:

Tonight marks the eighth year that I’ve come here to report on the State of the Union. And for this final one, I’m going to try to make it a little shorter. (Applause.) I know some of you are antsy to get back to Iowa. (Laughter.) I've been there. I'll be shaking hands afterwards if you want some tips. (Laughter.)

And I understand that because it’s an election season, expectations for what we will achieve this year are low. But, Mr. Speaker, I appreciate the constructive approach that you and the other leaderstook at the end of last year to pass a budget and make tax cuts permanent for working families. So I hope we can work together this year on some bipartisan priorities like criminal justice reform -- (applause) -- and helping people who are battling prescription drug abuse and heroin abuse. (Applause.) So, who knows, we might


      
      Mr. Speaker, Mr. Vice President, Members of Congress, my fellow Americans:
 
We are 15 years into this new century.  Fifteen years that dawned with terror touching our shores; that unfolded with a new generation fighting two long and costly wars; that saw a vicious recession spread across our nation and the world.  It has been, and still is, a hard time for many. 
 
But tonight, we turn the page.  Tonight, after a breakthrough year for America, our economy is growing and creating jobs at the fastest pace since 1999.  (Applause.)  Our unemployment rate is now lower than it was before the financial crisis.  More of our kids are graduating than ever before.  More of our people are insured than ever before.  (Applause.)  And we are as free from the grip of foreign oil as we’ve been in almost 30 years.  (Applause.)
 
Tonight, for the first time since 9/11, our combat mission in Afghanistan is over.  (Applause.)  Six years ago, nearly 180,000 American troops served in I


      
      Mr. Speaker, Mr. Vice President, Members of Congress, my fellow Americans:

Today in America, a teacher spent extra time with a student who needed it, and did her part to lift America’s graduation rate to its highest level in more than three decades.

An entrepreneur flipped on the lights in her tech startup, and did her part to add to the more than eight million new jobs our businesses have created over the past four years. 

An autoworker fine-tuned some of the best, most fuel-efficient cars in the world, and did his part to help America wean itself off foreign oil.

A farmer prepared for the spring after the strongest five-year stretch of farm exports in our history.  A rural doctor gave a young child the first prescription to treat asthma that his mother could afford.  A man took the bus home from the graveyard shift, bone-tired but dreaming big dreams for his son.  And in tight-knit communities across America, fathers and mothers will tuck in their kids, pu


      
      
Mr. Speaker, Mr. Vice President, members of Congress, fellow citizens:  
 
Fifty-one years ago, John F. Kennedy declared to this chamber that “the Constitution makes us not rivals for power but partners for progress.”  (Applause.) “It is my task,” he said, “to report the State of the Union -- to improve it is the task of us all.”  
 
Tonight, thanks to the grit and determination of the American people, there is much progress to report.  After a decade of grinding war, our brave men and women in uniform are coming home.  (Applause.)  After years of grueling recession, our businesses have created over six million new jobs.  We buy more American cars than we have in five years, and less foreign oil than we have in 20.  (Applause.)  Our housing market is healing, our stock market is rebounding, and consumers, patients, and homeowners enjoy stronger protections than ever before.  (Applause.)  
 
So, together, we have cleared away the rubble of crisis, and we can say 


      
      
Vice President Biden, Mr. Chief Justice,
members of the United States Congress, distinguished guests, and fellow citizens:  
 
Each time we gather to inaugurate a President we bear witness to the enduring strength of our Constitution.  We affirm the promise of our democracy.  We recall that what binds this nation together is not the colors of our skin or the tenets of our faith or the origins of our names.  What makes us exceptional -- what makes us American -- is our allegiance to an idea articulated in a declaration made more than two centuries ago:
“We hold these truths to be self-evident, that all men are created equal; that they are endowed by their Creator with certain unalienable rights; that among these are life, liberty, and the pursuit of happiness.”  
 
Today we continue a never-ending journey to bridge the meaning of those words with the realities of our time.  For history tells us that while these truths may be self-evident, they’ve never been self-


      
       Mr. Speaker, Mr. Vice President, members of Congress, distinguished guests, and fellow Americans:

Last month, I went to Andrews Air Force Base and welcomed home some of our last troops to serve in Iraq.  Together, we offered a final, proud salute to the colors under which more than a million of our fellow citizens fought -- and several thousand gave their lives.

We gather tonight knowing that this generation of heroes has made the United States safer and more respected around the world.  (Applause.)  For the first time in nine years, there are no Americans fighting in Iraq.  (Applause.)  For the first time in two decades, Osama bin Laden is not a threat to this country.  (Applause.)  Most of al Qaeda’s top lieutenants have been defeated.  The Taliban’s momentum has been broken, and some troops in Afghanistan have begun to come home.

These achievements are a testament to the courage, selflessness and teamwork of America’s Armed Forces.  At a time when too many 


      
      Mr. Speaker, Mr. Vice President, members of Congress, distinguished guests, and fellow Americans:

      Tonight I want to begin by congratulating the men and women of the 112th Congress, as well as your new Speaker, John Boehner.  (Applause.)  And as we mark this occasion, we’re also mindful of the empty chair in this chamber, and we pray for the health of our colleague -- and our friend -– Gabby Giffords.  (Applause.)

      It’s no secret that those of us here tonight have had our differences over the last two years.  The debates have been contentious; we have fought fiercely for our beliefs.  And that’s a good thing.  That’s what a robust democracy demands.  That’s what helps set us apart as a nation.

      But there’s a reason the tragedy in Tucson gave us pause. Amid all the noise and passion and rancor of our public debate, Tucson reminded us that no matter who we are or where we come from, each of us is a part of something greater -– something more conseq

KeyboardInterrupt: 

In [None]:
# THIS TAKES A LONG TIME RUNNING, SO DON'T RUN IT JUST FOR FUN

# applying the sentiment_analyzer function
vader_sent = df2['Speech'].apply(sentiment_analyzer_scores)

In [None]:
vader_sent_df1 = pd.DataFrame(vader_sent1.tolist())
vader_sent_df1.head()

In [54]:
# A TRY AT THE VADER IMPLEMENTATION. DIDN'T WORK.

#def nltk_sentiment(sentence):
 #   from nltk.sentiment.vader import SentimentIntensityAnalyzer
    
 #   nltk_sentiment = SentimentIntensityAnalyzer()
 #   score = nltk_sentiment.polarity_scores(sentence)
 #   return score

In [None]:
#nltk_results = [nltk_sentiment(row) for row in df2]
#results_df = pd.DataFrame(nltk_results)
#text_df = pd.DataFrame(df2, columns=['Speech'])
#nltk_df = text_df.join(results_df)
#nltk_df

In [None]:
average=sum(len(word) for word in words) /len(words)
average 



In [None]:
def main():
    wordCount=0
    letterCount=0
    
    for word in words: 
        letterCount=letterCount+len(word)
        wordCount=wordCount+1
    print("Average word length",letterCount/wordCount)
main()
