In [11]:
# Homework 10 - Creating an interactive chart 

# In this notebook I obtain and prepare the data to be charted in Vega-Lite:

# President Obama polling data - scraped from Rasmussen Reports using pandas

# Retail gasoline prices - downloaded from the FRED API

In [12]:
import pandas as pd

# Scrape every HTML table on the Rasmussen approval-history page; read_html returns a list of frames.
Obama_Approval = pd.read_html('https://www.rasmussenreports.com/public_content/politics/obama_administration/obama_approval_index_history')

# Loading and cleaning the data: the first table on the page is the one used here.
DF_Obama_Approval = Obama_Approval[0]

# Parse the date strings into real timestamps so the frame can be resampled by date later.
DF_Obama_Approval["Date"] = pd.to_datetime(DF_Obama_Approval["Date"])

# Each percentage column gets the same three-step treatment, in this order:
#   1. swap the '%' character for a space,
#   2. convert the text to numbers (anything unparseable becomes NaN),
#   3. drop the rows where that column ended up NaN.
for percent_column in ('Total Approve', 'Total Disapprove', 'Strongly Approve', 'Strongly Disapprove'):
  without_percent_sign = DF_Obama_Approval[percent_column].str.replace('%',' ')
  DF_Obama_Approval[percent_column] = pd.to_numeric(without_percent_sign, errors ='coerce')
  DF_Obama_Approval.dropna(subset = [percent_column], inplace=True)

# Weekly averages of the presidential approval figures

# Group the daily polls into weekly bins keyed on 'Date' and average each bin,
# then turn the resulting date index back into an ordinary 'Date' column.
weekly_means = DF_Obama_Approval.resample('W', on='Date').mean()
DF_Obama_Approval_Weekly = weekly_means.reset_index()

# Push every weekly label forward by one day, so that the average for a given week
# is stamped on the Monday of the following week.
# (The FRED gasoline series is weekly with observations dated on Mondays, so this
# makes the two data sets line up on 'Date' when they are merged.)
DF_Obama_Approval_Weekly['Date'] = DF_Obama_Approval_Weekly['Date'] + pd.DateOffset(1)

In [13]:
# Importing the required packages to download the data from the api.

import requests

import os 

import json

from google.colab import files

In [14]:
# Downloading the JSON file from the FRED API and loading it into a pandas data frame

# The API key is a credential, so it is read from the environment instead of being
# embedded in the notebook (where it leaks whenever the notebook is shared or committed).
# Set it before running this cell, e.g. os.environ['FRED_API_KEY'] = '...' in a private
# cell, or `export FRED_API_KEY=...` before starting the kernel.
fred_api_key = os.environ.get('FRED_API_KEY')
if not fred_api_key:
  raise RuntimeError('Set the FRED_API_KEY environment variable to your FRED API key before running this cell.')

url = 'https://api.stlouisfed.org/fred/series/observations'

# Query parameters are passed separately so requests handles the URL encoding.
# GASALLW is the series id this notebook uses for the weekly retail gasoline price.
# The timeout prevents the cell from hanging forever on a stalled connection.
data = requests.get(
  url,
  params={'series_id': 'GASALLW', 'api_key': fred_api_key, 'file_type': 'json'},
  timeout=30,
)

# Fail right here on an HTTP error (bad key, rate limit, outage) rather than with a
# confusing KeyError on 'observations' further down.
data.raise_for_status()

Retail_Gasoline_Consumer_Price = data.json()

Observations = Retail_Gasoline_Consumer_Price['observations']

# Keep only the two fields that are charted: the observation date and its value.
Retail_Gasoline_Consumer_Price_Array = [
  {'date': x['date'], 'value': x['value']} for x in Observations
]

# Naming the columns explicitly keeps the frame well-formed even when the API
# returns no observations (otherwise the rename below fails on a length mismatch).
DF_Retail_Gasoline_Price = pd.DataFrame(Retail_Gasoline_Consumer_Price_Array, columns=['date', 'value'])

# Adding New Column Names

DF_Retail_Gasoline_Price.columns = ['Date','Retail Price of Gasoline (Dollars per Gallon)']

# Formatting Dates

DF_Retail_Gasoline_Price['Date'] = pd.to_datetime(DF_Retail_Gasoline_Price['Date'])

# Formatting Price Index
# errors='coerce' turns any value that is not a number into NaN instead of raising.

DF_Retail_Gasoline_Price['Retail Price of Gasoline (Dollars per Gallon)'] = pd.to_numeric(DF_Retail_Gasoline_Price['Retail Price of Gasoline (Dollars per Gallon)'], errors ='coerce')


In [15]:
# Combine the weekly approval averages with the gasoline prices into a single data set.
# Both frames share a 'Date' column; only dates present in both frames are kept.

DF_Obama_Approval_Weekly_With_Gasoline_Prices = DF_Obama_Approval_Weekly.merge(
  DF_Retail_Gasoline_Price,
  on='Date',
)

# Add a 'Year' field derived from 'Date' so that a year-based slider can be built in Vega-Lite.

DF_Obama_Approval_Weekly_With_Gasoline_Prices['Year'] = DF_Obama_Approval_Weekly_With_Gasoline_Prices['Date'].dt.year

# Export the merged data set to CSV for use by the chart.

DF_Obama_Approval_Weekly_With_Gasoline_Prices.to_csv("Homework10_President_Obama_Polling_Weekly_Retail_Price_of_Gasoline.csv")