In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import datetime
from datetime import datetime
from time import mktime 

## NEED:
* ^DJI (Dow Jones Industrial Average index)
* Social Security (SS) filings
* VG Retirement Funds (expiration dates nearest present)

# User input for iterating dataset creation

In [4]:
def epochDateConverter(period):
    """
    Convert a calendar date string into a Unix epoch timestamp.

    Parameters
    ----------
    period : str
        Date in mm-dd-yyyy format, e.g. '10-31-2018'.

    Returns
    -------
    int
        Seconds since the epoch for the start of that day, as computed by
        time.mktime (i.e. interpreted in the machine's local time zone).

    Raises
    ------
    ValueError
        If `period` does not split into three integer fields on '-' or the
        fields do not form a valid calendar date.
    """
    # Imported locally under a private alias: the notebook's import cell runs
    # `from datetime import datetime`, which rebinds the name `datetime` to the
    # class, so `datetime.date(year, month, day)` does not build a date there.
    from datetime import date as _date

    # Fields are numeric strings, so they must be parsed with int().
    month, day, year = map(int, period.split('-'))
    dateEpoch = int(mktime(_date(year, month, day).timetuple()))

    return dateEpoch
    
    
def yahooFinance(startDateEpoch, endDateEpoch, ticker, interval):
    """
    Scrape the historical-price table from a Yahoo Finance quote page.

    The public history page is scraped instead of using the Yahoo Finance API,
    because access to that API keeps being limited or shut off.

    Parameters
    ----------
    startDateEpoch : int
        Start of the window in epoch seconds (sent as `period1`).
    endDateEpoch : int
        End of the window in epoch seconds (sent as `period2`).
    ticker : str
        Ticker symbol placed in the URL path, e.g. '^DJI'.
    interval : str
        Sampling interval sent as both `interval` and `frequency`
        ('1d', '1wk' or '1mo').

    Returns
    -------
    pandas.DataFrame
        Columns Date, Open, High, Low, Close, AdjClose, Volume, holding the
        cell texts exactly as scraped (strings; nothing is converted to
        numbers or dates).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the page contains no <tbody> element to read rows from.
    """
    pageLink = (
        'https://finance.yahoo.com/quote/{ticker}/history?period1={startDateEpoch}&period2={endDateEpoch}&interval={interval}&filter=history&frequency={interval}'
    ).format(ticker=ticker, startDateEpoch=startDateEpoch,
             endDateEpoch=endDateEpoch, interval=interval)

    # A timeout keeps a stalled connection from hanging the notebook, and an
    # error status is reported as such rather than as a confusing parse error.
    pageResponse = requests.get(pageLink, timeout=30)
    pageResponse.raise_for_status()
    pageContent = BeautifulSoup(pageResponse.content, 'html.parser')

    tablesBody = pageContent.find('tbody')
    if tablesBody is None:
        raise ValueError('No price table found at {}'.format(pageLink))

    columnNames = ['Date', 'Open', 'High', 'Low', 'Close', 'AdjClose', 'Volume']

    # Read the table row by row. Flattening every <span> and reshaping to seven
    # columns fails (or silently shifts values) as soon as one row carries a
    # different number of spans, so rows that are not full price rows are skipped.
    priceRows = []
    for row in tablesBody.find_all('tr'):
        cells = [span.get_text() for span in row.select("span")]
        if len(cells) == len(columnNames):
            priceRows.append(cells)

    DF = pd.DataFrame(priceRows, columns=columnNames)
    return DF

# Interactive query parameters. Each date is converted to epoch seconds right
# after it is entered, so a malformed date raises before the next prompt.
# NOTE(review): no visible cell passes these values on to yahooFinance.
startDate = input('Enter the start date of interest (mm-dd-yyyy): ')
startDateEpoch = epochDateConverter(startDate)
    
endDate = input('Enter the end date of interest (mm-dd-yyyy): ')
endDateEpoch = epochDateConverter(endDate)
    
# Sampling interval and symbol are kept as the raw strings typed by the user.
interval = input('Enter 1d for daily, 1wk for weekly, and 1mo for monthly: ')
ticker = input('Enter the ticker symbol of interest: ')

In [None]:
# Ask for a symbol and show the history URL it produces; the time window and
# the daily interval are fixed inside the URL text.
ticker = input('Enter the ticker symbol of interest: ')
pageLink = (
    'https://finance.yahoo.com/quote/{ticker}/history?period1=1509422400&period2=1540958400&interval=1d&filter=history&frequency=1d'
).format(ticker=ticker)
# Bare expression so the notebook displays the finished link.
pageLink

## DJI

In [2]:
# Download and parse the Yahoo Finance history page for ^DJI; period1/period2
# in the URL are hard-coded epoch-second bounds and the interval is daily.
pageLink = 'https://finance.yahoo.com/quote/^DJI/history?period1=1509422400&period2=1540958400&interval=1d&filter=history&frequency=1d'
pageResponse = requests.get(pageLink)
pageContent = BeautifulSoup(pageResponse.content, 'html.parser')
# Keep the first <thead> and <tbody> on the page; the [0] lookups raise
# IndexError when the page contains no such element.
tablesHead = pageContent.find_all('thead')[0]
tablesBody = pageContent.find_all('tbody')[0]

In [3]:
# Column captions: the text of every <span> inside the table header.
headingTags = tablesHead.select("span")
headings = [tag.get_text() for tag in headingTags]
# Cell values: the text of every <span> inside the table body, folded into
# rows of seven values each (one row per table line).
bodyTags = tablesBody.select("span")
bodies = [tag.get_text() for tag in bodyTags]
bodies = np.reshape(bodies, (len(bodies) // 7, 7))
bodiesDF = pd.DataFrame(bodies)

In [4]:
# Give the seven positional columns of bodiesDF their price-table names.
DJI = pd.DataFrame({
    label: bodiesDF[position]
    for position, label in enumerate(
        ('Date', 'Open', 'High', 'Low', 'Close', 'AdjClose', 'Volume')
    )
})

In [5]:
# Preview the first rows of the scraped ^DJI frame.
DJI.head()

Unnamed: 0,Date,Open,High,Low,Close,AdjClose,Volume
0,"Oct 31, 2018",25008.82,25336.55,25008.82,25115.76,25115.76,448930000
1,"Oct 30, 2018",24482.04,24906.68,24415.69,24874.64,24874.64,470050000
2,"Oct 29, 2018",24818.98,25040.58,24122.23,24442.92,24442.92,443850000
3,"Oct 26, 2018",24770.25,24916.16,24445.19,24688.31,24688.31,505310000
4,"Oct 25, 2018",24736.54,25104.29,24645.56,24984.55,24984.55,439670000


## Social Security

In [7]:
# Download and parse the SSA statistical snapshot page. This rebinds pageLink,
# pageResponse and pageContent, which earlier cells used for Yahoo Finance.
pageLink = 'https://www.ssa.gov/policy/docs/quickfacts/stat_snapshot/'
pageResponse = requests.get(pageLink)
pageContent = BeautifulSoup(pageResponse.content, 'html.parser')
# Bare expression so the cell output shows the response status.
pageResponse

<Response [200]>

In [59]:
# Collect every element whose id is "table1" (find_all returns a list).
SSTable = pageContent.find_all(id = 'table1')
# Display the <thead> of the first match to inspect the column captions.
SSTable[0].find('thead')

<thead>
<tr>
<th class="stubHeading" scope="col">Type of beneficiary</th>
<th scope="col">Total</th>
<th scope="col">Social Security only</th>
<th scope="col"><abbr class="spell">SSI</abbr> only</th>
<th scope="col">Both Social Security and <abbr class="spell">SSI</abbr></th>
</tr>
</thead>

In [62]:
# Print the indented markup of the first table1 match to inspect its structure.
print(SSTable[0].prettify())

<div class="table clear" id="table1">
 <table>
  <caption>
   <span class="tableNumber">
    Table 1.
   </span>
   Number of people receiving Social Security, Supplemental Security Income (
   <abbr class="spell">
    SSI
   </abbr>
   ), or both, September 2018 (in thousands)
  </caption>
  <colgroup span="1" style="width:15em">
  </colgroup>
  <colgroup span="4" style="width:10em">
  </colgroup>
  <thead>
   <tr>
    <th class="stubHeading" scope="col">
     Type of beneficiary
    </th>
    <th scope="col">
     Total
    </th>
    <th scope="col">
     Social Security only
    </th>
    <th scope="col">
     <abbr class="spell">
      SSI
     </abbr>
     only
    </th>
    <th scope="col">
     Both Social Security and
     <abbr class="spell">
      SSI
     </abbr>
    </th>
   </tr>
  </thead>
  <tbody>
   <tr>
    <th class="stub1" scope="row">
     All beneficiaries
    </th>
    <td>
     67,677
    </td>
    <td>
     59,523
    </td>
    <td>
     5,427
    </td>
    <td

In [19]:
# Text of every <th> on the SSA page (column captions and row labels).
# Read from pageContent directly: in the visible cells bodyTags is last
# assigned in the DJI section, so it still holds Yahoo Finance <span> cells.
headings = [th.get_text() for th in pageContent.find_all('th')]


[<th class="stubHeading" scope="col">Type of beneficiary</th>,
 <th scope="col">Total</th>,
 <th scope="col">Social Security only</th>,
 <th scope="col"><abbr class="spell">SSI</abbr> only</th>,
 <th scope="col">Both Social Security and <abbr class="spell">SSI</abbr></th>,
 <th class="stub1" scope="row">All beneficiaries</th>,
 <th class="stub0" scope="row">Aged 65 or older</th>,
 <th class="stub0" scope="row">Disabled, under age 65 <sup>a</sup></th>,
 <th class="stub0" scope="row">Other <sup>b</sup></th>,
 <th class="stubHeading" id="c1" rowspan="2">Type of beneficiary</th>,
 <th class="spanner" colspan="2" id="c2">Beneficiaries</th>,
 <th id="c3" rowspan="2">Total monthly benefits (millions of dollars)</th>,
 <th id="c4" rowspan="2">Average monthly benefit (dollars)</th>,
 <th headers="c2" id="c5">Number (thousands)</th>,
 <th headers="c2" id="c6">Percent</th>,
 <th class="stub3" headers="c1" id="r1">Total</th>,
 <th class="stub0" headers="c1" id="r2">Old-Age and Survivors Insurance<

In [None]:
# Build the Social Security frame from table1 itself. The previous version
# never closed its dict literal (SyntaxError), misspelled the beneficiary
# column, and read bodiesDF, which holds the Dow Jones rows, not this table.
# Each body row is one <th> row label followed by the <td> values, matching
# the five header captions. The table has no per-row date (its caption carries
# the reporting period), so no 'Date' column is produced.
ssColumns = ['TypeOfBeneficiary', 'Total', 'SocialSecurityOnly',
             'SSIOnly', 'BothSocialSecurityAndSSI']
ssRows = [
    [cell.get_text(' ', strip=True) for cell in row.find_all(['th', 'td'])]
    for row in SSTable[0].find('tbody').find_all('tr')
]
# pandas raises ValueError here if a row does not have exactly five cells.
SS = pd.DataFrame(ssRows, columns=ssColumns)