To Extract Historical Stock Data from Yahoo! Finance
==

To do list:
- Read and write json data (learn from yahoo-oauth)
- Use simple URL requests to authenticate (two-legged)

# Read and Write JSON Data

The user needs to specify *consumer_key* and *consumer_secret* in the specified .json file to begin with.

The program can then write further authentication data (such as the expiry time) into the same file. This data is used to avoid re-authenticating while the token is still valid.

## Learning resource 
```
pip install yahoo-oauth
yahoo_oauth/yahoo_oauth.py
yahoo_oauth/utils.py
```

In [1]:
import json

def json_write_data(json_data, filename):
    """Write json data into a file

    json_data : any object that json.dump can serialise
    filename : path of the file to create or overwrite

    The output is indented by 4 spaces with sorted keys.
    Returns True once the data has been written; failures surface as
    exceptions (e.g. OSError from open, TypeError from json.dump).
    """
    # ensure_ascii=False writes non-ASCII characters verbatim, so the file is
    # opened explicitly as UTF-8 instead of relying on the locale default.
    with open(filename, 'w', encoding='utf-8') as fp:
        json.dump(json_data, fp, indent=4,
                  sort_keys=True,
                  ensure_ascii=False)
    return True


def json_get_data(filename):
    """Get data from json file

    filename : path of the UTF-8 encoded json file to read

    Returns the decoded json content. Failures surface as exceptions
    (e.g. OSError from open, json.JSONDecodeError from json.load).
    """
    with open(filename, 'r', encoding='utf-8') as fp:
        return json.load(fp)

# Logging

Set up a logger that prints log messages to the console (no log file is written).

In [6]:
import logging
from logging import Logger, handlers

class YahooLogger(logging.Logger):
    """
    Logger used by the Yahoo! helpers.

    Every instance gets its own stream handler whose records show the
    timestamp, level, logger name, module and function name.
    """

    def __init__(self, name, level=logging.DEBUG):
        """
        - name : logger name
        - level : minimum level of the records to emit (DEBUG by default)
        """
        super().__init__(name)
        self.setLevel(level)

        console = logging.StreamHandler()
        console.setFormatter(
            logging.Formatter("[%(asctime)s %(levelname)s] [%(name)s.%(module)s.%(funcName)s] %(message)s")
        )
        self.addHandler(console)

# Initiate a logger
# The logger class must be registered before getLogger() is called so that
# the 'Yahoo! OAuth' logger is created as a YahooLogger (with its handler).
logging.setLoggerClass(YahooLogger)
logger = logging.getLogger('Yahoo! OAuth')
# Do not hand records on to ancestor loggers' handlers.
logger.propagate = False

In [68]:
import base64
import random
import re
import time
import urllib
import urllib.error
import urllib.parse
import urllib.request

class YahooOAuth():
    """
    Yahoo! two-legged authentication

    Requests a token from the Yahoo! OAuth v2 get_request_token endpoint
    using a PLAINTEXT signature, keeps the resulting parameters in
    `service_params` and saves them to a json file so that a still-valid
    token can be reused by a later session.
    """

    def __init__(self, consumer_key, consumer_secret, **kwargs):
        """
        consumer_key : client key
        consumer_secret : client secret
        kwargs : extra properties set on the instance; `from_file` is the
                 path of a json session file to load the parameters from
                 (and to save the refreshed token to)

        A new token is requested immediately when no valid one is stored.
        """
        self.service_params = {}

        if kwargs.get('from_file'):
            logger.debug("Checking the json session file.")
            self.from_file = kwargs.get('from_file')
            self.service_params = json_get_data(self.from_file)
            vars(self).update(self.service_params)
            # The file takes precedence; the arguments are only a fallback
            # for a session file that does not hold the credentials yet.
            self.consumer_key = self.service_params.get('consumer_key', consumer_key)
            self.consumer_secret = self.service_params.get('consumer_secret', consumer_secret)
        else:
            self.consumer_key = consumer_key
            self.consumer_secret = consumer_secret

        vars(self).update(kwargs) # Update properties just in case user specifies them

        # Example to get a property from json_data
        # self.callback_uri = vars(self).get('callback_uri', CALLBACK_URI)

        # Initiate Two-legged Auth
        self.service_params.update({
            'consumer_key' : self.consumer_key,
            'consumer_secret' : self.consumer_secret
        })

        # A missing/empty timestamp means "never authenticated".
        self.token_time = int(vars(self).get('timestamp') or 0)

        if not self.token_is_valid():
            self.refresh_access_token()

    def refresh_access_token(self):
        """Refresh access token.

        Generates a new timestamp and nonce, requests a token, stores the
        parsed result in `service_params` and saves it to the session file
        (`from_file`, or 'secrets.json' when none was given).
        """
        logger.debug("Refresh acess token")

        self.update_time_nonce() # update timestamp & nonce
        # The timestamp is stored as a string; keep token_time numeric so
        # token_is_valid() can subtract it from time.time().
        self.token_time = int(self.service_params['timestamp'])

        # Form auth url and parse results
        self.oauth_url = 'https://api.login.yahoo.com/oauth/v2/get_request_token?oauth_callback=oob'
        # PLAINTEXT signature: consumer secret followed by '&', which
        # urlencode escapes to '%26' together with any other unsafe character.
        query = urllib.parse.urlencode([
            ('oauth_consumer_key', self.service_params['consumer_key']),
            ('oauth_signature', self.service_params['consumer_secret'] + '&'),
            ('oauth_timestamp', self.service_params['timestamp']),
            ('oauth_nonce', self.service_params['nonce']),
            ('oauth_signature_method', 'PLAINTEXT'),
            ('oauth_version', '1.0'),
        ])
        self.two_leg_oauth_url = self.oauth_url + '&' + query

        self.token = self.url_request(self.two_leg_oauth_url)
        self.parse_token(self.token)

        # Save session token to file
        json_write_data(self.service_params, vars(self).get('from_file') or 'secrets.json')

    def token_is_valid(self):
        """ Check the validity of the token: 3600s

        Returns True while less than 3540 seconds have elapsed since
        `token_time`, False otherwise.
        """
        elapsed_time = time.time() - self.token_time
        logger.debug("ELAPSED TIME: %s", elapsed_time)
        if elapsed_time > 3540: # 1 minute before it expires
            logger.debug("Token has EXPIRED.")
            return False
        logger.debug("Token is still valid")
        return True

    def parse_token(self, s):
        """Parse session token from the authentication response
        I don't need the xoauth_request_auth_url.
        I think that is what I would need to pass to the user
        if it was for a three-legged authentication.

        s : token response, a string of '&'-separated key=value pairs

        Stores oauth_token, oauth_token_secret and oauth_token_expires_in
        in `service_params` (values are kept exactly as received).
        Raises ValueError when one of the expected fields is missing.
        """
        fields = {}
        for pair in s.strip().split('&'):
            key, sep, value = pair.partition('=')
            if sep:
                # Keep the first occurrence of a repeated key.
                fields.setdefault(key, value)

        wanted = ('oauth_token', 'oauth_token_secret', 'oauth_expires_in')
        missing = [key for key in wanted if key not in fields]
        if missing:
            # The response itself is not included: it may contain secrets.
            raise ValueError(
                'Token response is missing: ' + ', '.join(missing))

        self.service_params.update({
            'oauth_token' : fields['oauth_token'],
            'oauth_token_secret' : fields['oauth_token_secret'],
            'oauth_token_expires_in' : fields['oauth_expires_in']
        })

    def url_request(self, url):
        """Read the content from a url
        Returns the content in string

        url : address to request

        Raises urllib.error.HTTPError when the server answers with an error
        status and urllib.error.URLError when it cannot be reached; both
        are logged before being re-raised.
        """
        req = urllib.request.Request(url)
        # Handle errors
        try:
            # The context manager closes the connection once the body is read.
            with urllib.request.urlopen(req) as response:
                oauth_token = response.read().decode("utf8")
        except urllib.error.HTTPError as e:
            logger.error('The server cannot fulfill the request.')
            logger.error('Error code: %s', e.code)
            raise
        except urllib.error.URLError as e:
            logger.error('We failed to reach the Yahoo! OAuth server')
            logger.error('Error reason: %s', e.reason)
            raise

        logger.debug('Authenticated successfully.')
        return oauth_token

    def update_time_nonce(self):
        """Update two-leg authentication:
        - timestamp : current time in whole seconds, as a string
        - nonce : see get_nonce
        """
        self.service_params.update({
            'timestamp' : str(int(time.time())),
            'nonce' : self.get_nonce(),
        })

    def get_nonce(self):
        """Unique token generated for each request

        Returns the base64 encoding of 24 random decimal digits, as a string.
        """
        digits = ''.join(str(random.randint(0, 9)) for _ in range(24))
        return base64.b64encode(digits.encode('utf-8')).decode('utf-8')

# Load the credentials (and any cached token) from the json session file;
# the explicit key/secret arguments are left unset.
yf = YahooOAuth(consumer_key=None, consumer_secret=None, from_file='oauth.json')
print(yf.service_params)

[2015-08-20 21:57:32,495 DEBUG] [Yahoo! OAuth.<ipython-input-68-78863ad82b8c>.__init__] Checking the json session file.
[2015-08-20 21:57:32,496 DEBUG] [Yahoo! OAuth.<ipython-input-68-78863ad82b8c>.token_is_valid] ELAPSED TIME: 1000002352.496213
[2015-08-20 21:57:32,496 DEBUG] [Yahoo! OAuth.<ipython-input-68-78863ad82b8c>.token_is_valid] Token has EXPIRED.
[2015-08-20 21:57:32,497 DEBUG] [Yahoo! OAuth.<ipython-input-68-78863ad82b8c>.refresh_access_token] Refresh acess token
[2015-08-20 21:57:53,233 DEBUG] [Yahoo! OAuth.<ipython-input-68-78863ad82b8c>.url_request] Authenticated successfully.


{'timestamp': '1440079052', 'consumer_secret': '0d0c9222ce8533a5e1350e38f9216ef633561289', 'oauth_token': 'dwaqbvk', 'oauth_token_secret': '276ee3dfff914fff7e48809bd0a1af346a783023', 'oauth_token_expires_in': '3600', 'nonce': 'MjU2NDE5ODM5Mzc5MjM4MDg3NjQ1NTAw', 'consumer_key': 'dj0yJmk9VWo0RXhCSk80MmlQJmQ9WVdrOVNXRjFOa2cwTm1zbWNHbzlNQS0tJnM9Y29uc3VtZXJzZWNyZXQmeD1hYw--'}


bsfdyha
3612958d224e1d5aa70f18270bdcbb42483e142c
3600


In [1]:
"""
https://api.login.yahoo.com/oauth/v2/get_request_token?oauth_callback=oob&oauth_consumer_key=PQRST&oauth_nonce=noncasdfe&oauth_signature=ABCD%26&oauth_signature_method=PLAINTEXT&oauth_timestamp=1401800435&oauth_version=1.0
while replacing ABCD with the Consumer Secret and PQRST with the Consumer Key.
Note: oauth_signature is your Consumer Secret followed by %26. If your Consumer Secret was ABCD your oauth_signature would become: ABCD%26
"""
# Build URL
# `yf` is the YahooOAuth instance created in an earlier cell; it keeps the
# credentials directly as `consumer_key` / `consumer_secret` attributes
# (there is no `session` attribute on that class).
url_base = 'https://api.login.yahoo.com/oauth/v2/get_request_token?oauth_callback=oob'
consumer_key = '&oauth_consumer_key=' + yf.consumer_key
consumer_secret = '&oauth_signature=' + yf.consumer_secret + '%26'
# Fixed sample nonce and timestamp, taken from the example URL above.
other = '&oauth_nonce=noncasdfe' + '&oauth_signature_method=PLAINTEXT&oauth_timestamp=1401800435&oauth_version=1.0'
print(url_base + consumer_key + consumer_secret + other)


NameError: name 'yf' is not defined