In [None]:
from urllib.request import urlopen
from aiohttp import ClientSession
import asyncio, re

In [None]:
class Scan:
    """
    Scans the market to find interesting tickers

    Constructing an instance performs all the work: it downloads the symbol
    list, fetches one quote page per symbol concurrently and then fills
    ``active_tickers`` with the symbols that pass the volume and gap filters.

    Attributes
    ----------
    url : quote-page URL template, formatted with the upper-cased ticker
    nasdaq_tickers : list of symbols returned by ``_import_nasdaq``
    responses : dict mapping ticker -> raw page body (bytes)
    attributes : names of the quote fields extracted from each page
    active_tickers : dict mapping ticker -> extracted fields plus
        ``gap1``/``gap2`` for every ticker that passed the filters
    """

    def __init__(self):
        self.url = "http://www.finance.yahoo.com/quote/{0}/?p={0}"
        self.nasdaq_tickers = self._import_nasdaq()
        self.active_tickers = {}
        self.responses = {}
        self.attributes = [
                           'regularMarketVolume',
                           'averageVolume',
                           'postMarketPrice',
                           'preMarketPrice',
                           'regularMarketPrice',
                           'previousClose',
                           'fiftyTwoWeekLow',
                           'fiftyTwoWeekHigh',
                           ]

        # Use a private loop: closing the shared default loop would make a
        # second Scan() in the same process fail with "Event loop is closed".
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(self._fill_responses())
        finally:
            loop.close()

        for ticker in self.nasdaq_tickers:
            self._fill_ticker_data(ticker)


    def _import_nasdaq(self):
        """
        Downloads the Nasdaq Trader symbol directory and returns its tickers

        The symbol of each line is the text between the first two characters
        and the next ``|``. The first and last lines of the file are dropped
        (presumably its header and trailer rows), as is every symbol that
        contains a ``$``.
        """

        tickers = []
        # Context manager so the connection is released even if parsing fails.
        with urlopen(
                'ftp://ftp.nasdaqtrader.com/symboldirectory/nasdaqtraded.txt'
        ) as buffer_data:
            for line in buffer_data:
                line = line.decode('utf-8')
                tickers.append(line[2:line.find('|', 2)])
        return [ticker for ticker in tickers[1:-1] if '$' not in ticker]


    async def _fill_responses(self):
        """
        Fetches the quote page of every ticker concurrently

        Returns the ``(done, pending)`` pair produced by ``asyncio.wait``.
        A fetch that fails does not raise here; it simply leaves no entry in
        ``self.responses`` for that ticker.
        """
        tasks = []

        async with ClientSession() as session:
            for ticker in self.nasdaq_tickers:
                task = asyncio.ensure_future(self._fetch_response(ticker, session))
                tasks.append(task)

            if not tasks:
                # asyncio.wait raises ValueError on an empty collection.
                return set(), set()

            return await asyncio.wait(tasks)

    async def _fetch_response(self, ticker, session):
        """
        Stores the raw body of the ticker's quote page in ``self.responses``
        """

        async with session.get(self.url.format(ticker.upper())) as response:
            self.responses[ticker] = await response.read()


    def _fill_ticker_data(self, ticker):
        """
        Extracts the quote fields of a ticker and records it if it is active

        A ticker is recorded in ``self.active_tickers`` when its volume passes
        the threshold and either gap is at least 4% in absolute value:

        * with a pre- or post-market price, ``gap1`` is the move from the
          regular price to that price and ``gap2`` the move from the previous
          close to the regular price;
        * otherwise ``gap1`` is the move from the previous close to the
          regular price and ``gap2`` is 0.

        Tickers without a downloaded page, or whose fields are missing, are
        skipped instead of raising.
        """

        page = self.responses.get(ticker)
        if page is None:
            # The fetch failed or never ran; nothing to analyse.
            return

        # Decode once here rather than once per attribute.
        if isinstance(page, bytes):
            page = page.decode('utf-8', errors='replace')

        ticker_dict = {
            attribute: self._grab_attribute(page, ticker, attribute)
            for attribute in self.attributes
        }

        # Missing volumes count as zero so the comparison cannot raise.
        average_volume = ticker_dict['averageVolume'] or 0
        market_volume = ticker_dict['regularMarketVolume'] or 0
        if not (average_volume > 5e6 or market_volume > 5e3):
            return

        regular = ticker_dict['regularMarketPrice']
        previous = ticker_dict['previousClose']
        # Pre-market takes precedence over post-market, as before.
        extended = ticker_dict['preMarketPrice'] or ticker_dict['postMarketPrice']

        session_gap = self._relative_change(regular, previous)
        if extended:
            gap1 = self._relative_change(extended, regular)
            gap2 = session_gap
        else:
            gap1 = session_gap
            gap2 = 0

        ticker_dict['gap1'] = gap1
        ticker_dict['gap2'] = gap2

        if any(gap is not None and abs(gap) >= .04 for gap in (gap1, gap2)):
            self.active_tickers[ticker] = ticker_dict


    @staticmethod
    def _relative_change(current, reference):
        """
        Returns (current - reference) / reference, or None if not computable
        """
        if current is None or not reference:
            # A missing or zero reference price would raise on division.
            return None
        return (current - reference) / reference


    def _grab_attribute(self, string, ticker, attribute):
        """
        Grabs the relevant attribute from the HTML

        ``string`` is the page body, as bytes or already-decoded text. The
        ``"raw"`` value of the attribute is returned as an int for the two
        volume attributes and as a float otherwise. None is returned when the
        attribute is absent, empty, or not numeric.
        """

        if isinstance(string, bytes):
            string = string.decode('utf-8', errors='replace')

        # Escape the ticker so symbols such as "BRK.A" are matched literally.
        pattern = (re.escape(ticker.upper()) + r'","price,summaryDetail.*?'
                   + re.escape(attribute) + r'":\{(.*?)\}')
        found = re.search(pattern, string)
        if found is None or found.group(1) == "":
            return None

        # Accept "raw" whether or not another key follows it.
        raw = re.search(r'"raw":([^,]+)', found.group(1))
        if raw is None:
            return None

        try:
            value = float(raw.group(1))
        except ValueError:
            return None

        if attribute == 'regularMarketVolume' or attribute == 'averageVolume':
            # Volumes are share counts; return a number so they can be compared.
            return int(value)
        return value