# Stock Screener

### This notebook is intended to help get a centralized view of important statistics for stocks of companies.

### 1. Imports
We begin with all the imports we will need for analysis.

In [1]:
import pickle
import os
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
import math

### 2. Getting tickers for gold stocks.

In [2]:
#Where tickers comes from
goldStocksUrl = "http://www.miningfeeds.com/gold-mining-report-all-countries"

def goldStockParser(url, tickerColumn, stockTypes):
    """Goes to url above and pulls the tickers of each 
       gold mining company, dumps them in a pickle file 
       and returns the tickers as a list."""
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "lxml")
    table = soup.find("table")
    tickers = []
    for row in table.findAll("tr")[1:]:
        ticker = row.findAll("td")[tickerColumn].text
        #Randgold real ticker.
        if (ticker == "GOLD.L"):
            ticker = "GOLD"
        tickers.append(ticker)

    with open(stockTypes + ".pickle", "wb") as file:
        pickle.dump(tickers, file)

    return tickers

def pickleLoader(stockTypes):
    """Loads tickers from a pickle file and returns them as a list."""
    with open(stockTypes + ".pickle", "rb") as file:
        return pickle.load(file)

#Look in cache if False, go to website if true.    
reloadGoldTickers = False

badDataStocks = ["GLF.AX", "AZ.TO", "LEX.TO", "RN.TO", "P.TO", "CSQ.V"]
goldStockTickers = []

if reloadGoldTickers:
    goldStockTickers = goldStockParser(goldStocksUrl, 2, "goldStocks")
else:
    goldStockTickers = pickleLoader("goldStocks")

#Remove stocks with bad data.    
for ticker in badDataStocks:
    goldStockTickers.remove(ticker)
    
print(goldStockTickers)

['NEM', 'ABX', 'FNV.TO', 'NCM.AX', 'GG', 'AEM', 'KGC', 'RGLD', 'RRS.L', 'FRES.L', 'AGG.AX', 'EVN.AX', 'GFI', 'BTO.TO', 'YRI.TO', 'IMG.TO', 'AGI.TO', 'CEY.L', 'PVG', 'DGC.TO', 'NGD.TO', 'RRL.AX', 'OGC.TO', 'SBM.AX', 'CG.TO', 'EDV.TO', 'NG.TO', 'ELD.TO', 'SAR.AX', 'SMF.TO', 'HMY', 'TXG.TO', 'CGG.TO', 'NSU.TO', 'GUY.TO', 'RSG.AX', 'SEA.TO', 'MUX', 'RIC', 'PG.TO', 'OSK.TO', 'ALIAF', 'CNL.TO', 'GOR.AX', 'POG.L', 'DNA.TO', 'GSS', 'PAF.L', 'AR.TO', 'PRU.AX', 'TBR.AX', 'BGM.V', 'TGZ.TO', 'SBB.TO', 'PRH.AX', 'GORO', 'WDO.TO', 'HRT.TO', 'WAF.AX', 'VIT.V', 'AOT.V', 'NGQ.TO', 'BDR.AX', 'HUM.L', 'RMS.AX', 'SLR.AX', 'BSX.TO', 'GCY.AX', 'KOR.TO', 'MOZ.TO', 'GBU.TO', 'BTR.V', 'HRR.AX', 'PRB.V', 'ALK.AX', 'HAS.AX', 'R.TO', 'RPM.V', 'ARZ.TO', 'JAG.TO', 'MAX.TO', 'TNG.AX', 'PEN.AX', 'CAL.TO', 'TLG.AX', 'RUP.V', 'GGP.L', 'AAL.V', 'KCN.AX', 'ITH.TO', 'MML.AX', 'GGG.AX', 'VGZ', 'ARU.V', 'RMX.TO', 'QBL.AX', 'TSG.L', 'ATC.V', 'KGL.AX', 'TML.TO', 'IMA.AX', 'DRM.AX', 'RED.AX', 'MTO.V', 'DGR.AX', 'MRC.AX', 'DNG.

### 3. Specify formatting details of data.

Specify rows of data that are wanted from yahoo.

Specify rows of data that have letters in their numbers for conversion.

Specify rows of data that have percentage signs in their numbers for conversion.

In [3]:
wantedRows = {0  : "marketCap", 
              1  : "enterpriseValue",
              2  : "trailingPe",
              3  : "forwardPe",
              4  : "5YrPeg",
              5  : "priceSales",
              6  : "priceBook",
              7  : "evSales",
              8  : "evEBITDA",
              11 : "profitMargin",
              12 : "operatingMargin",
              13 : "returnOnAssets",
              14 : "returnOnEquity",
              15 : "revenue",
              16 : "revenuePerShare",
              17 : "quarterlyRevenueGrowth",
              18 : "grossProfit",
              19 : "EBITDA",
              20 : "netIncomeForCommon",
              21 : "Diluted EPS",
              22 : "quarterlyEarningsGrowth",
              23 : "totalCash",
              24 : "cashPerShare",
              25 : "totalDebt",
              26 : "debtEquity",
              27 : "currentRatio",
              28 : "bookValuePerShare", 
              29 : "operatingCashFlow",
              30 : "leveredFreeCashFlow",
              31 : "beta3YrMonthly",
              32 : "52WeekChange",
              33 : "spy52WeekChange",
              34 : "52WeekHigh",
              35 : "52WeekLow",
              36 : "50dayMvAvg",
              37 : "200dayMvAvg",
              38 : "avg3MonthVol",
              39 : "avg10DayVol",
              40 : "sharesOutstanding",
              41 : "sharesFloating",
              42 : "heldByInsiders",
              43 : "heldByInstitutions",
              44 : "sharesShort1MonthPrior",
              45 : "shortRatio",
              46 : "shortToFloat",
              47 : "shortToOutstanding",
              48 : "sharesShort2MonthsPrior",
              49 : "forwardDividendRate",
              50 : "forwardDividendYield",
              51 : "trailingDividendRate",
              52 : "trailingDividendYield",
              53 : "5yrAvgDividendYield",
              54 : "payoutRatio",
              55 : "dividendDate",
              56 : "exDividendDate",
              57 : "lastSplitFactorNewPerOld",
              58 : "lastSplitDate",
              60 : "currentPrice",}

letterNumbers = ["marketCap", 
                 "enterpriseValue",
                 "revenue",
                 "grossProfit",
                 "EBITDA",
                 "netIncomeForCommon",
                 "totalCash",
                 "totalDebt",
                 "operatingCashFlow",
                 "leveredFreeCashFlow",
                 "avg3MonthVol",
                 "avg10DayVol",
                 "sharesOutstanding",
                 "sharesFloating",
                 "sharesShort1MonthPrior",
                 "sharesShort2MonthsPrior",]

percentageNumbers = ["profitMargin",
                     "operatingMargin",
                     "returnOnAssets",
                     "returnOnEquity",
                     "quarterlyRevenueGrowth",
                     "quarterlyEarningsGrowth"
                     "52WeekChange",
                     "spy52WeekChange",
                     "heldByInsiders",
                     "heldByInstitutions",
                     "shortToFloat",
                     "shortToOutstanding",
                     "forwardDividendYield",
                     "trailingDividendYield",
                     "payoutRatio",]


### 4. Retreive data from yahoo and cache it.
We need to look up data from yahoo and store it in a csv locally for caching purposes. Also want to format well. ie remove redundant columns and rename.

In [46]:
baseurl = "https://uk.finance.yahoo.com/quote/"

def getDataFromYahoo(stockTypes, tickers, baseurl = baseurl):
    """Checks if data has been stored in csv, if not
       looks to yahoo. Then returns as map of tickers to dataframe."""
    #Create directory for data of tickers.
    dataDir = "./%s_data" % stockTypes
    createDirIfItDoesntExist(dataDir)
    
    frames = {}
    
    for ticker in tickers:
        dataFile = "./%s/%s.csv" % (dataDir, ticker)
        data = None
        if not os.path.exists(dataFile):
            exists, data = saveDataAndReturnSeries(dataFile, ticker)
            if not exists:
                continue
        else:
            print("Already have %s" % ticker)
            data = pd.Series.from_csv(dataFile, index_col=0)
        frames[ticker] = data
    return pd.DataFrame(frames)

def createDirIfItDoesntExist(dataDir):
    """If directory with name dataDir does not exist, create it."""
    if not os.path.exists(dataDir):
        os.makedirs(dataDir)
        
def reformatData(data):
    """Reformat data with better names and remove redundancies"""
    dataDict = {}
    for index in wantedRows:
        series = data.loc[index]
        key = wantedRows[index]
        value = series.get(1)
        dataDict[key] = value
    return pd.Series(dataDict)

def getCurrency(html):
    """Gets the string of the currency stock is priced in."""
    soup = bs(html)
    results = soup.findAll("span", {"data-reactid" : "9"})
    i = 0
    for result in results:
        if i == 1:
            line = result.text
        i += 1
    return line[-3:] 
        
def saveDataAndReturnSeries(dataFile, ticker):
    """Gets data from yahoo and dumps in csv file before 
       returning data as pandas series. Returns true, series if
       there is data, false, none if not."""
    print("Requesting: %s" % ticker)

    #Go to summary page and scrape open.
    priceData = requests.get(baseurl + "%s?p=%s" % (ticker, ticker))       
    summaryFrames = pd.read_html(priceData.text)
    summaryFrame = pd.concat(summaryFrames, ignore_index=True)
    priceFrame = summaryFrame.loc[1:1]
    
    #Scrape currency from summary page.
    currency = getCurrency(priceData.text)
    
    #Go to stats page and scrape stats.
    data = requests.get(baseurl + "%s/key-statistics?p=%s" % (ticker, ticker))
    frames = pd.read_html(data.text)
    if len(frames) < 3:
        print("No data for: %s" % ticker)
        return False , None

    #Join stats and price
    frames.append(priceFrame)
    frame = pd.concat(frames, ignore_index=True)
    
    #Reformat
    reformattedSeries = reformatData(frame)
    reformattedSeries["Currency"] = currency
    
    #Save data
    reformattedSeries.to_csv(dataFile)
    return True, reformattedSeries

goldTickerData = getDataFromYahoo("gold_stocks", goldStockTickers)

#Convert Nan values to 0.
goldTickerData[pd.isnull(goldTickerData)] = "0"
print(goldTickerData)

Already have NEM
Already have ABX
Already have FNV.TO
Already have NCM.AX
Already have GG
Already have AEM
Already have KGC
Already have RGLD
Already have RRS.L
Already have FRES.L
Already have AGG.AX
Already have EVN.AX
Already have GFI
Already have BTO.TO
Already have YRI.TO
Already have IMG.TO
Already have AGI.TO
Already have CEY.L
Already have PVG
Already have DGC.TO
Already have NGD.TO
Already have RRL.AX
Already have OGC.TO
Already have SBM.AX
Already have CG.TO
Already have EDV.TO
Already have NG.TO
Already have ELD.TO
Already have SAR.AX
Already have SMF.TO
Already have HMY
Already have TXG.TO
Already have CGG.TO
Already have NSU.TO
Already have GUY.TO
Already have RSG.AX
Already have SEA.TO
Already have MUX
Already have RIC
Already have PG.TO
Already have OSK.TO
Already have ALIAF
Already have CNL.TO
Already have GOR.AX
Already have POG.L
Already have DNA.TO
Already have GSS
Already have PAF.L
Already have AR.TO
Already have PRU.AX
Already have TBR.AX
Already have BGM.V
Alread

  infer_datetime_format=infer_datetime_format)


Already have FML.AX
Already have ORN.AX
Already have ER.TO
Already have RGD.V
Already have RBX.V
Already have LYD.TO
Already have TXR.V
Already have DEG.AX
Already have TAM.AX
Already have NGY.AX
Already have CHN.AX
Already have GRR.V
Already have LG.V
Already have GSR.V
Already have TRY.AX
Already have TNX.TO
Already have TLM.AX
Already have ME.TO
Already have MJS.V
Already have GCM.TO
Already have BSR.AX
Already have RTG.TO
Already have CAY.AX
Already have KTN.V
Already have MRP.AX
Already have ARE.AX
Already have FSY.TO
Already have TON.AX
Already have SRB.L
Already have BCN.AX
Already have TSG.V
Already have MAT.AX
Already have IRC.AX
Already have ERM.AX
Already have ORV.TO
Already have HMX.V
Already have NGE.V
Already have SPA.V
Already have ABU.AX
Already have MAW.TO
Already have MNR.TO
Already have ANX.TO
Already have SKE.V
Already have AAU.L
Already have WHY.V
Already have NAG.AX
Already have GRG.V
Already have GQC.V
Already have AAR.AX
Already have AGC.V
Already have HAW.AX
Al

### 5. Reformat text rows into numbers for numerical analysis.

Use maps defined earlier to create masks and apply conversion functions.

In [47]:
def convertLetter(string):
    if string == "0":
        return 0
    string = string.replace(",", "")
    print(string)
    number = string[0:-1]
    letter = string[-1]
    if letter == "B":
        return float(number) * 1000
    if letter == "M":
        return float(number) 
    if letter == "k":
        return float(number) / 100
    
def convertPercent(string):
    if string == "0":
        return 0
    string = string.replace(",", "")
    number = string[0:-1]
    return float(number)


letterMask = goldTickerData.index.isin(letterNumbers)
goldTickerData[letterMask] = goldTickerData[letterMask].applymap(convertLetter)
percentageMask = goldTickerData.index.isin(percentageNumbers)
goldTickerData[percentageMask] = goldTickerData[percentageMask].applymap(convertPercent)

print(goldTickerData)
 

17.23B
19.38B
7.11B
3.33B
2.7B
-250M
3.13B
4.19B
2.04B
987.88M
6.02M
4.68M
532.66M
530.68M
12.43M
9.41M
17.23B
19.38B
7.11B
3.33B
2.7B
-250M
3.13B
4.19B
2.04B
987.88M
6.02M
4.68M
532.66M
530.68M
12.43M
9.41M
15.03B
21.11B
7.57B
3.1B
3.33B
-662M
1.7B
5.74B
1.94B
1.05B
18M
12.51M
1.17B
1.14B
51.38M
39.1M
17.15B
16.45B
670M
533M
519.3M
213.8M
76.9M
503.3M
-396.44M
609.99k
464.53k
186.36M
183.39M
1.2M
976.15k
15.75B
16.78B
3.56B
863M
1.54B
202M
953M
1.99B
1.43B
881.75M
2.44M
2.15M
767.12M
764.12M
8.19B
11.01B
3.11B
1.55B
1.07B
77M
166M
2.97B
1.13B
252.38M
10.2M
6.37M
869M
863.69M
21.16M
23.04M
8.15B
9.54B
2.22B
1.19B
798.11M
107.55M
609.6M
1.72B
632.3M
-514.18M
1.71M
963.93k
232.05M
233.27M
7.04M
6.99M
3.38B
4.42B
3.24B
1.55B
1.11B
221.7M
566M
1.73B
971.6M
51.48M
13.29M
10.8M
1.25B
1.24B
10.78M
16.22M
4.79B
5.15B
443.54M
372.94M
315M
-126.76M
117.08M
354.94M
301.85M
361.88M
539.46k
317.57k
65.51M
65.05M
1.89M
2.17M
5.92B
5.71B
1.21B
511.92M
582.74M
246.38M
653.53M
2.77M
405.97M
87.18M
730.

-2.63%
16.01%
3.44%
-1.62%
-8.10%
3.73%
0.25%
87.53%
3.12%
2.33%
1.73%
1.53%
0
-2.63%
16.01%
3.44%
-1.62%
-8.10%
3.73%
0.25%
87.53%
3.12%
2.33%
1.73%
1.53%
0
-8.75%
28.53%
5.49%
-6.61%
-7.80%
3.73%
1.09%
74.65%
0
3.80%
0.92%
0.92%
0
31.91%
40.19%
3.51%
4.55%
-0.60%
3.73%
0
0
0
0.64%
1.42%
1.04%
82.46%
5.67%
13.14%
2.54%
2.79%
10.50%
3.73%
0
0
0
0
1.46%
0.89%
53.89%
2.47%
-22.65%
-2.02%
0.55%
-28.30%
3.73%
12.22%
0.40%
0
2.43%
0.85%
0.85%
88.89%
4.85%
11.35%
1.94%
2.18%
-10.60%
3.73%
0.17%
76.08%
0
2.79%
1.22%
1.26%
95.65%
6.85%
0.46%
0.12%
4.88%
-8.90%
3.73%
14.63%
0.00%
0
0.86%
0
0
0.00%
-28.58%
-20.50%
-1.98%
-5.99%
-11.80%
3.73%
0.73%
81.87%
3.31%
2.88%
1.40%
1.38%
0
20.30%
31.10%
5.59%
7.20%
-7.10%
3.73%
0
0
0
0
5.67%
0.03%
80.43%
21.78%
32.92%
9.84%
16.04%
12.00%
3.73%
0
0
0
0
3.67%
0.05%
61.66%
0.42%
13.83%
5.32%
1.40%
-5.30%
3.73%
0
0
0
0
0.57%
2.19%
116.20%
17.10%
23.07%
7.40%
11.93%
-1.40%
3.73%
0
0
0
0
2.53%
2.39%
41.91%
-15.54%
16.28%
4.48%
-14.29%
3.50%
3.73%
64.90%
0.00%
0

ValueError: ("could not convert string to float: '10,233.49'", 'occurred at index HRT.TO')