In [None]:
import pandas as pd
import yfinance as yf
import numpy as np
import time 
import json
import requests 
import datetime as dt
import re
import seaborn as sns
import matplotlib.pyplot as plt
import math
from tqdm import tqdm

In [None]:
def _price_change(hist):
    """Return the fractional close-price change across *hist*, or None.

    The change is ``(last_close - first_close) / first_close`` using the
    first and last rows of the ``Close`` column. None is returned when the
    frame is missing, empty, or has no ``Close`` column, so callers can skip
    the ticker instead of relying on an ``IndexError``.
    """
    if hist is None or hist.empty or "Close" not in hist.columns:
        return None
    close = hist["Close"]
    first_close = close.iloc[0]
    last_close = close.iloc[-1]
    return (last_close - first_close) / first_close


def _statement_missing(statement):
    """Return True when a financial statement frame is None or empty."""
    return statement is None or statement.empty


def _piotroski_f_score(bal, financials, cashflow):
    """Return the 8-signal Piotroski F-Score for one company.

    Column 0 of each statement is read as the current year and column 1 as
    the previous year.

    NOTE(review): the positional row indices below are carried over
    unchanged from the original implementation. They depend on the row order
    of the statement frames returned by yfinance, which this code cannot
    verify -- confirm them against the installed yfinance version (label
    based lookups would be more robust).

    Raises:
        IndexError: a statement has fewer rows/columns than expected.
        ZeroDivisionError: a ratio denominator is exactly zero.
        TypeError, ValueError: a cell cannot be converted to float.
    """
    def cell(frame, row, col):
        return float(frame.iloc[row, col])

    # Balance sheet: current year (column 0) and previous year (column 1).
    long_term_debt = cell(bal, 20, 0)
    total_assets = cell(bal, 3, 0)
    current_liab = cell(bal, 11, 0)
    current_assets = cell(bal, 15, 0)
    long_term_debt_py = cell(bal, 20, 1)
    total_assets_py = cell(bal, 3, 1)
    current_liab_py = cell(bal, 11, 1)
    current_assets_py = cell(bal, 15, 1)

    # Income statement.
    revenue = cell(financials, 15, 0)
    gross_profit = cell(financials, 6, 0)
    net_income = cell(financials, 4, 0)
    revenue_py = cell(financials, 15, 1)
    gross_profit_py = cell(financials, 6, 1)

    # Operating cash flow.
    op_cash = cell(cashflow, 10, 0)

    signals = (
        # Positive net income.
        net_income > 0,
        # Positive return on assets in the current year.
        (net_income / total_assets) > 0,
        # Positive operating cash flow in the current year.
        op_cash > 0,
        # Cash flow from operations greater than net income.
        op_cash > net_income,
        # Lower long-term-debt ratio than the previous year.
        (long_term_debt / total_assets) < (long_term_debt_py / total_assets_py),
        # Higher current ratio than the previous year.
        (current_assets / current_liab) > (current_assets_py / current_liab_py),
        # Higher gross margin than the previous year.
        (gross_profit / revenue) > (gross_profit_py / revenue_py),
        # Higher asset turnover than the previous year.
        (revenue / total_assets) > (revenue_py / total_assets_py),
    )
    # The "no new shares issued" signal is not implemented, so the score is
    # out of 8 rather than the classic 9.
    return sum(int(signal) for signal in signals)


def universe_selection(*, start_date="2021-01-23", end_date="2021-06-23",
                       momentum_cutoff=0.95, f_score_cutoff=0.70,
                       output_path="universe.csv"):
    """Select a stock universe by momentum and Piotroski F-Score, save to CSV.

    Steps:
      1. Read the S&P 500 constituents (first table of the Wikipedia page)
         and convert "." in symbols to "-" for Yahoo Finance.
      2. Compute each ticker's close-price change between *start_date* and
         *end_date* and keep those whose percentile rank is at least
         *momentum_cutoff*.
      3. Compute an 8-signal Piotroski F-Score for the survivors and keep
         those whose percentile rank is at least *f_score_cutoff*.
      4. Write the remaining tickers to *output_path* (space separated, with
         a "Ticker" header and no index).

    Tickers with no price history, missing statements, or statements that
    are too short / produce a zero denominator are skipped with a printed
    message instead of aborting the whole run or silently reusing the
    previous ticker's data.

    Args:
        start_date: First day (YYYY-MM-DD) of the momentum window.
        end_date: Last day (YYYY-MM-DD) of the momentum window.
        momentum_cutoff: Minimum momentum percentile rank to keep (0-1).
        f_score_cutoff: Minimum F-Score percentile rank to keep (0-1).
        output_path: Destination CSV path.

    Returns:
        None. The result is written to *output_path*.
    """
    # Get the updated Wikipedia table for the S&P500
    table = pd.read_html(
        "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")[0]

    # Yahoo Finance uses "-" where the index listing uses "." (e.g. BRK.B).
    # Build a new Series instead of a chained in-place replace, which is
    # unreliable under pandas copy-on-write.
    symbols = table["Symbol"].str.replace(".", "-", regex=False)

    # Gauging momentum from Yahoo Finance historical data
    momentum_rows = []
    for symbol in tqdm(symbols):
        hist = yf.Ticker(symbol).history(start=start_date, end=end_date)
        change = _price_change(hist)
        if change is not None:
            momentum_rows.append([symbol, change])

    momentum = pd.DataFrame(momentum_rows, columns=["Ticker", "6Month Change"])
    momentum["Percentile Rank"] = momentum["6Month Change"].rank(pct=True)

    # Keep only the top-ranked momentum tickers; rows with a NaN change have
    # a NaN rank and are removed as well.
    universe = momentum[momentum["Percentile Rank"] >= momentum_cutoff].dropna()

    # Generating Piotroski F-Score
    score_rows = []
    for ticker in tqdm(universe["Ticker"]):
        # One Ticker object per company so each statement is fetched once.
        company = yf.Ticker(ticker)
        bal = company.balance_sheet
        financials = company.financials
        cashflow = company.cashflow

        if any(_statement_missing(s) for s in (bal, financials, cashflow)):
            print(f"No data available for {ticker}")
            continue

        try:
            f_score = _piotroski_f_score(bal, financials, cashflow)
        except (IndexError, TypeError, ValueError, ZeroDivisionError) as exc:
            print(f"Incomplete data for {ticker}, skipping: {exc!r}")
            continue

        score_rows.append([ticker, f_score])

    # Ranking tickers based on Piotroski F-Score results (x/8)
    scores = pd.DataFrame(score_rows, columns=["Ticker", "Piotroski F-Score"])
    scores["Percentile Rank"] = scores["Piotroski F-Score"].rank(pct=True)

    # Keep only the top-ranked F-Score tickers
    f_universe = scores[scores["Percentile Rank"] >= f_score_cutoff]

    # Saving universe selection to csv file to be used in "Portfolio Theory.ipynb"
    f_universe["Ticker"].to_csv(output_path, header=True, index=False, sep=" ")

    return