# Library imports

In [4]:
from scipy.stats import percentileofscore as score
from scipy import stats
import pandas as pd
import numpy as np 
import xlsxwriter 
import requests
import math

# Importing Our List of Stocks and getting API Token


In [14]:
pd.set_option('display.max_rows', None)
stocks = pd.read_csv('sp_500_stocks.csv')
IEX_CLOUD_API_TOKEN = 'Tpk_059b97af715d417d9f49f50b51b1c448'
symbol = 'AAPL'
api_url = f'https://sandbox.iexapis.com/stable/stock/{symbol}/stats/?token={IEX_CLOUD_API_TOKEN}'
data = requests.get(api_url).json()

# Executing a batch API Call & Building our Dataframe

In this section, we will filter for stocks with the lowest percentiles on the following metrics:

    1. Price-to-earnings ratio (Equivalent in portuguese: P/L - Preço sobre lucro)
    2. Price-to-book ratio (Equivalent in portuguese: P/VP - Preço sobre Valor Patrimonial)
    3. Price-to-sales ratio (Equivalent in portuguese: P/S - Preço sobre vendas
    4. Enterprise Value divided by earnings before interest, taxes, depreciation, and amortization (EV/EBITDA)
    5. Enterprise Value divided by gross profit (EV/GP)

Some of these metrics aren't provided directly by the IEX Cloud API, and must be computed after pulling raw data.

In [10]:
symbol = 'AAPL'
batch_api_call_url = f'https://sandbox.iexapis.com/stable/stock/market/batch?symbols={symbol} &types=quote,advanced-stats&token={IEX_CLOUD_API_TOKEN}'
data = requests.get(batch_api_call_url).json()

print(data['AAPL']['advanced-stats']['priceToBook'])


# Price-to-earnings ratio
pe_ratio = data[symbol]['quote']['peRatio']

# Price-to-book ratio 
pb_ratio = data['AAPL']['advanced-stats']['priceToBook']

# Price-to-sales ratio 
ps_ratio = data['AAPL']['advanced-stats']['priceToSales']

# Enterprise value divided by earnings before interest, taxes, depreciation, and amortization (EV/EBITDA)
enterprise_value = data['AAPL']['advanced-stats']['enterpriseValue']
ebitda = data['AAPL']['advanced-stats']['EBITDA']
ev_to_ebitda = enterprise_value/ebitda

# Enterprise value divided by Gross Profit (EV/GP)
gross_profit = data['AAPL']['advanced-stats']['grossProfit']
ev_to_gross_profit = enterprise_value/gross_profit


39.7


In [15]:
rv_columns = [
    'Ticker',
    'Price',
    'Number of Shares to Buy',
    'Price-to-Earnings Ratio',
    'PE Percentile',
    'Price-to-Book Ratio',
    'PB Percentile',
    'Price-to-Sales Ratio',
    'PS Percentile',
    'EV/EBITDA',
    'EV/EBITDA Percentile',
    'EV/GP',
    'EV/GP Percentile',
    'RV Score'
]

rv_dataframe = pd.DataFrame(columns = rv_columns)
rv_dataframe

Unnamed: 0,Ticker,Price,Number of Shares to Buy,Price-to-Earnings Ratio,PE Percentile,Price-to-Book Ratio,PB Percentile,Price-to-Sales Ratio,PS Percentile,EV/EBITDA,EV/EBITDA Percentile,EV/GP,EV/GP Percentile,RV Score


In [18]:
def chunks(lst,n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i : i + n]

symbol_groups = list(chunks(stocks['Ticker'],100))
symbol_strings = []
for i in range(0, len(symbol_groups)):
    
    symbol_strings.append(','.join(symbol_groups[i]))
    #print (symbol_strings[i])
    
    
for symbol_string in symbol_strings[:6]:      
    batch_api_call_url = f'https://sandbox.iexapis.com/stable/stock/market/batch?symbols={symbol_string} &types=quote,advanced-stats&token={IEX_CLOUD_API_TOKEN}'
    data = requests.get(batch_api_call_url).json()
    
    for symbol in symbol_string.split(','):
        
        
        enterprise_value = data[symbol]['advanced-stats']['enterpriseValue']
        ebitda = data[symbol]['advanced-stats']['EBITDA']

        # Enterprise value divided by Gross Profit (EV/GP)
        gross_profit = data[symbol]['advanced-stats']['grossProfit']
        
        try:
            ev_to_gross_profit = enterprise_value/gross_profit
        except:
            ev_to_gross_profit = np.NaN
        
        
        try:
            ev_to_ebitda = enterprise_value/ebitda
        except TypeError:
            ev_to_ebitda = np.NaN
        
        rv_dataframe = rv_dataframe.append(
            pd.Series([
                symbol,
                data[symbol]['quote']['latestPrice'],
                'N/A',
                data[symbol]['quote']['peRatio'],
                'N/A',
                data[symbol]['advanced-stats']['priceToBook'],
                'N/A',
                data[symbol]['advanced-stats']['priceToSales'],
                'N/A',
                ev_to_ebitda,
                'N/A',
                ev_to_gross_profit,
                'N/A',            
                'N/A'
        ],
        index = rv_columns), ignore_index = True)
        
rv_dataframe
     

Unnamed: 0,Ticker,Price,Number of Shares to Buy,Price-to-Earnings Ratio,PE Percentile,Price-to-Book Ratio,PB Percentile,Price-to-Sales Ratio,PS Percentile,EV/EBITDA,EV/EBITDA Percentile,EV/GP,EV/GP Percentile,RV Score
0,AAP,160.56,,18.95,,3.51,,0.85,,10.459505,,2.199,,
1,AAPL,144.1,,22.96,,39.03,,5.99,,17.886103,,13.984326,,
2,ABBV,138.46,,19.21,,16.99,,4.29,,10.806597,,7.675995,,
3,ABC,139.55,,15.73,,128.72,,0.1242,,8.771771,,3.607946,,
4,ABMD,249.21,,53.02,,7.6,,10.84,,37.09303,,12.252863,,
5,ABT,97.03,,20.23,,4.87,,3.84,,14.436742,,6.679149,,
6,ACN,264.4,,26.14,,8.18,,2.94,,14.257263,,8.219176,,
7,ADBE,287.4,,28.29,,9.0,,7.56,,20.830873,,8.544242,,
8,ADI,142.4,,39.83,,1.98,,6.55,,21.829415,,11.53949,,
9,ADM,82.7,,13.17,,1.93,,0.4857,,10.26432,,7.957115,,


# Dealing with Missing Data in Our DataFrame
Our DataFrame contains some missing data because all of the metrics we require are not available through the API we're using.

You can use pandas' isnull method to identify missing data.

In [19]:
rv_dataframe[rv_dataframe.isnull().any(axis=1)].index

Int64Index([ 26,  38,  69, 115, 132, 161, 182, 186, 189, 200, 249, 320, 321,
            322, 343, 355, 437, 447, 488],
           dtype='int64')

Dealing with missing data is an important topic in data science.

There are 2 main approaches:

1. Drop missing data from the data set (pandas's dropna method is useful here)
2. Replace missing data with a new value (pandas' fillna method is useful here)

In [20]:
rv_dataframe_filled = rv_dataframe
for column in ['Price-to-Earnings Ratio', 'Price-to-Book Ratio', 'Price-to-Sales Ratio', 'EV/EBITDA', 'EV/GP']:
    rv_dataframe_filled[column].fillna(rv_dataframe[column].mean(), inplace = True)
    

Now, if we run the statement from earlier to print rows that contain missing data, nothing should be returned:

In [23]:
rv_dataframe_filled[rv_dataframe_filled.isnull().any(axis=1)]
rv_dataframe_filled


Unnamed: 0,Ticker,Price,Number of Shares to Buy,Price-to-Earnings Ratio,PE Percentile,Price-to-Book Ratio,PB Percentile,Price-to-Sales Ratio,PS Percentile,EV/EBITDA,EV/EBITDA Percentile,EV/GP,EV/GP Percentile,RV Score
0,AAP,160.56,,18.95,,3.51,,0.85,,10.459505,,2.199,,
1,AAPL,144.1,,22.96,,39.03,,5.99,,17.886103,,13.984326,,
2,ABBV,138.46,,19.21,,16.99,,4.29,,10.806597,,7.675995,,
3,ABC,139.55,,15.73,,128.72,,0.1242,,8.771771,,3.607946,,
4,ABMD,249.21,,53.02,,7.6,,10.84,,37.09303,,12.252863,,
5,ABT,97.03,,20.23,,4.87,,3.84,,14.436742,,6.679149,,
6,ACN,264.4,,26.14,,8.18,,2.94,,14.257263,,8.219176,,
7,ADBE,287.4,,28.29,,9.0,,7.56,,20.830873,,8.544242,,
8,ADI,142.4,,39.83,,1.98,,6.55,,21.829415,,11.53949,,
9,ADM,82.7,,13.17,,1.93,,0.4857,,10.26432,,7.957115,,


# Dealing with Price-to-Earnings Ratio < 0

An negative PeRatio indicates that the company has a negative profit, it means, a loss. 
In this approach, I'll remove companies that had a loss.

In [24]:
for row in rv_dataframe_filled.index:
    if (rv_dataframe_filled.loc[row,'Price-to-Earnings Ratio'] < 0):
        rv_dataframe_filled.drop(row)
        print(rv_dataframe_filled.loc[row,'Ticker'])
rv_dataframe_filled

AES
BA
BIO
BKR
CAH
CCL
CXO
FTI
GE
GPS
ILMN
LYV
NBL
NCLH
NOV
PEG
PRGO
RCL
TWTR
UAL
WYNN
XRX


Unnamed: 0,Ticker,Price,Number of Shares to Buy,Price-to-Earnings Ratio,PE Percentile,Price-to-Book Ratio,PB Percentile,Price-to-Sales Ratio,PS Percentile,EV/EBITDA,EV/EBITDA Percentile,EV/GP,EV/GP Percentile,RV Score
0,AAP,160.56,,18.95,,3.51,,0.85,,10.459505,,2.199,,
1,AAPL,144.1,,22.96,,39.03,,5.99,,17.886103,,13.984326,,
2,ABBV,138.46,,19.21,,16.99,,4.29,,10.806597,,7.675995,,
3,ABC,139.55,,15.73,,128.72,,0.1242,,8.771771,,3.607946,,
4,ABMD,249.21,,53.02,,7.6,,10.84,,37.09303,,12.252863,,
5,ABT,97.03,,20.23,,4.87,,3.84,,14.436742,,6.679149,,
6,ACN,264.4,,26.14,,8.18,,2.94,,14.257263,,8.219176,,
7,ADBE,287.4,,28.29,,9.0,,7.56,,20.830873,,8.544242,,
8,ADI,142.4,,39.83,,1.98,,6.55,,21.829415,,11.53949,,
9,ADM,82.7,,13.17,,1.93,,0.4857,,10.26432,,7.957115,,


# Calculating Value Percentiles
Metrics:

 1. price to earnings ratio
 2. price to book ratio
 3. price to sales ratio
 4. EV/EBITDA
 5. EV/GP

In [25]:
from scipy.stats import percentileofscore as score

metrics = {
    'Price-to-Earnings Ratio': 'PE Percentile',
    'Price-to-Book Ratio' : 'PB Percentile',
    'Price-to-Sales Ratio': 'PS Percentile',
    'EV/EBITDA': 'EV/EBITDA Percentile',
    'EV/GP' : 'EV/GP Percentile',
}

for metric in metrics.keys():
    for row in rv_dataframe.index:
        rv_dataframe.loc[row, metrics[metric]] = stats.percentileofscore(rv_dataframe[metric], rv_dataframe.loc[row, metric])

rv_dataframe

Unnamed: 0,Ticker,Price,Number of Shares to Buy,Price-to-Earnings Ratio,PE Percentile,Price-to-Book Ratio,PB Percentile,Price-to-Sales Ratio,PS Percentile,EV/EBITDA,EV/EBITDA Percentile,EV/GP,EV/GP Percentile,RV Score
0,AAP,160.56,,18.95,49.297189,3.51,56.2249,0.85,13.955823,10.459505,37.349398,2.199,14.257028,
1,AAPL,144.1,,22.96,61.84739,39.03,97.791165,5.99,84.939759,17.886103,75.702811,13.984326,91.566265,
2,ABBV,138.46,,19.21,51.004016,16.99,93.7751,4.29,76.004016,10.806597,39.35743,7.675995,66.26506,
3,ABC,139.55,,15.73,40.361446,128.72,99.39759,0.1242,0.401606,8.771771,28.915663,3.607946,27.309237,
4,ABMD,249.21,,53.02,92.369478,7.6,79.317269,10.84,96.787149,37.09303,97.389558,12.252863,88.554217,
5,ABT,97.03,,20.23,53.012048,4.87,67.068273,3.84,71.485944,14.436742,62.851406,6.679149,54.819277,
6,ACN,264.4,,26.14,69.879518,8.18,80.522088,2.94,56.526104,14.257263,61.84739,8.219176,69.076305,
7,ADBE,287.4,,28.29,74.096386,9.0,82.53012,7.56,89.558233,20.830873,84.738956,8.544242,71.88755,
8,ADI,142.4,,39.83,86.947791,1.98,36.345382,6.55,87.148594,21.829415,87.751004,11.53949,86.746988,
9,ADM,82.7,,13.17,31.124498,1.93,35.040161,0.4857,7.028112,10.26432,35.742972,7.957115,67.670683,


# Calculating RV Score

We'll now calculate our RV Score (Robust Value), which is the value score that we'll use to filter for stocks in this invest strategy. 

The RV Score will be the arithmetic mean of the 4 percentile scores that we calculated in the last section. 

To calculate arithmetic mean, we will use the mean function from Python's built-in statistics module. 

In [26]:
from statistics import mean

for row in rv_dataframe.index:
    pe_ratio = rv_dataframe.loc[row, 'PE Percentile']
    pb_ratio = rv_dataframe.loc[row, 'PB Percentile']
    ps_ratio = rv_dataframe.loc[row, 'PS Percentile']
    ev_to_ebitda = rv_dataframe.loc[row, 'EV/EBITDA Percentile']
    ev_to_gp = rv_dataframe.loc[row, 'EV/GP Percentile']
    rv_dataframe.loc[row, 'RV Score'] = round(mean([pe_ratio, pb_ratio, ps_ratio, ev_to_ebitda, ev_to_gp]))
    
rv_dataframe[['Ticker', 'PE Percentile', 'PB Percentile', 'PS Percentile', 'EV/EBITDA Percentile', 'EV/GP Percentile', 'RV Score']]

Unnamed: 0,Ticker,PE Percentile,PB Percentile,PS Percentile,EV/EBITDA Percentile,EV/GP Percentile,RV Score
0,AAP,49.297189,56.2249,13.955823,37.349398,14.257028,34
1,AAPL,61.84739,97.791165,84.939759,75.702811,91.566265,82
2,ABBV,51.004016,93.7751,76.004016,39.35743,66.26506,65
3,ABC,40.361446,99.39759,0.401606,28.915663,27.309237,39
4,ABMD,92.369478,79.317269,96.787149,97.389558,88.554217,91
5,ABT,53.012048,67.068273,71.485944,62.851406,54.819277,62
6,ACN,69.879518,80.522088,56.526104,61.84739,69.076305,68
7,ADBE,74.096386,82.53012,89.558233,84.738956,71.88755,81
8,ADI,86.947791,36.345382,87.148594,87.751004,86.746988,77
9,ADM,31.124498,35.040161,7.028112,35.742972,67.670683,35


# Selecting the 10th best value stocks

In [33]:
rv_dataframe.sort_values('RV Score', ascending=True, inplace=True)
rv_dataframe.reset_index(drop=True, inplace=True)
rv_dataframe.head(10)


Unnamed: 0,Ticker,Price,Number of Shares to Buy,Price-to-Earnings Ratio,PE Percentile,Price-to-Book Ratio,PB Percentile,Price-to-Sales Ratio,PS Percentile,EV/EBITDA,EV/EBITDA Percentile,EV/GP,EV/GP Percentile,RV Score
0,PVH,45.6,,3.49,5.220884,0.5767,7.028112,0.327,3.614458,3.942586,7.429719,0.848808,3.815261,5
1,KSS,26.0,,4.92,8.032129,0.6499,7.228916,0.1601,0.60241,3.871554,6.626506,0.99551,5.62249,6
2,AIG,48.87,,3.12,4.819277,0.8432,9.437751,0.6346,10.240964,1.904143,1.807229,0.622571,1.606426,6
3,F,11.2,,3.91,5.421687,1.0,12.248996,0.3038,2.610442,2.821268,3.212851,1.520249,9.437751,7
4,GM,33.02,,6.21,11.445783,0.7411,8.032129,0.3666,4.417671,1.837004,1.606426,1.494881,9.236948,7
5,SYF,28.78,,4.2,5.823293,1.08,13.855422,0.7761,13.052209,2.620833,2.409639,0.862525,4.016064,8
6,C,42.37,,5.49,9.036145,0.4574,6.425703,0.9663,17.871486,3.269531,4.016064,0.972402,5.220884,9
7,CAH,69.64,,-20.12,1.807229,-24.97,2.610442,0.0999,0.200803,6.683073,19.678715,2.697495,20.281124,9
8,HPQ,25.14,,4.49,7.228916,-11.21,4.618474,0.4026,5.421687,5.433039,12.851406,2.288986,15.060241,9
9,UAA,6.7,,8.8,17.068273,1.28,17.068273,0.4808,6.827309,2.506891,2.208835,0.56941,1.204819,9


# Calculating Number of Shares to Buy

In [80]:
portifolio_size = 100000
position_size = float(portifolio_size)/len(rv_dataframe.index)

for row in rv_dataframe.index:
    rv_dataframe.loc[row,'Number of Shares to Buy'] = round(position_size/rv_dataframe.loc[row,'Price'])

rv_dataframe

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,Ticker,Price,Number of Shares to Buy,Price-to-Earnings Ratio,PE Percentile,Price-to-Book Ratio,PB Percentile,Price-to-Sales Ratio,PS Percentile,EV/EBITDA,EV/EBITDA Percentile,EV/GP,EV/GP Percentile,RV Score
0,AIG,54.06,37,4.67,6.2249,0.7515,7.429719,0.7893,11.84739,2.81548,2.811245,0.762673,1.807229,6
1,GM,33.04,61,5.55,7.831325,0.762,7.630522,0.3716,3.212851,1.773753,1.807229,1.472489,8.433735,6
2,UNM,34.89,57,7.5,11.84739,0.6206,7.028112,0.573,7.028112,3.667279,4.819277,0.558628,0.803213,6
3,LNC,49.04,41,7.07,10.843373,0.5681,6.626506,0.435,4.216867,5.072031,10.441767,0.40941,0.200803,6
4,F,11.74,170,4.05,5.220884,0.9915,11.445783,0.3406,2.208835,3.402186,3.413655,1.78455,10.441767,7
5,PVH,62.86,32,4.47,6.024096,0.7858,8.032129,0.4567,4.819277,4.707316,9.036145,1.081342,5.220884,7
6,GPS,8.97,223,-48.24,1.204819,1.42,19.97992,0.2083,0.803213,4.850271,9.638554,0.696172,1.405622,7
7,PRU,95.9,21,7.68,12.248996,0.7929,8.232932,0.5137,5.421687,4.839246,9.437751,0.521683,0.60241,7
8,KSS,42.35,47,6.39,10.040161,1.17,13.855422,0.2769,1.405622,3.797653,5.421687,1.10319,5.823293,7
9,SYF,30.49,66,4.03,5.02008,1.23,15.562249,0.8968,13.654618,2.638389,2.409639,0.923374,3.413655,8


# Formatting our excel output

In [137]:
writer = pd.ExcelWriter('value_strategy2.xlsx', engine = 'xlsxwriter')
rv_dataframe.to_excel(writer, sheet_name='Value Strategy', index=False)


In [138]:
background_color = '#0a0a23'
font_color = '#ffffff'

string_template = writer.book.add_format(
        {
            'font_color': font_color,
            'bg_color': background_color,
            'border': 1
        }
    )

dollar_template = writer.book.add_format(
        {
            'num_format':'$0.00',
            'font_color': font_color,
            'bg_color': background_color,
            'border': 1
        }
    )

integer_template = writer.book.add_format(
        {
            'num_format':'0',
            'font_color': font_color,
            'bg_color': background_color,
            'border': 1
        }
    )

float_template = writer.book.add_format(
        {
            'num_format':'0',
            'font_color': font_color,
            'bg_color': background_color,
            'border': 1
        }
    )

percent_template = writer.book.add_format(
        {
            'num_format':'0.0%',
            'font_color': font_color,
            'bg_color': background_color,
            'border': 1
        }
    )

In [139]:
column_formats = {
                    'A': ['Ticker', string_template],
                    'B': ['Price', dollar_template],
                    'C': ['Number of Shares to Buy', integer_template],
                    'D': ['Price-to-Earnings Ratio', float_template],
                    'E': ['PE Percentile', percent_template],
                    'F': ['Price-to-Book Ratio', float_template],
                    'G': ['PB Percentile',percent_template],
                    'H': ['Price-to-Sales Ratio', float_template],
                    'I': ['PS Percentile', percent_template],
                    'J': ['EV/EBITDA', float_template],
                    'K': ['EV/EBITDA Percentile', percent_template],
                    'L': ['EV/GP', float_template],
                    'M': ['EV/GP Percentile', percent_template],
                    'N': ['RV Score', percent_template]
                 }

for column in column_formats.keys():
    writer.sheets['Value Strategy'].set_column(f'{column}:{column}', 25, column_formats[column][1])
    writer.sheets['Value Strategy'].write(f'{column}1', column_formats[column][0], column_formats[column][1])

In [140]:
writer.save()