In [1]:
author = 'Nikolai Melnikov'
author_url = 'https://www.linkedin.com/in/nikolaimelnikov/'

In [None]:
!pip install finviz
!pip install yfinance
!pip install alpaca_trade_api
!pip install newsapi-python
!pip install flair

In [4]:
import pandas as pd
import numpy as np
import finviz
from finviz.screener import Screener
import nest_asyncio
import yfinance as yf
from pandas_datareader import data as pdr
yf.pdr_override() 
nest_asyncio.apply()

# Choosing stocks with the growth potential

In [6]:
filters_long = ['cap_microover', 'fa_curratio_o1','fa_debteq_u0.8','fa_eps5years_pos','fa_epsyoy_o10','fa_estltgrowth_pos','fa_roe_o10','sh_instown_o10','ta_rsi_nos40']  
stock_list_long = Screener(filters=filters_long, table='Overview', order='price') 
stock_list_long.to_csv("stocks_long.csv")
stocks_long = pd.read_csv('stocks_long.csv', index_col=['Ticker'])

In [7]:
filters_growth = ['fa_eps5years_pos', 'fa_epsqoq_o20','fa_epsyoy_o25','fa_epsyoy1_o15','fa_estltgrowth_pos','fa_roe_o10','sh_instown_o10','sh_price_o10','ta_highlow52w_a90h','ta_rsi_nos50']  
stock_list_growth = Screener(filters=filters_growth, table='Overview', order='price') 
stock_list_growth.to_csv("stocks_growth.csv")
stocks_growth = pd.read_csv('stocks_growth.csv', index_col=['Ticker'])

stocks = pd.concat([stocks_long, stocks_growth])
stocks.drop(['No.','Company','Country','Change', 'Sector', 'Volume'], inplace=True, axis=1)
stocks.drop_duplicates(inplace=True)

In [8]:
stocks

Unnamed: 0_level_0,Industry,Market Cap,P/E,Price
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NAT,Marine Shipping,463.55M,5.17,3.20
CO,Diagnostics & Research,496.50M,6.45,4.16
DHT,Oil & Gas Midstream,1.01B,3.26,5.97
KGC,Gold,9.08B,8.49,7.24
ACOR,Biotechnology,71.29M,1.38,7.35
...,...,...,...,...
ESGR,Insurance - Diversified,4.72B,4.26,211.89
BLD,Engineering & Construction,7.12B,32.58,216.59
MED,Personal Services,2.99B,31.42,251.01
FB,Internet Content & Information,783.34B,26.58,268.10


In [9]:
industry_dict = {
    'Oil & Gas Midstream': 'ind_oilgasmidstream',
    'Gold':'ind_gold',
    'Scientific & Technical Instruments':'ind_scientifictechnicalinstruments',
    'Apparel Manufacturing':'ind_apparelmanufacturing',
    'Drug Manufacturers - Specialty & Generic':'ind_drugmanufacturersspecialtygeneric',
    'Household & Personal Products':'ind_householdpersonalproducts',
    'Software - Application':'ind_softwareapplication',
    'Education & Training Services':'ind_educationtrainingservices',
    'Chemicals':'ind_chemicals',
    'Staffing & Employment Services':'ind_staffingemploymentservices',
    'Packaging & Containers':'ind_packagingcontainers',
    'Asset Management':'ind_assetmanagement',
    'Diagnostics & Research':'ind_diagnosticsresearch',
    'Communication Equipment':'ind_communicationequipment',
    'Pollution & Treatment Controls':'ind_pollutiontreatmentcontrols',
    'Electronic Components':'ind_electroniccomponents',
    'Apparel Retail':'ind_apparelretail',
    'Lumber & Wood Production':'ind_lumberwoodproduction',
    'Medical Devices':'ind_medicaldevices',
    'Drug Manufacturers - General':'ind_drugmanufacturersgeneral',
    'Biotechnology':'ind_biotechnology',
    'Consulting Services':'ind_consultingservices',
    'Beverages - Non-Alcoholic':'ind_beveragesnonalcoholic',
    'Farm & Heavy Construction Machinery':'ind_farmheavyconstructionmachinery',
    'Computer Hardware':'ind_computerhardware',
    'Internet Content & Information':'ind_internetcontentinformation',
    'Specialty Business Services':'ind_specialtybusinessservices',
    'Software - Infrastructure':'ind_softwareinfrastructure',
    'Semiconductor Equipment & Materials':'ind_semiconductorequipmentmaterials',
    'Electronic Gaming & Multimedia':'ind_electronicgamingmultimedia',
    'Security & Protection Services':'ind_securityprotectionservices',
    'Personal Services':'ind_personalservices',
    'Internet Retail':'ind_internetretail',
    'Financial Data & Stock Exchanges':'ind_financialdatastockexchanges',
    'Medical Instruments & Supplies':'ind_medicalinstrumentssupplies',
    'Health Information Services':'ind_healthinformationservices',
    'Credit Services':'ind_creditservices',
    'Information Technology Services':'ind_informationtechnologyservices',
    'Beverages - Brewers':'ind_beveragesbrewers',
    'Engineering & Construction':'ind_engineeringconstruction',
    'Capital Markets':'ind_capitalmarkets',
    'Insurance - Diversified':'ind_insurancediversified',
    'Specialty Industrial Machinery':'ind_specialtyindustrialmachinery',
    'Medical Distribution':'ind_medicaldistribution',
    'Building Products & Equipment':'ind_buildingproductsequipment',
    'Marine Shipping': 'ind_marineshipping',
    'Utilities - Diversified': 'ind_utilitiesdiversified',
    'Grocery Stores': 'ind_grocerystores',
    'Trucking':'ind_trucking',
    'Medical Care Facilities':'ind_medicalcarefacilities',
    'Auto Parts':'ind_autoparts',
    'Furnishings, Fixtures & Appliances':'ind_furnishingsfixturesappliances',
    'Residential Construction':'ind_residentialconstruction',
    'Industrial Distribution':'ind_industrialdistribution',
    'Insurance Brokers':'ind_insurancebrokers',
    'Packaged Foods':'ind_packagedfoods',
    'Airports & Air Services':'ind_airportsairservices',
    'Semiconductors':'ind_semiconductors',
    'Publishing':'ind_publishing', 
    'Electronics & Computer Distribution':'ind_electronicscomputerdistribution',
    'Real Estate Services':'ind_realestateservices',
    'Auto & Truck Dealerships':'ind_autotruckdealerships',
    'Recreational Vehicles':'ind_recreationalvehicles',
    'Building Materials':'ind_buildingmaterials',
    'Electrical Equipment & Parts':'ind_electricalequipmentparts',
    'Specialty Retail':'ind_specialtyretail',
    'Insurance - Life':'ind_insurancelife',
    'Food Distribution':'ind_fooddistribution',
    'Metal Fabrication':'ind_metalfabrication',
    'Business Equipment & Supplies':'ind_businessequipmentsupplies',
    'Integrated Freight & Logistics':'ind_integratedfreightlogistics',
    'Tools & Accessories':'ind_toolsaccessories',
    'Discount Stores':'ind_discountstores', 
    'Leisure':'ind_leisure'
}

In [10]:
#for each company find good performing competitor from the same industry
competitors = []
i = 0
for row in stocks.index:
  ind = industry_dict[stocks['Industry'].iloc[i]]
  i += 1
  filters_competitor = [ind, 'cap_smallover','fa_pe_o5','sh_price_o80','ta_rsi_nos40']  
  competitors_list = []
  try:
    stock_list_competitor = Screener(filters=filters_competitor, table='Overview', order='price') 
    for stock in stock_list_competitor: 
      competitors_list.append(stock['Ticker'])
  except Exception:
    pass
  if not competitors_list:
    competitors.append('-')
  else:
    competitors.append(competitors_list)
  
stocks['Competitors'] = competitors

In [22]:
#Select stocks with performing competitors and with a low price
stocks_to_analyze = stocks[(stocks['Competitors'] != '-')&(stocks['Price'] <= 100)]

In [23]:
stocks_to_analyze

Unnamed: 0_level_0,Industry,Market Cap,P/E,Price,Competitors
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CO,Diagnostics & Research,496.50M,6.45,4.16,"[NEOG, DGX, PRAH, A, FLGT, PKI, MEDP, IQV, ICL..."
ELMD,Medical Devices,93.18M,25.99,10.76,"[EW, NARI, CNMD, MDT, ABT, SYK, PODD, ABMD, BI..."
EGAN,Software - Application,366.09M,47.04,11.76,"[STNE, RP, APPS, MANT, PLUS, CDAY, ALRM, SAP, ..."
ECOM,Software - Application,687.82M,38.84,23.19,"[STNE, RP, APPS, MANT, PLUS, CDAY, ALRM, SAP, ..."
WLKP,Chemicals,837.19M,12.2,23.75,"[EMN, CE]"
NWPX,Metal Fabrication,311.00M,12.1,31.8,"[VMI, PRLB]"
ACU,Household & Personal Products,109.38M,16.17,32.65,"[SPB, USNA, KMB, HELE, EL]"
STRL,Engineering & Construction,635.72M,10.58,22.02,"[MTZ, DY, EME, NVEE, J, TTEK, BLD]"


In [None]:
#replace Competotors' tickers with price change in years

from datetime import timedelta
from datetime import date

results = pd.DataFrame(columns=['Start','End','Difference'])
results_row = pd.DataFrame(columns=['Avg'])
#count average % change for last 3 years
for row in stocks_to_analyze.index:
  for competitor in stocks_to_analyze.Competitors[row]:
    data = pdr.get_data_yahoo(competitor, start=date.today()-timedelta(days=365*3), end=date.today())
    temp = pd.DataFrame(
        {
            #'Ticker': ticker,
            'Start': data.Open.iloc[0],
            'End': data.Open.iloc[-1],
            'Difference': data.Open.iloc[-1]/data.Open.iloc[0]
        }, index=[competitor]
    )
    results = pd.concat([results, temp])
  temp_row = pd.DataFrame(
      {
          'Avg':results.Difference.mean()
      }, index = [row]
  )
  results_row = pd.concat([results_row, temp_row])
stocks_to_analyze.drop('Competitors', axis=1, inplace=True)
stocks_to_analyze['competitors_performance'] = results_row

In [26]:
stocks_to_analyze

Unnamed: 0_level_0,Industry,Market Cap,P/E,Price,competitors_performance
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CO,Diagnostics & Research,496.50M,6.45,4.16,3.757855
ELMD,Medical Devices,93.18M,25.99,10.76,3.214306
EGAN,Software - Application,366.09M,47.04,11.76,3.970049
ECOM,Software - Application,687.82M,38.84,23.19,4.037479
WLKP,Chemicals,837.19M,12.2,23.75,3.964416
NWPX,Metal Fabrication,311.00M,12.1,31.8,3.908588
ACU,Household & Personal Products,109.38M,16.17,32.65,3.771332
STRL,Engineering & Construction,635.72M,10.58,22.02,3.629473


In [None]:
results_ticker = pd.DataFrame(columns=['Difference'])
#count average % change for last 3 years
for ticker in stocks_to_analyze.index:
  data = pdr.get_data_yahoo(ticker, start=date.today()-timedelta(days=365*3), end=date.today())
  temp = pd.DataFrame(
       {
           'Difference': data.Open.iloc[-1]/data.Open.iloc[0]
       }, index=[ticker]
   )
  results_ticker = pd.concat([results_ticker, temp])
stocks_to_analyze['companys_increase'] = results_ticker
stocks_to_analyze['difference_with_industry'] = stocks_to_analyze['competitors_performance']-stocks_to_analyze['companys_increase']

In [28]:
stocks_to_analyze

Unnamed: 0_level_0,Industry,Market Cap,P/E,Price,competitors_performance,companys_increase,difference_with_industry
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CO,Diagnostics & Research,496.50M,6.45,4.16,3.757855,0.388889,3.368966
ELMD,Medical Devices,93.18M,25.99,10.76,3.214306,1.81579,1.398516
EGAN,Software - Application,366.09M,47.04,11.76,3.970049,2.312871,1.657177
ECOM,Software - Application,687.82M,38.84,23.19,4.037479,2.428571,1.608908
WLKP,Chemicals,837.19M,12.2,23.75,3.964416,,
NWPX,Metal Fabrication,311.00M,12.1,31.8,3.908588,1.775613,2.132975
ACU,Household & Personal Products,109.38M,16.17,32.65,3.771332,1.281804,2.489528
STRL,Engineering & Construction,635.72M,10.58,22.02,3.629473,1.720094,1.909379


# Sentiment Analysis

In [16]:
import alpaca_trade_api as tradeapi
from newsapi import NewsApiClient
from flair.data import Sentence
import torch
import flair

In [34]:
api = tradeapi.REST('gGXztKtAyug4obVCBeLxheacZ4hUJ48O',
                    'https://api.polygon.io')


def sentiment(stock, api):
    url = 'https://www.tradingview.com/screener/'
    flair_sentiment = flair.models.TextClassifier.load('en-sentiment')
    newsapi = NewsApiClient(api_key='d1b690fc599140eda59b6dcf0b8b5c8c')
    response = newsapi.get_everything(qintitle=stock)
    news = api.polygon.news(stock)
    file = open('news.txt', 'w')

    sentiment = 0
    for line in response['articles']:
        words = str(line['title'])
        file.write(words)
        sentence = Sentence(str(words))
        flair_sentiment.predict(sentence)
        total_sentiment = sentence.labels

        if total_sentiment[0].value == 'NEGATIVE':
            sentiment -= total_sentiment[0].to_dict()['confidence'] 
        elif total_sentiment[0].value == 'POSITIVE':
            sentiment += total_sentiment[0].to_dict()['confidence']

    for source in news:
        words = source.summary
        try:
            file.write(words)
        except:
            print('FAILSAFE ACTIVATED')
        file.write('\n')
        sentence = Sentence(str(words))
        try:
            flair_sentiment.predict(sentence)
        except:
            print("\n")
        total_sentiment = sentence.labels
        if total_sentiment[0].value == 'NEGATIVE':
          total_sentiment[0].to_dict()['confidence']
        if total_sentiment[0].value == 'POSITIVE':
            total_sentiment[0].to_dict()['confidence']
            sentiment += total_sentiment[0].to_dict()['confidence']

    file.close()
    return sentiment


In [None]:
sentiments = pd.DataFrame(columns=['Sentiment'])
import time
for ticker in stocks_to_analyze.index:
  x = sentiment(ticker, api)
  temp = pd.DataFrame(
       {
           'Sentiment': x
       }, index=[ticker]
   )
  sentiments = pd.concat([sentiments, temp])
  time.sleep(15)
stocks_to_analyze['Sentiment'] = sentiments

In [None]:
stocks_to_analyze['difference_multiplier'] = ''
stocks_to_analyze['sentiment_multiplier'] = ''
difference_sent = stocks_to_analyze['Sentiment'].mean()
difference = stocks_to_analyze['difference_with_industry'].mean()
for ticker in stocks_to_analyze.index:
  if stocks_to_analyze['difference_with_industry'].loc[ticker] > difference:
    stocks_to_analyze['difference_multiplier'][ticker] =  2
  elif (stocks_to_analyze['difference_with_industry'].loc[ticker] < difference) & (stocks_to_analyze['difference_with_industry'].loc[ticker] > 0):
    stocks_to_analyze['difference_multiplier'][ticker] =  1
  else:
    stocks_to_analyze['difference_multiplier'][ticker] = 0.5
  
  if stocks_to_analyze['Sentiment'].loc[ticker] > difference_sent:
    stocks_to_analyze['sentiment_multiplier'][ticker] =  2
  elif (stocks_to_analyze['Sentiment'].loc[ticker] < difference_sent) & (stocks_to_analyze['Sentiment'].loc[ticker] > 0):
    stocks_to_analyze['sentiment_multiplier'][ticker] =  1
  else:
    stocks_to_analyze['sentiment_multiplier'][ticker] = 0.5

stocks_to_analyze['total_multiplier'] = stocks_to_analyze['sentiment_multiplier'] * stocks_to_analyze['difference_multiplier']
stocks_to_analyze.drop(['difference_multiplier','sentiment_multiplier'], axis=1, inplace=True)

In [31]:
stocks_to_analyze

Unnamed: 0_level_0,Industry,Market Cap,P/E,Price,competitors_performance,companys_increase,difference_with_industry,Sentiment,total_multiplier
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
CO,Diagnostics & Research,496.50M,6.45,4.16,3.757855,0.388889,3.368966,2.200826,2.0
ELMD,Medical Devices,93.18M,25.99,10.76,3.214306,1.81579,1.398516,21.00604,2.0
EGAN,Software - Application,366.09M,47.04,11.76,3.970049,2.312871,1.657177,25.894409,2.0
ECOM,Software - Application,687.82M,38.84,23.19,4.037479,2.428571,1.608908,30.02997,2.0
WLKP,Chemicals,837.19M,12.2,23.75,3.964416,,,15.953857,0.5
NWPX,Metal Fabrication,311.00M,12.1,31.8,3.908588,1.775613,2.132975,14.499956,2.0
ACU,Household & Personal Products,109.38M,16.17,32.65,3.771332,1.281804,2.489528,24.958013,4.0
STRL,Engineering & Construction,635.72M,10.58,22.02,3.629473,1.720094,1.909379,18.769752,1.0


# Portfolio allocation

In [None]:
total_money = 50000
total_mult = stocks_to_analyze.total_multiplier.sum()
stocks_to_analyze['money_to_place'] = ''
stocks_to_analyze['stocks_to_buy'] = ''
for row in stocks_to_analyze.index:
  stocks_to_analyze['money_to_place'][row] =  (total_money * (stocks_to_analyze['total_multiplier'][row]/total_mult) )  
  stocks_to_analyze['stocks_to_buy'][row] = (stocks_to_analyze['money_to_place'][row] / stocks_to_analyze['Price'][row]).round()

In [33]:
stocks_to_analyze

Unnamed: 0_level_0,Industry,Market Cap,P/E,Price,competitors_performance,companys_increase,difference_with_industry,Sentiment,total_multiplier,money_to_place,stocks_to_buy
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
CO,Diagnostics & Research,496.50M,6.45,4.16,3.757855,0.388889,3.368966,2.200826,2.0,6451.61,1551
ELMD,Medical Devices,93.18M,25.99,10.76,3.214306,1.81579,1.398516,21.00604,2.0,6451.61,600
EGAN,Software - Application,366.09M,47.04,11.76,3.970049,2.312871,1.657177,25.894409,2.0,6451.61,549
ECOM,Software - Application,687.82M,38.84,23.19,4.037479,2.428571,1.608908,30.02997,2.0,6451.61,278
WLKP,Chemicals,837.19M,12.2,23.75,3.964416,,,15.953857,0.5,1612.9,68
NWPX,Metal Fabrication,311.00M,12.1,31.8,3.908588,1.775613,2.132975,14.499956,2.0,6451.61,203
ACU,Household & Personal Products,109.38M,16.17,32.65,3.771332,1.281804,2.489528,24.958013,4.0,12903.2,395
STRL,Engineering & Construction,635.72M,10.58,22.02,3.629473,1.720094,1.909379,18.769752,1.0,3225.81,146
