# AHP-TOPSIS w/ Sentiment and Predicted Price Change (PPC)

Assuming that all required data is supplied and stocks are pre-selected for ranking.

In [5]:
!pip install yahoo_fin
!pip install yfinance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yfinance
  Downloading yfinance-0.2.12-py2.py3-none-any.whl (59 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.2/59.2 KB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Collecting html5lib>=1.1
  Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.2/112.2 KB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting requests>=2.26
  Downloading requests-2.28.2-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.8/62.8 KB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting frozendict>=2.3.4
  Downloading frozendict-2.3.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32

In [17]:
# imports
import yahoo_fin.stock_info as si
import yfinance as yf
import requests
import pandas as pd
from bs4 import BeautifulSoup
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import random

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [7]:
# stock list
def fetch_stock_list():
    url = "https://api.nasdaq.com/api/quote/list-type/nasdaq100"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36', "Upgrade-Insecure-Requests": "1","DNT": "1","Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8","Accept-Language": "en-US,en;q=0.5","Accept-Encoding": "gzip, deflate"}
    response = requests.get(url, headers=headers)
    data = response.json()
    stock_list = [item["symbol"] for item in data["data"]["data"]["rows"]]
    return stock_list

# name, sector, industry, description
def stock_general_info(ticker):

    # name, sector, industry - yahoo finance
    url = f"https://finance.yahoo.com/quote/{ticker}"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    name = soup.find("h1", class_="D(ib)").text
    sector = soup.find("span", text="Sector(s)").find_next_sibling("span").text
    industry = soup.find("span", text="Industry").find_next_sibling("span").text

    # general info - marketwatch
    url = f"https://www.marketwatch.com/investing/stock/{ticker}"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    description = soup.find("p", class_="description__text").text.strip()
    

    return (name,sector,industry,description)

# fundamental ratios and data
def fundamental_data(ticker):
    data = si.get_quote_table(ticker)
    return data

# yearly prices
def yearly_prices(ticker):
    data = yf.download(ticker, period="1y", interval="1d")
    return data[['Adj Close']].reset_index()

# intraday prices
def intraday_prices(ticker):
    data = yf.download(ticker, period="1d", interval="2m")
    return data[['Adj Close']].reset_index()

# sentiment analysis
def find_sentiment(ticker):
    url = f'https://finviz.com/quote.ashx?t={ticker}'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, features='html.parser')
    news_table = soup.find(id='news-table')

    analyzer = SentimentIntensityAnalyzer()
    sentiment_scores = []
    for row in news_table.findAll('tr'):
        date_data = row.td.text.split(' ')
        if len(date_data) == 1:
            date = date
            time = date_data[0]
        else:
            date = date_data[0]
            time = date_data[1]
        headline = row.a.text
        print(headline)
        sentiment_score = analyzer.polarity_scores(headline)['compound']
        sentiment_scores.append(sentiment_score)


    overall_sentiment_score = sum(sentiment_scores) / len(sentiment_scores)
    return(overall_sentiment_score)

In [18]:
stockList = fetch_stock_list()
tickers = {name: yf.Ticker(name) for name in stockList}

criteria = ['Beta (5Y Monthly)',
            'EPS (TTM)',
            'PE Ratio (TTM)',
            'Quote Price',
            'Volume',
            'Sentiment',
            'Predicted Price Change']

data = []
for stock in stockList:
  stockData = fundamental_data(stock)
  stockData['Sentiment'] = random.randint(-1,1) #find_sentiment(stock)
  data.append(stockData)
data

[{'1y Target Est': 80.71,
  '52 Week Range': '51.10 - 71.68',
  'Ask': '0.00 x 1200',
  'Avg. Volume': 1842350.0,
  'Beta (5Y Monthly)': 1.21,
  'Bid': '0.00 x 900',
  "Day's Range": '70.27 - 71.25',
  'EPS (TTM)': 2.26,
  'Earnings Date': 'May 16, 2023 - May 22, 2023',
  'Ex-Dividend Date': nan,
  'Forward Dividend & Yield': 'N/A (N/A)',
  'Market Cap': '33.943B',
  'Open': 70.57,
  'PE Ratio (TTM)': 31.51,
  'Previous Close': 70.42,
  'Quote Price': 71.22000122070312,
  'Volume': 1482498.0,
  'Sentiment': 0},
 {'1y Target Est': 258.76,
  '52 Week Range': '223.30 - 296.67',
  'Ask': '0.00 x 900',
  'Avg. Volume': 2509840.0,
  'Beta (5Y Monthly)': 0.65,
  'Bid': '0.00 x 1200',
  "Day's Range": '227.37 - 231.80',
  'EPS (TTM)': 12.1,
  'Earnings Date': 'Apr 25, 2023 - May 01, 2023',
  'Ex-Dividend Date': 'Feb 14, 2023',
  'Forward Dividend & Yield': '8.52 (3.73%)',
  'Market Cap': '121.976B',
  'Open': 230.16,
  'PE Ratio (TTM)': 18.88,
  'Previous Close': 228.72,
  'Quote Price': 228.4

In [29]:
df = pd.DataFrame(data, dtype='float64')
df = df[ [x for x in criteria if x in df.columns] ]

  exec(code_obj, self.user_global_ns, self.user_ns)


In [56]:
# Removing negative values as detailed in study
# Replacing Nan Values with minimum value in that column
for index, value in df.min().iteritems():
  if value < 0:
    df[index] = df[index] - value

df = df.fillna(value = dict(df.min()))
df

Unnamed: 0,Beta (5Y Monthly),EPS (TTM),PE Ratio (TTM),Quote Price,Volume,Sentiment
0,1.43,29.34,31.51,71.220001,1482498.0,1.0
1,0.87,39.18,18.88,228.429993,1842360.0,2.0
2,1.22,28.29,29.67,35.900002,17450058.0,2.0
3,1.03,29.02,13.39,25.980000,36071934.0,1.0
4,1.60,51.41,15.92,387.410004,958534.0,2.0
...,...,...,...,...,...,...
87,1.72,23.21,0.00,15.340000,12563897.0,2.0
88,1.47,26.81,0.00,93.919998,44899128.0,2.0
89,1.41,25.62,0.00,8.340000,10208015.0,1.0
90,1.81,34.32,23.96,173.440002,1184562.0,1.0


# Using AHP Implementation

In [31]:
!pip install ahpy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ahpy
  Downloading ahpy-2.0-py3-none-any.whl (21 kB)
Installing collected packages: ahpy
Successfully installed ahpy-2.0


In [35]:
# Randomly assign weights
import itertools
pairs = list(itertools.combinations(criteria,2))
comparisons = {pair: random.randint(1,10) for pair in pairs}

In [38]:
import ahpy
criteriaAHP = ahpy.Compare(name="criteriaAHP", comparisons=comparisons)
weights = criteriaAHP.target_weights
weights

{'EPS (TTM)': 0.354,
 'Beta (5Y Monthly)': 0.2648,
 'PE Ratio (TTM)': 0.1195,
 'Quote Price': 0.1086,
 'Volume': 0.0933,
 'Predicted Price Change': 0.0328,
 'Sentiment': 0.027}

In [42]:
# Do we want to maximize this criteria (TRUE) or not (FALSE)
# EPS, Beta, PE, Price, Volume, PPC, Sentiment
maximize = [True, False, True, False, True, True, True]

# Using TOPSIS Implementation

In [39]:
# https://github.com/Glitchfix/TOPSIS-Python

import numpy as np
import warnings


class Topsis():
    evaluation_matrix = np.array([])  # Matrix
    weighted_normalized = np.array([])  # Weight matrix
    normalized_decision = np.array([])  # Normalisation matrix
    M = 0  # Number of rows
    N = 0  # Number of columns

    '''
	Create an evaluation matrix consisting of m alternatives and n criteria,
	with the intersection of each alternative and criteria given as {\displaystyle x_{ij}}x_{ij},
	we therefore have a matrix {\displaystyle (x_{ij})_{m\times n}}(x_{{ij}})_{{m\times n}}.
	'''

    def __init__(self, evaluation_matrix, weight_matrix, criteria):
        # M×N matrix
        self.evaluation_matrix = np.array(evaluation_matrix, dtype="float")

        # M alternatives (options)
        self.row_size = len(self.evaluation_matrix)

        # N attributes/criteria
        self.column_size = len(self.evaluation_matrix[0])

        # N size weight matrix
        self.weight_matrix = np.array(weight_matrix, dtype="float")
        self.weight_matrix = self.weight_matrix/sum(self.weight_matrix)
        self.criteria = np.array(criteria, dtype="float")

    '''
	# Step 2
	The matrix {\displaystyle (x_{ij})_{m\times n}}(x_{{ij}})_{{m\times n}} is then normalised to form the matrix
	'''

    def step_2(self):
        # normalized scores
        self.normalized_decision = np.copy(self.evaluation_matrix)
        sqrd_sum = np.zeros(self.column_size)
        for i in range(self.row_size):
            for j in range(self.column_size):
                sqrd_sum[j] += self.evaluation_matrix[i, j]**2
        for i in range(self.row_size):
            for j in range(self.column_size):
                self.normalized_decision[i,
                                         j] = self.evaluation_matrix[i, j]/(sqrd_sum[j]**0.5)

    '''
	# Step 3
	Calculate the weighted normalised decision matrix
	'''

    def step_3(self):
        from pdb import set_trace
        self.weighted_normalized = np.copy(self.normalized_decision)
        for i in range(self.row_size):
            for j in range(self.column_size):
                self.weighted_normalized[i, j] *= self.weight_matrix[j]

    '''
	# Step 4
	Determine the worst alternative {\displaystyle (A_{w})}(A_{w}) and the best alternative {\displaystyle (A_{b})}(A_{b}):
	'''

    def step_4(self):
        self.worst_alternatives = np.zeros(self.column_size)
        self.best_alternatives = np.zeros(self.column_size)
        for i in range(self.column_size):
            if self.criteria[i]:
                self.worst_alternatives[i] = min(
                    self.weighted_normalized[:, i])
                self.best_alternatives[i] = max(self.weighted_normalized[:, i])
            else:
                self.worst_alternatives[i] = max(
                    self.weighted_normalized[:, i])
                self.best_alternatives[i] = min(self.weighted_normalized[:, i])

    '''
	# Step 5
	Calculate the L2-distance between the target alternative {\displaystyle i}i and the worst condition {\displaystyle A_{w}}A_{w}
	{\displaystyle d_{iw}={\sqrt {\sum _{j=1}^{n}(t_{ij}-t_{wj})^{2}}},\quad i=1,2,\ldots ,m,}
	and the distance between the alternative {\displaystyle i}i and the best condition {\displaystyle A_{b}}A_b
	{\displaystyle d_{ib}={\sqrt {\sum _{j=1}^{n}(t_{ij}-t_{bj})^{2}}},\quad i=1,2,\ldots ,m}
	where {\displaystyle d_{iw}}d_{{iw}} and {\displaystyle d_{ib}}d_{{ib}} are L2-norm distances 
	from the target alternative {\displaystyle i}i to the worst and best conditions, respectively.
	'''

    def step_5(self):
        self.worst_distance = np.zeros(self.row_size)
        self.best_distance = np.zeros(self.row_size)

        self.worst_distance_mat = np.copy(self.weighted_normalized)
        self.best_distance_mat = np.copy(self.weighted_normalized)

        sqrd_sum = np.zeros(self.column_size)
        for i in range(self.row_size):
            for j in range(self.column_size):
                sqrd_sum[j] += self.evaluation_matrix[i, j]**2

        for i in range(self.row_size):
            for j in range(self.column_size):
                self.worst_distance_mat[i][j] = ((self.weighted_normalized[i][j]-self.worst_alternatives[j]) / sqrd_sum[j]**0.5)**2
                self.best_distance_mat[i][j] = ((self.weighted_normalized[i][j]-self.best_alternatives[j]) / sqrd_sum[j]**0.5)**2
                
                self.worst_distance[i] += self.worst_distance_mat[i][j]
                self.best_distance[i] += self.best_distance_mat[i][j]

        for i in range(self.row_size):
            self.worst_distance[i] = self.worst_distance[i]**0.5
            self.best_distance[i] = self.best_distance[i]**0.5

    '''
	# Step 6
	Calculate the similarity
	'''

    def step_6(self):
        np.seterr(all='ignore')
        self.worst_similarity = np.zeros(self.row_size)
        self.best_similarity = np.zeros(self.row_size)

        for i in range(self.row_size):
            # calculate the similarity to the worst condition
            self.worst_similarity[i] = self.worst_distance[i] / \
                (self.worst_distance[i]+self.best_distance[i])

            # calculate the similarity to the best condition
            self.best_similarity[i] = self.best_distance[i] / \
                (self.worst_distance[i]+self.best_distance[i])
    
    def ranking(self, data):
        return [i+1 for i in data.argsort()]

    def rank_to_worst_similarity(self):
        # return rankdata(self.worst_similarity, method="min").astype(int)
        return self.ranking(self.worst_similarity)

    def rank_to_best_similarity(self):
        # return rankdata(self.best_similarity, method='min').astype(int)
        return self.ranking(self.best_similarity)

    def calc(self):
        print("Step 1\n", self.evaluation_matrix, end="\n\n")
        self.step_2()
        print("Step 2\n", self.normalized_decision, end="\n\n")
        self.step_3()
        print("Step 3\n", self.weighted_normalized, end="\n\n")
        self.step_4()
        print("Step 4\n", self.worst_alternatives,
              self.best_alternatives, end="\n\n")
        self.step_5()
        print("Step 5\n", self.worst_distance, self.best_distance, end="\n\n")
        self.step_6()
        print("Step 6\n", self.worst_similarity,
              self.best_similarity, end="\n\n")

# Results

In [49]:
evaluation_matrix = np.array(df)
criterias = np.array(maximize)

weights_list = list(weights.values())

t = Topsis(evaluation_matrix, weights_list, criterias)
t.calc()
rankings = t.rank_to_worst_similarity()

Step 1
 [[1.43000000e+00 2.93400000e+01 3.15100000e+01 7.12200012e+01
  1.48249800e+06 1.00000000e+00]
 [8.70000000e-01 3.91800000e+01 1.88800000e+01 2.28429993e+02
  1.84236000e+06 2.00000000e+00]
 [1.22000000e+00 2.82900000e+01 2.96700000e+01 3.59000015e+01
  1.74500580e+07 2.00000000e+00]
 [1.03000000e+00 2.90200000e+01 1.33900000e+01 2.59799995e+01
  3.60719340e+07 1.00000000e+00]
 [1.60000000e+00 5.14100000e+01 1.59200000e+01 3.87410004e+02
  9.58534000e+05 2.00000000e+00]
 [1.16000000e+00 3.09100000e+01 1.98300000e+01 7.59599991e+01
  2.01160500e+06 1.00000000e+00]
 [1.55000000e+00 3.93800000e+01 3.57100000e+01 4.39200012e+02
  2.01068000e+05 1.00000000e+00]
 [1.20000000e+00 3.11300000e+01 2.78200000e+01 1.12690002e+02
  1.08894300e+06 1.00000000e+00]
 [1.71000000e+00 6.43000000e+01 1.32600000e+01 4.93399994e+02
  1.05405200e+06 2.00000000e+00]
 [1.73000000e+00 3.08400000e+01 5.43900000e+01 2.04509995e+02
  1.01234900e+06 1.00000000e+00]
 [1.21000000e+00 3.14600000e+01 2.48900000

In [53]:
pd.DataFrame(rankings, index=stockList, columns=['Rank'])

Unnamed: 0,Rank
CPRT,83
AMGN,78
CMCSA,82
INTC,81
KLAC,84
...,...
WBD,91
AMZN,57
LCID,77
MAR,41


# Checking out Portfolio Mangament stuff

In [57]:
import operator as op
from functools import reduce

def ncr(n, r):
    r = min(r, n-r)
    numer = reduce(op.mul, range(n, n-r, -1), 1)
    denom = reduce(op.mul, range(1, r+1), 1)
    return numer // denom  # or / in Python 2

In [60]:
def totalComb(n):
  return sum( [ncr(n,i) for i in range(1,n) if ncr(n,i) > 10000] )

for i in range(10,30):
  print(i,totalComb(i))

10 0
11 0
12 0
13 0
14 0
15 0
16 35750
17 112268
18 236912
19 514216
20 1036184
21 2082058
22 4176086
23 8366802
24 16772566
25 33549180
26 67102960
27 134211120
28 268428090
29 536862732
