In [1]:
import numpy as np
import pandas as pd
import yfinance as yf

from scipy.optimize import minimize

# Choosing top-50 and bottom-50 companies

In [2]:
target_long = pd.DataFrame(['ASML', 'GOOG', 'ACN', 'AAPL', 'SONY',
                            'INTU', 'V', 'MA', 'BX', 'TTE'],
                           columns=['Ticker'])
# TODO
target_short = pd.DataFrame([],
                            columns=['Ticker']
                            )

# Getting 2-month history

In [3]:
def get_history(target_df: pd.DataFrame, long=True) -> pd.DataFrame:
    history = pd.DataFrame(columns=list(target_long['Ticker']))
    for ticker in target_df['Ticker']:
        # TODO think about the appropriate price column to use
        history[ticker] = yf.Ticker(ticker).history(interval='5m')['High' if long else 'Low']
    return history

In [4]:
history_long = get_history(target_long)
history_long.info()
history_long.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1793 entries, 2023-10-09 09:30:00-04:00 to 2023-11-08 15:55:00-05:00
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ASML    1793 non-null   float64
 1   GOOG    1793 non-null   float64
 2   ACN     1793 non-null   float64
 3   AAPL    1793 non-null   float64
 4   SONY    1786 non-null   float64
 5   INTU    1793 non-null   float64
 6   V       1793 non-null   float64
 7   MA      1793 non-null   float64
 8   BX      1793 non-null   float64
 9   TTE     1793 non-null   float64
dtypes: float64(10)
memory usage: 154.1 KB


Unnamed: 0_level_0,ASML,GOOG,ACN,AAPL,SONY,INTU,V,MA,BX,TTE
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-10-09 09:30:00-04:00,589.309998,138.029999,310.450012,176.880005,82.5,528.77002,233.647507,395.929993,103.730003,65.019997
2023-10-09 09:35:00-04:00,584.61499,137.551804,311.23999,176.380005,82.389999,529.064819,233.940002,393.839996,103.858398,65.150002
2023-10-09 09:40:00-04:00,586.650024,137.710007,311.109985,176.839996,82.5,530.330017,233.720001,393.390015,104.099998,65.160004
2023-10-09 09:45:00-04:00,587.619995,138.460007,310.890015,177.089996,82.580002,530.909973,233.847504,393.869995,104.058403,65.089996
2023-10-09 09:50:00-04:00,587.830017,138.689896,310.429993,177.169998,82.584999,531.455017,233.25,392.410004,103.68,64.93


In [5]:
history_short = get_history(target_short)
history_short.info()
history_short.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ASML    0 non-null      object
 1   GOOG    0 non-null      object
 2   ACN     0 non-null      object
 3   AAPL    0 non-null      object
 4   SONY    0 non-null      object
 5   INTU    0 non-null      object
 6   V       0 non-null      object
 7   MA      0 non-null      object
 8   BX      0 non-null      object
 9   TTE     0 non-null      object
dtypes: object(10)
memory usage: 124.0+ bytes


Unnamed: 0,ASML,GOOG,ACN,AAPL,SONY,INTU,V,MA,BX,TTE


# Getting 2-month compound returns

In [6]:
def get_returns(history_df: pd.DataFrame) -> np.ndarray:
    n = len(history_df.columns)
    returns = np.zeros(n, dtype=float)
    for i in range(n):
        # TODO think about the appropriate return calculation
        helper = history_df.iloc[:, i] / history_df.iloc[:, i].shift(1)
        helper = helper.iloc[1:]
        returns[i] = np.prod(helper) - 1
    return returns

In [7]:
returns_long = get_returns(history_long)
print(f'Top companies\' returns: {returns_long}')

Top companies' returns: [ 0.08487896 -0.0337608   0.02448051  0.0345432   0.05841045 -0.01106348
  0.04417978 -0.01394183 -0.04309265 -0.00461389]


In [8]:
returns_short = get_returns(history_short)
print(f'Bottom companies\' returns: {returns_short}')

Bottom companies' returns: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


# Getting weights

In [9]:
def sharpe_long(weights):
    return -np.dot(returns_long, weights) / np.sqrt(weights @ history_long.cov().to_numpy() @ weights)


def sharpe_short(weights):
    return -np.dot(returns_short, weights) / np.sqrt(weights @ history_short.cov().to_numpy() @ weights)

In [10]:
def get_weights(history_df: pd.DataFrame, long=True) -> np.ndarray:
    n = len(history_df.columns)
    weights = np.ones(n)

    bounds = [(0.0, None)] * n if long else [(None, 0.0)] * n
    constraints = ({'type': 'eq', 'fun': lambda weights: weights.sum() - 1.0})
    optimal_weights = minimize(sharpe_long if long else sharpe_short,
                               weights, 
                               method='SLSQP',
                               constraints=constraints,
                               bounds=bounds
                               ).x

    return optimal_weights

In [11]:
weights_long = get_weights(history_long, True)
print(f'Optimal weights for top companies: {weights_long}')
print(f'Optimal Sharpe ratio: {-sharpe_long(weights_long)}')

Optimal weights for top companies: [6.39782185e-10 3.13414921e-11 3.22108130e-10 7.11078752e-09
 9.99999988e-01 1.65696829e-11 2.63636752e-08 5.90441564e-11
 1.49244750e-11 0.00000000e+00]
Optimal Sharpe ratio: 0.028458272296391824


In [None]:
weights_short = get_weights(history_long, False)
print(f'Optimal weights for bottom companies: {weights_short}')
print(f'Optimal Sharpe ratio: {-sharpe_long(weights_short)}')