In [None]:
#!pip install yfinance

# Invest Portfolio

Ivan has a large amount of money, and he is starting to think about how to invest it to gain maximum profit.
He has chosen 10 candidate companies, but since Ivan is new to investing, he needs your advice. In this notebook we will help Ivan on his journey.

In [1]:
# The 10 candidate companies to consider: key is the ticker symbol, value is a short description.
tickers = {
     "BA": "The Boeing Company",
     "MCD": "McDonald's Corporation",
     "GOOGL": "Google Company",
     "DIS": "The Walt Disney Company",
     "PFE": "Pfizer Inc.",
     "AMT": "American Tower Corporation",
     "VOD": "Vodafone Group Public Limited Company",
     "BNP.PA": "BNP Paribas Bank",
     "ADS.DE": "Adidas",
     "TM": "Toyota Motor Corporation",
 }


## Task 1 Download and preprocess data

We will use data from 2010 to 2021 for training and data from 2021 to 2024 for testing.



In [2]:
# Date boundaries (ISO "YYYY-MM-DD" strings) for the training and testing periods.
# The test period starts on the same date the training period ends.
start_train_date = "2010-01-01"
end_train_date = "2021-01-01"
start_test_date = "2021-01-01"
end_test_date = "2024-01-01"

Write a function that takes ticker name, start and end dates and returns pandas series of annual stock prices.



In [32]:
import yfinance as yf
import pandas as pd
import numpy as np


def download_annual_stock_price(ticker, buy_date="2010-01-01", sell_date="2021-01-01"):
    """Download yearly opening prices for a single ticker using ``yf.download``.

    Quarterly bars (``interval="3mo"``) are requested and every fourth row is
    kept, which leaves one opening price per year counted from the first
    returned bar.

    Parameters
    ----------
    ticker : str
        Ticker symbol understood by yfinance, e.g. ``"BA"``.
    buy_date : str
        Passed to ``yf.download`` as ``start``.
    sell_date : str
        Passed to ``yf.download`` as ``end``.

    Returns
    -------
    numpy.ndarray
        Opening prices of the kept rows; 1-D for a single ticker.
    """
    quarterly_bars = yf.download(
        ticker, start=buy_date, end=sell_date, interval="3mo", progress=False
    )
    yearly_open = np.asarray(quarterly_bars[::4]["Open"])
    # Recent yfinance releases return MultiIndex columns even for one ticker,
    # which turns "Open" into a one-column frame of shape (n, 1). Flatten that
    # case so downstream code always receives a 1-D price vector.
    if yearly_open.ndim == 2 and yearly_open.shape[1] == 1:
        yearly_open = yearly_open[:, 0]
    return yearly_open

# Yearly price array for every candidate ticker over the training period,
# keyed by ticker symbol. Each entry triggers one download.
annual_stock_price = {
    ticker: download_annual_stock_price(ticker, start_train_date, end_train_date) 
    for ticker in tickers
}
# Display one ticker's prices as a quick visual check.
annual_stock_price["BA"]

array([ 55.72000122,  66.15000153,  74.69999695,  76.55000305,
       136.00999451, 131.07000732, 141.38000488, 156.30000305,
       295.75      , 316.19000244, 328.54998779])


Let's define $\xi_i$ as a random variable equal to the annual return ratio of $\text{stock}_i$ relative to the buying price.

Convert the annual stock prices to annual return ratios, assuming that we bought the stock at moment $t=0$: each yearly price change is divided by the initial price.
For example, if the annual stock prices are $(10, 15, 12, 13)$,
 the annual return ratios will be $(0.5, -0.3, 0.1)$.

In [47]:
def get_annual_return_ratio_from_initial(annual_stock_prices):
    """Return year-over-year price changes relative to the initial (buying) price.

    For prices ``(p_0, p_1, ..., p_n)`` the result is
    ``((p_1 - p_0) / p_0, (p_2 - p_1) / p_0, ..., (p_n - p_{n-1}) / p_0)``.

    Parameters
    ----------
    annual_stock_prices : array-like
        Yearly prices in chronological order; the first element is the
        buying price. NumPy arrays, lists and pandas Series are accepted.

    Returns
    -------
    numpy.ndarray
        Array with one element fewer than the input along the first axis.
    """
    # Convert to a plain float array first: subtracting two slices of a pandas
    # Series aligns them by index label (yielding NaNs), and Python lists do
    # not support element-wise subtraction at all.
    prices = np.asarray(annual_stock_prices, dtype=float)
    yearly_change = prices[1:] - prices[:-1]
    return yearly_change / prices[0]

# Sanity checks on hand-computed examples. The ratios are floating-point
# values, so compare with a tolerance instead of exact equality.
test_annual_return_ratio = get_annual_return_ratio_from_initial(np.array([1,3,2]))
np.testing.assert_allclose(test_annual_return_ratio, np.array([2.0, -1.0]))

test_annual_return_ratio = get_annual_return_ratio_from_initial(np.array([10, 15, 12, 13]))
np.testing.assert_allclose(test_annual_return_ratio, np.array([0.5, -0.3, 0.1]))

# Annual return ratios for every ticker, derived from the yearly prices
# downloaded above and keyed by ticker symbol.
annual_return_ratio = {
    ticker: get_annual_return_ratio_from_initial(values)
    for ticker, values in annual_stock_price.items()
}

## Task 2 Simple approach

Let's invest all the money in the most profitable company. To do that, we first need to calculate $E\xi_i$.

We can calculate this value either from the company's annual return ratios or directly from its stock prices. Let's do both to make sure that we get the same result.

In [79]:
# Absolute tolerance used when comparing the two expectation estimates.
eps = 1e-7

def get_expectation_ratio_from_return_ratio(ticker_return_ratio):
    """Estimate the expected annual return ratio as the mean of the observed ratios."""
    mean_ratio = np.mean(ticker_return_ratio)
    return mean_ratio

def get_expectation_ratio_from_stock_price(ticker_stock_price):
    """Estimate the expected annual return ratio directly from the price series.

    The total return relative to the first price is spread evenly over the
    number of yearly steps (``len(ticker_stock_price) - 1``).
    """
    first_price = ticker_stock_price[0]
    last_price = ticker_stock_price[-1]
    n_steps = len(ticker_stock_price) - 1
    total_return = (last_price / first_price) - 1
    return total_return / n_steps


# Cross-check: for every ticker both ways of computing the expectation
# (mean of return ratios vs. total price change per step) must agree within eps.
for ticker in tickers:
    expectation_ratio_1 = get_expectation_ratio_from_return_ratio(annual_return_ratio[ticker])
    expectation_ratio_2 =  get_expectation_ratio_from_stock_price(annual_stock_price[ticker])
    assert abs(expectation_ratio_1 - expectation_ratio_2) < eps


Looks like we are ready to implement our greedy approach:

$$ \text{company-to-invest} = \arg\max_i E \xi_i $$

Let's implement this approach having tickers and dates of buying and selling:

In [80]:
def get_expectation_ratio_for_greedy_performance(ticker, buy_date, sell_date):
    """Download a ticker's yearly prices for the period and return its expectation ratio."""
    return get_expectation_ratio_from_stock_price(
        download_annual_stock_price(ticker, buy_date, sell_date)
    )

def find_ticker_for_greedy_selection(tickers, buy_date, sell_date):
    """Return the ticker with the highest expectation ratio over the period.

    Tickers are visited in iteration order and a candidate replaces the
    current best only when its ratio is strictly greater, so ties keep the
    earlier ticker. An empty string is returned when no ticker qualifies.
    """
    best_ticker, best_ratio = "", -float("INF")
    for candidate in tickers:
        candidate_ratio = get_expectation_ratio_for_greedy_performance(candidate, buy_date, sell_date)
        if not candidate_ratio > best_ratio:
            continue
        best_ticker, best_ratio = candidate, candidate_ratio
    return best_ticker

# Sanity check on a ticker outside the candidate list; this call goes through
# the downloader, so it needs the price data to be reachable.
assert 0.8 < get_expectation_ratio_for_greedy_performance("AAPL", start_train_date, end_train_date) < 0.9

In [81]:
# Pick the ticker on the training period only, then evaluate the same ticker
# on both the training and the test periods.
best_greedy_ticker = find_ticker_for_greedy_selection(tickers, start_train_date, end_train_date)
greedy_ticker_train_performance = get_expectation_ratio_for_greedy_performance(
    best_greedy_ticker, start_train_date, end_train_date
)
greedy_ticker_test_performance = get_expectation_ratio_for_greedy_performance(
    best_greedy_ticker, start_test_date, end_test_date
)
print(f"\n ticker to invest in greedy approach: {best_greedy_ticker}")
print(f"Train expectation ratio: {greedy_ticker_train_performance}")
print(f"Test expectation ratio: {greedy_ticker_test_performance}")



 ticker to invest in greedy approach: ADS.DE
Train expectation ratio: 0.5613642301863977
Test expectation ratio: -0.11804856457595814


Looks like our greedy method is not very helpful.

## Task 3 Variance

If a stock has a large variance, the investor cannot be sure about its future returns. What is the variance in our case?

Your task is to find $D\xi_i$

In [82]:
def get_variance_from_return_ratio(ticker_return_ratio):
    """Return the variance of the annual return ratios, computed with ``np.var`` defaults."""
    variance = np.var(ticker_return_ratio)
    return variance

def get_ticker_variance(ticker, buy_date, sell_date):
    """Download a ticker's yearly prices for the period and return the variance of its return ratios."""
    yearly_prices = download_annual_stock_price(ticker, buy_date, sell_date)
    return get_variance_from_return_ratio(
        get_annual_return_ratio_from_initial(yearly_prices)
    )

# Sanity check on a ticker outside the candidate list; this call goes through
# the downloader, so it needs the price data to be reachable.
assert 1.9 < get_ticker_variance("AAPL", start_train_date, end_train_date) < 2.0

Let's reformulate our task: we will select the stock to invest in by maximizing the ratio of the expected return to its standard deviation:

$$\text{company-to-invest} = \arg\max_i \left( \frac{E\xi_i} {\sqrt{ D\xi_i}} \right)$$

In [77]:
def find_ticker_for_max_expectation_divided_by_var(tickers, buy_date, sell_date):
    """Return the ticker maximizing expectation divided by standard deviation.

    For every ticker the score is ``E / sqrt(D)``, where ``E`` is the
    expectation ratio and ``D`` the variance of the annual return ratios over
    the given period. Tickers are visited in iteration order and a candidate
    replaces the current best only when its score is strictly greater. An
    empty string is returned when no ticker qualifies.

    Parameters
    ----------
    tickers : iterable of str
        Ticker symbols to compare.
    buy_date, sell_date : str
        Period boundaries forwarded to ``download_annual_stock_price``.

    Returns
    -------
    str
        Symbol of the best-scoring ticker.
    """
    max_expect_div_by_var = -float("INF")
    company_to_invest = ""
    for ticker in tickers:
        # Download the prices once and derive both statistics from the same
        # series instead of fetching the data separately for each of them.
        ticker_prices = download_annual_stock_price(ticker, buy_date, sell_date)
        expectation_ratio = get_expectation_ratio_from_stock_price(ticker_prices)
        return_ratio = get_annual_return_ratio_from_initial(ticker_prices)
        var = get_variance_from_return_ratio(return_ratio)
        expect_div_by_var = expectation_ratio / np.sqrt(var)
        if expect_div_by_var > max_expect_div_by_var:
            max_expect_div_by_var = expect_div_by_var
            company_to_invest = ticker
    return company_to_invest

In [83]:
# Pick the ticker on the training period only, then evaluate the same ticker
# on both the training and the test periods.
best_expectation_var_ticker = find_ticker_for_max_expectation_divided_by_var(
    tickers, start_train_date, end_train_date
)
expectation_var_train_performance = get_expectation_ratio_for_greedy_performance(
    best_expectation_var_ticker, start_train_date, end_train_date
)
# Stored under its own name so the greedy approach's test result computed
# earlier is not overwritten.
expectation_var_test_performance = get_expectation_ratio_for_greedy_performance(
    best_expectation_var_ticker, start_test_date, end_test_date
)
print(f"\n ticker to invest in expectation-variance approach: {best_expectation_var_ticker}")
print(f"Train expectation ratio: {expectation_var_train_performance}")
print(f"Test expectation ratio: {expectation_var_test_performance}")



 ticker to invest in expectation-variance approach: DIS
Train expectation ratio: 0.347046133188101
Test expectation ratio: -0.25589815088687423


Still not good...

## Task 4 Linear combination

In investing, there is a golden rule not to put all your eggs in one basket. So why shouldn't Ivan diversify his investments?

Let's define the weight vector $w = (w_1, \ldots, w_n)$ such that $\sum_{i} w_i = 1$ and $0 \le w_i \le 1$ for every $i$.

Our solution can then be represented as:
$\xi = \sum_i w_i \xi_i$

**Task:** Expand the final formula

$$ \max_w \left (  \frac{E[\xi]} { \sqrt{D[\xi]} } \right)   =
    \max_w \left (  \frac{E \left[ \sum_i w_i \xi_i \right] } { \sqrt{D \left[ \sum_i w_i \xi_i \right] } } \right)  = \ldots
$$  



To find the best vector $w$, we have to create a function that takes a vector $w$ and returns the value of our metric for it.

In [None]:
# Note: feel free to add additional functions

def get_expectation(tickers, buy_date, sell_date, weights):
    """Calculate the expected profit if we invest in each ticker with the given weight.

    TODO: not implemented yet. The body is a placeholder, so the function
    currently returns None.
    """
    # Write your code here
    pass

def create_functional(tickers, buy_date, sell_date):
    """Build and return a function of the weight vector for the given tickers and period.

    TODO: not implemented yet. The returned ``functional`` is a placeholder
    that currently returns None for any weights.
    """
    # Write your code here

    def functional(weights):
        # Write your code here
        pass


    return functional


functional = create_functional(tickers, start_train_date, end_train_date)


# One equal share per ticker so that the weights sum to 1. The divisor is the
# number of tickers, not the length of each ticker symbol string.
equal_weights = [1 / len(tickers) for _ in tickers]
metric_for_equal_weights = functional(equal_weights)

assert 1.0 < metric_for_equal_weights < 1.5


## Task 5 Optimization

But how do we find the best weights?

Let's define an optimization problem and use the _minimize_ function from the _scipy_ package.

**Your task:** Find the best weights and evaluate the train and test results.

In [None]:
from scipy.optimize import minimize

def minimization_functional(weights):
    """Return the negated value of ``functional`` so that minimizing it maximizes the metric."""
    return -functional(weights)

# Write your code here


## Task 6 Bonus: Experiment with initial weights


To run the minimization algorithm we need to define an initial vector $w_0$, but how do we choose it?

One of the approaches is to try $N$ different starting points and select the best.

**Your task**
1. Implement an algorithm that generates $N$ random weight vectors, then minimizes the functional $N$ times using these vectors as starting points, and returns the best weights found.
2. Apply this algorithm to our task and compare the results.


In [None]:
from tqdm import tqdm

def generate_random_weights(vector_size):
    """Generate a random weight vector of length ``vector_size``.

    TODO: not implemented yet. The body is a placeholder, so the function
    currently returns None.
    """
    # Write your code here
    pass

def random_search_for_initial_weights(functional_to_minimize, n_tries, vector_size):
    """Minimize the functional from ``n_tries`` random starting points and keep the best result.

    TODO: not implemented yet. The body is a placeholder, so the function
    currently returns None, while the caller unpacks the result into two values.
    """
    # Write your code here
    pass


# Run the multi-start search with 100 starting points, one weight per ticker.
optimal_weights, func_value = random_search_for_initial_weights(minimization_functional, 100, vector_size= len(tickers))


print("Optimal weights:", optimal_weights)
# NOTE(review): the search is given the negated functional, so confirm whether
# func_value is the minimized (negative) value or the maximized metric.
print("Maximized F(weights):", func_value)
print("train expectation:", get_expectation(tickers,start_train_date, end_train_date, optimal_weights))
print("test expectation:", get_expectation(tickers, start_test_date, end_test_date, optimal_weights))