In [None]:
#!pip install yfinance

# Invest Portfolio

Ivan has a large amount of money, and he is thinking about how to invest it to gain maximum profit.
He has chosen 10 candidate companies, but since Ivan is new to investing he needs your advice. In this notebook we will help Ivan on his journey.

In [113]:
# Candidate companies for the portfolio.
# Key: ticker symbol used to request quotes; value: short human-readable description.
tickers = {
    "BA": "The Boeing Company",
    "MCD": "McDonald's Corporation",
    "GOOGL": "Google Company",
    "DIS": "The Walt Disney Company",
    "PFE": "Pfizer Inc.",
    "AMT": "American Tower Corporation",
    "VOD": "Vodafone Group Public Limited Company",
    "BNP.PA": "BNP Paribas Bank",
    "ADS.DE": "Adidas",
    "TM": "Toyota Motor Corporation",
}


## Task 1 Download and preprocess data

We will use data from 2010 till 2021 for training and data from 2021 till 2024 for testing purposes.



In [114]:
# Training window: (start, end) dates passed to the price downloader.
start_train_date, end_train_date = "2010-01-01", "2021-01-01"
# Test window: begins on the date where the training window ends.
start_test_date, end_test_date = "2021-01-01", "2024-01-01"

Write a function that takes a list of ticker names and the start and end dates, and returns the annual stock prices as a pandas object (one column per ticker).



In [155]:
import yfinance as yf
import pandas as pd
import numpy as np


def download_annual_stock_prices(tickers_list, buy_date="2010-01-01", sell_date="2021-01-01"):
    """Fetch one opening price per year for the requested tickers.

    Monthly bars (``interval="1mo"``) are requested from ``yf.download`` with
    ``start=buy_date`` and ``end=sell_date``; only the "Open" prices are kept
    and every 12th row, starting with the first one, is returned.

    Args:
        tickers_list: ticker symbol(s) forwarded unchanged to ``yf.download``.
        buy_date: start of the requested period.
        sell_date: end of the requested period.

    Returns:
        The "Open" prices restricted to rows 0, 12, 24, ...
    """
    monthly_bars = yf.download(
        tickers_list,
        start=buy_date,
        end=sell_date,
        interval="1mo",
        progress=False,
    )
    monthly_open = monthly_bars["Open"]
    # Positional stride: this yields one row per year only while the monthly
    # rows are contiguous.
    return monthly_open[::12]

In [156]:
# Download the annual opening prices of every candidate ticker for the training
# window, then display the Boeing ("BA") column as a quick visual sanity check.
annual_stock_price_df = download_annual_stock_prices(list(tickers.keys()), start_train_date, end_train_date)
annual_stock_price_df["BA"]

Date
2010-01-01     55.720001
2011-01-01     66.150002
2012-01-01     74.699997
2013-01-01     76.550003
2014-01-01    136.009995
2015-01-01    131.070007
2016-01-01    141.380005
2017-01-01    156.300003
2018-01-01    295.750000
2019-01-01    316.190002
2020-01-01    328.549988
Name: BA, dtype: float64


Let's define $\xi_i$ as a random variable equal to the annual return ratio of $\text{stock}_i$ relative to the buying price.

Convert the annual stock prices to annual return ratios, assuming that we bought the stock at moment $t=0$,
i.e. if the annual stock prices are $(10, 15, 12, 13)$,
the annual return ratios will be $(0.5, -0.3, 0.1)$: each yearly price change is divided by the initial price $10$.

In [157]:
def get_annual_return_ratio_from_initial(df):
    """Convert annual prices into annual return ratios relative to the first price.

    For every column, the change between consecutive rows is divided by that
    column's first-row value. Rows containing NaN after differencing (the
    first row always does) are dropped.

    Args:
        df: annual prices, one row per year.

    Returns:
        Year-over-year price changes divided by the initial price.
    """
    initial_price = df.iloc[0]
    yearly_change = df.diff(periods=1)
    return yearly_change.dropna().div(initial_price)


# Hand-made annual prices for two fake tickers, used to check the conversion.
test_stock_price_df = pd.DataFrame({
    "ticker_1": [1, 3, 2, 5],
    "ticker_2": [10, 15, 12, 13],
})
# Expected result: every year-over-year price change divided by the first price
# of the same ticker (1 for ticker_1, 10 for ticker_2).
test_annual_return_ratio_df = pd.DataFrame({
    "ticker_1": [2.0, -1.0, 3.0],
    "ticker_2": [0.5, -0.3, 0.1],
})
# Raw numpy values of the actual result, compared against the expectation in the next cell.
result_np = get_annual_return_ratio_from_initial(test_stock_price_df).values

In [158]:
# Compare with a tolerance rather than bit-for-bit: the ratios come out of a
# floating-point division, so exact equality with hand-written decimals is not
# guaranteed in general.
np.testing.assert_allclose(result_np, test_annual_return_ratio_df.values)

In [159]:
# Annual return ratios of the real training data, relative to each ticker's first price.
annual_return_ratio_df = get_annual_return_ratio_from_initial(annual_stock_price_df)

## Task 2 Simple approach

Let's invest all the money in the most profitable company; for that we first need to calculate $E\xi_i$.

We can calculate this value both from the company's annual return ratios and directly from the stock prices. Let's do both to make sure that we did everything right.

In [160]:
# Absolute tolerance used when comparing two floating-point estimates of the same quantity.
eps = 1e-7

def get_expectation_ratio_from_return_ratio(ticker_return_ratio_df):
    """Estimate the expected annual return ratio as the sample mean.

    Args:
        ticker_return_ratio_df: annual return ratios, one row per year.

    Returns:
        The result of ``.mean()`` on the input (per column for a DataFrame).
    """
    expectation = ticker_return_ratio_df.mean()
    return expectation

def get_expectation_ratio_from_stock_price(ticker_stock_price_df):
    """Estimate the expected annual return ratio directly from prices.

    The total growth between the first and the last row, relative to the
    first row, is spread evenly over the ``len - 1`` intervals between rows.

    Args:
        ticker_stock_price_df: annual prices, one row per year.

    Returns:
        ``(last / first - 1) / (number_of_rows - 1)``.
    """
    first_price = ticker_stock_price_df.iloc[0]
    last_price = ticker_stock_price_df.iloc[-1]
    n_intervals = len(ticker_stock_price_df) - 1
    total_growth = last_price / first_price - 1
    return total_growth / n_intervals

In [161]:
# Estimate the expectation in two ways (mean of the return ratios vs. first/last
# price) and require the summed absolute difference to stay below eps.
expectation_ratio_1 = get_expectation_ratio_from_return_ratio(annual_return_ratio_df)
expectation_ratio_2 =  get_expectation_ratio_from_stock_price(annual_stock_price_df)
assert sum(abs(expectation_ratio_1 - expectation_ratio_2)) < eps

Looks like we are ready to implement our greedy approach:

$$ \text{company-to-invest} = \arg\max_i E \xi_i $$

Let's implement this approach having tickers and dates of buying and selling:

In [162]:
def get_expectation_ratio_for_greedy_performance(tickers_list, buy_date, sell_date):
    """Download annual prices and return the expected annual return ratio.

    Args:
        tickers_list: ticker symbol(s) passed to ``download_annual_stock_prices``.
        buy_date: start of the period.
        sell_date: end of the period.

    Returns:
        Whatever ``get_expectation_ratio_from_stock_price`` yields for the
        downloaded prices.
    """
    return get_expectation_ratio_from_stock_price(
        download_annual_stock_prices(tickers_list, buy_date, sell_date)
    )

def find_ticker_for_greedy_selection(tickers, buy_date, sell_date):
    """Return the ticker with the highest expected annual return ratio.

    Args:
        tickers: iterable of ticker symbols (a dict keyed by symbol works too),
            or a single symbol given as a string.
        buy_date: start of the evaluation period.
        sell_date: end of the evaluation period.

    Returns:
        The symbol whose expected annual return ratio is the largest.

    Raises:
        ValueError: if ``tickers`` is empty.
    """
    if isinstance(tickers, str):
        return tickers
    ticker_names = list(tickers)  # iterating a dict yields its keys (the symbols)
    if not ticker_names:
        raise ValueError("tickers must contain at least one ticker symbol")
    if len(ticker_names) == 1:
        # Nothing to compare; this also avoids a download whose result may be
        # a scalar rather than a per-ticker Series.
        return ticker_names[0]
    expectation_by_ticker = get_expectation_ratio_for_greedy_performance(
        ticker_names, buy_date, sell_date
    )
    # idxmax skips NaN entries, so tickers without data cannot win.
    return expectation_by_ticker.idxmax()

# Sanity check of the helper on a ticker that is not part of the candidate list.
assert 0.8 < get_expectation_ratio_for_greedy_performance(["AAPL"], start_train_date, end_train_date) < 0.9


In [163]:
# Display the expected annual return ratio for AAPL over the training window.
get_expectation_ratio_for_greedy_performance(["AAPL"], start_train_date, end_train_date)

0.8715972202629864

In [None]:
# Pick the greedy ticker on the training window only ...
best_greedy_ticker = find_ticker_for_greedy_selection(tickers, start_train_date, end_train_date)
# ... then compute its expectation ratio on the training window and on the test window.
greedy_ticker_train_performace = get_expectation_ratio_for_greedy_performance(
    best_greedy_ticker, start_train_date, end_train_date
)
greedy_ticker_test_performace = get_expectation_ratio_for_greedy_performance(
    best_greedy_ticker, start_test_date, end_test_date
)
print(f"\n ticker to invest in greedy approach: {best_greedy_ticker}")
print(f"Train expectation ratio: {greedy_ticker_train_performace}")
print(f"Test expectation ratio: {greedy_ticker_test_performace}")


Looks like our greedy method is not very helpful.

## Task 3 Variance

If a stock has a large variance, the investor cannot be sure about its future returns. What is the variance in our case?

Your task is to find $D\xi_i$

In [None]:
def get_variance_from_return_ratio(ticker_return_ratio):
    """Return the variance D[xi] of the annual return ratios.

    The population variance (``ddof=0``) is used, i.e. the squared deviations
    from the mean are averaged with a 1/n factor. Note that pandas defaults to
    ``ddof=1``, so the argument is passed explicitly.

    Args:
        ticker_return_ratio: annual return ratios (pandas Series or DataFrame).

    Returns:
        A scalar for a Series, or one value per column for a DataFrame.
    """
    return ticker_return_ratio.var(ddof=0)


def get_ticker_variance(ticker, buy_date, sell_date):
    """Download prices for one ticker and return the variance of its return ratio.

    Args:
        ticker: ticker symbol.
        buy_date: start of the evaluation period.
        sell_date: end of the evaluation period.

    Returns:
        The variance of the annual return ratio as a float.
    """
    annual_prices = download_annual_stock_prices(ticker, buy_date, sell_date)
    return_ratio = get_annual_return_ratio_from_initial(annual_prices)
    if isinstance(return_ratio, pd.DataFrame) and return_ratio.shape[1] == 1:
        # The downloader may hand back a one-column frame for a single ticker;
        # collapse it so that the result is a scalar.
        return_ratio = return_ratio.iloc[:, 0]
    return float(get_variance_from_return_ratio(return_ratio))

# Sanity check: the variance of the AAPL annual return ratio over the training window.
assert 1.9 < get_ticker_variance("AAPL", start_train_date, end_train_date) < 2.0


Then let's reformulate our task: we will select the stock to invest in by maximizing the following:

$$\text{company-to-invest} = \arg\max_i \left( \frac{E\xi_i} {\sqrt{ D\xi_i}} \right)$$

In [None]:
def find_ticker_for_max_expectation_divided_by_var(tickers, buy_date, sell_date):
    """Return the ticker that maximizes E[xi] / sqrt(D[xi]).

    Args:
        tickers: iterable of ticker symbols (a dict keyed by symbol works too),
            or a single symbol given as a string.
        buy_date: start of the evaluation period.
        sell_date: end of the evaluation period.

    Returns:
        The symbol with the largest ratio of expected annual return to the
        standard deviation of the annual return.

    Raises:
        ValueError: if ``tickers`` is empty.
    """
    if isinstance(tickers, str):
        return tickers
    ticker_names = list(tickers)  # iterating a dict yields its keys (the symbols)
    if not ticker_names:
        raise ValueError("tickers must contain at least one ticker symbol")
    if len(ticker_names) == 1:
        return ticker_names[0]

    annual_prices = download_annual_stock_prices(ticker_names, buy_date, sell_date)
    return_ratio = get_annual_return_ratio_from_initial(annual_prices)
    expectation = return_ratio.mean()
    # Population standard deviation (ddof=0) = sqrt of the 1/n variance.
    deviation = return_ratio.std(ddof=0)
    # idxmax skips NaN scores (e.g. 0/0 for a ticker whose return never changes).
    return (expectation / deviation).idxmax()


In [None]:
# Pick the ticker with the best expectation / standard-deviation ratio on the
# training window, then compute its expectation ratio on both windows.
best_expectation_var_ticker = find_ticker_for_max_expectation_divided_by_var(
    tickers, start_train_date, end_train_date
)
expectation_var_train_performace = get_expectation_ratio_for_greedy_performance(
    best_expectation_var_ticker, start_train_date, end_train_date
)
# Stored under its own name so that the greedy result computed earlier
# (greedy_ticker_test_performace) is not silently overwritten.
expectation_var_test_performace = get_expectation_ratio_for_greedy_performance(
    best_expectation_var_ticker, start_test_date, end_test_date
)
print(f"\n ticker to invest in expectation-variance approach: {best_expectation_var_ticker}")
print(f"Train expectation ratio: {expectation_var_train_performace}")
print(f"Test expectation ratio: {expectation_var_test_performace}")


Still not good...

## Task 4 Linear combination

In investing, there is a golden rule not to put all your eggs in one basket. So why shouldn't Ivan diversify his investments?

Let's define the weight vector $w = (w_1, \ldots, w_n)$ such that $|w| = \sum_{i} w_i = 1$ and $0 \le w_i \le 1$.

Our solution can then be represented as:
$\xi = \sum_i w_i \xi_i$

**Task:** Expand the final formula

$$ \max_w \left( \frac{E[\xi]}{\sqrt{D[\xi]}} \right) =
    \max_w \left( \frac{E \left[ \sum_i w_i \xi_i \right]}{\sqrt{D \left[ \sum_i w_i \xi_i \right]}} \right) =
    \max_w \left( \frac{\sum_i w_i E\xi_i}{\sqrt{\sum_i \sum_j w_i w_j \operatorname{cov}(\xi_i, \xi_j)}} \right)
$$



To find the best vector $w$ we need a function that returns our metric for a given vector $w$.

In [None]:
# Note: feel free to add additional functions

def _download_return_ratio_frame(tickers, buy_date, sell_date):
    """Return annual return ratios with one column per ticker, ordered like ``tickers``."""
    ticker_names = [tickers] if isinstance(tickers, str) else list(tickers)
    if not ticker_names:
        raise ValueError("tickers must contain at least one ticker symbol")
    annual_prices = download_annual_stock_prices(ticker_names, buy_date, sell_date)
    if isinstance(annual_prices, pd.Series):
        # A single ticker may come back as a Series; promote it to a frame.
        annual_prices = annual_prices.to_frame(ticker_names[0])
    # The downloader is free to return columns in another order (e.g. sorted);
    # weights are positional, so pin the column order to the caller's order.
    annual_prices = annual_prices[ticker_names]
    return get_annual_return_ratio_from_initial(annual_prices)


def _as_weight_vector(weights, n_assets):
    """Convert ``weights`` to a 1-D float array and check that its length is ``n_assets``."""
    weight_vector = np.asarray(weights, dtype=float).ravel()
    if weight_vector.size != n_assets:
        raise ValueError(f"expected {n_assets} weights, got {weight_vector.size}")
    return weight_vector


def get_expectation(tickers, buy_date, sell_date, weights):
    """Calculate the expected profit if we invest in each ticker with a certain weight.

    Args:
        tickers: iterable of ticker symbols (a dict keyed by symbol works too).
        buy_date: start of the evaluation period.
        sell_date: end of the evaluation period.
        weights: one weight per ticker, in the same order as ``tickers``.

    Returns:
        ``sum_i w_i * E[xi_i]`` as a float.

    Raises:
        ValueError: if the number of weights differs from the number of tickers.
    """
    return_ratio = _download_return_ratio_frame(tickers, buy_date, sell_date)
    weight_vector = _as_weight_vector(weights, return_ratio.shape[1])
    return float(return_ratio.mean().to_numpy() @ weight_vector)


def create_functional(tickers, buy_date, sell_date):
    """Build the metric ``w -> E[xi] / sqrt(D[xi])`` for ``xi = sum_i w_i * xi_i``.

    Prices are downloaded once here, so the returned function is cheap enough
    to be called repeatedly by an optimizer.

    Args:
        tickers: iterable of ticker symbols (a dict keyed by symbol works too).
        buy_date: start of the evaluation period.
        sell_date: end of the evaluation period.

    Returns:
        A function of the weight vector returning the metric as a float.
    """
    return_ratio = _download_return_ratio_frame(tickers, buy_date, sell_date)
    n_assets = return_ratio.shape[1]
    mean_returns = return_ratio.mean().to_numpy()
    # Population covariance (ddof=0); atleast_2d keeps the single-ticker case a matrix.
    covariance = np.atleast_2d(np.cov(return_ratio.to_numpy(), rowvar=False, ddof=0))

    def functional(weights):
        """Return E[xi] / sqrt(D[xi]) for the portfolio described by ``weights``."""
        weight_vector = _as_weight_vector(weights, n_assets)
        expectation = mean_returns @ weight_vector
        # D[sum_i w_i xi_i] = w^T Cov w; clamp tiny negative round-off before sqrt.
        variance = max(float(weight_vector @ covariance @ weight_vector), 0.0)
        return float(expectation / np.sqrt(variance))

    return functional


functional = create_functional(tickers, start_train_date, end_train_date)

# One equal share per ticker, so that the weights sum to 1. The divisor is the
# number of tickers, not the length of an individual ticker symbol.
equal_weights = [1 / len(tickers)] * len(tickers)
metric_for_equal_weights = functional(equal_weights)

assert 1.0 < metric_for_equal_weights < 1.5


## Task 5 Optimization

But how to find best weights?

Let's define an optimization problem and use the _minimize_ function from the _scipy_ package.

**Your task** Find best weights and evaluate train and test results.

In [None]:
from scipy.optimize import minimize

def minimization_functional(weights):
    """Negated metric, so that minimizing it maximizes ``functional``."""
    return -functional(weights)

# Write your code here


## Task 6 Bonus: Experiment with initial weights


To run the minimization algorithm we need to define an initial vector $w_0$, but how do we do that?

One approach is to try $N$ different starting points and select the best result.

**Your task**
1. Implement an algorithm that generates $N$ random weight vectors, then minimizes the functional $N$ times using these vectors as starting points, and returns the best weights found.
2. Apply this algorithm to our task and compare the results.


In [None]:
from tqdm import tqdm

def generate_random_weights(vector_size, rng=None):
    """Draw a random weight vector with non-negative entries that sum to 1.

    Args:
        vector_size: number of weights to generate (must be at least 1).
        rng: optional ``numpy.random.Generator``; pass a seeded one for
            reproducible results. A fresh generator is created when omitted.

    Returns:
        A 1-D numpy array of length ``vector_size``.

    Raises:
        ValueError: if ``vector_size`` is smaller than 1.
    """
    if vector_size < 1:
        raise ValueError("vector_size must be a positive integer")
    if rng is None:
        rng = np.random.default_rng()
    # Dirichlet(1, ..., 1) is the uniform distribution over the probability simplex.
    return rng.dirichlet(np.ones(vector_size))


def random_search_for_initial_weights(functional_to_mimimize, n_tries, vector_size, rng=None):
    """Minimize a functional from several random starting points and keep the best run.

    Each run uses SLSQP with every weight bounded to [0, 1] and the weights
    constrained to sum to 1.

    Args:
        functional_to_mimimize: function of a weight vector returning a scalar.
        n_tries: number of random starting points (must be at least 1).
        vector_size: number of weights.
        rng: optional ``numpy.random.Generator`` used to draw the starting points.

    Returns:
        ``(best_weights, best_value)`` where ``best_value`` is the smallest
        value of ``functional_to_mimimize`` found.

    Raises:
        ValueError: if ``n_tries`` is smaller than 1.
        RuntimeError: if no run converged.
    """
    if n_tries < 1:
        raise ValueError("n_tries must be a positive integer")
    if rng is None:
        rng = np.random.default_rng()

    bounds = [(0.0, 1.0)] * vector_size
    constraints = ({"type": "eq", "fun": lambda w: np.sum(w) - 1.0},)

    best_weights, best_value = None, np.inf
    for _ in tqdm(range(n_tries)):
        initial_weights = generate_random_weights(vector_size, rng=rng)
        result = minimize(
            functional_to_mimimize,
            initial_weights,
            method="SLSQP",
            bounds=bounds,
            constraints=constraints,
        )
        # Only trust runs that the optimizer itself reports as converged.
        if result.success and result.fun < best_value:
            best_weights, best_value = result.x, float(result.fun)

    if best_weights is None:
        raise RuntimeError("the optimizer did not converge from any starting point")
    return best_weights, best_value


# Run the multi-start search with 100 starting points, one weight per candidate ticker.
optimal_weights, func_value = random_search_for_initial_weights(minimization_functional, 100, vector_size= len(tickers))


# NOTE(review): the search is given minimization_functional (the negated metric),
# so func_value is presumably the minimized value of -F; the maximized F would
# then be -func_value -- confirm before trusting the "Maximized" label below.
print("Optimal weights:", optimal_weights)
print("Maximized F(weights):", func_value)
# Expected profit of the weighted portfolio on the training and on the test window.
print("train expectation:", get_expectation(tickers,start_train_date, end_train_date, optimal_weights))
print("test expectation:", get_expectation(tickers, start_test_date, end_test_date, optimal_weights))