# <span style='color:red'>Project 2.  Due October 23</span>

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width: 90% !important; }</style>"))

  from IPython.core.display import display, HTML


### In this project we develop a first-order algorithm to construct a portfolio using intraday data.

In [2]:
import csv
import sys
import scipy.io
import numpy as np
import math
import matplotlib.pyplot as plt

##### We will have data involving $n$ assets, and use the first $T$ days of the data to compute the portfolio.
##### The computation will produce a weight $x_i$ for each asset $i = 1,...,n$, which could be long or short.
##### We assume that on each day, a position is taken at the open, and closed at noon.  So we define:
$$ p^o_{j,t} = \ \text{price of asset $j$ on day $t$ at the open}$$
$$ p^1_{j,t} = \ \text{price of asset $j$ on day $t$ at noon}$$
$$ r_{j,t} =  \ \frac{p^1_{j,t} - p^o_{j,t}}{p^o_{j,t}} = \ \text{return earned by asset $j$ on day $t$.}$$
$$ \bar r_j = \ \frac{1}{T} \sum_{t = 1}^T r_{j,t} = \ \text{average return earned by asset $j$.}$$

#### The optimization problem to solve depends on two parameters: $\theta \ge 0$ and $\pi > 0 0$.
####
$$ \text{minimize} \ \left(-\sum_{j = 1}^n \bar r_j x_j\right) \ + \ \theta \left( \frac{1}{T} \sum_{t = 1}^{T}\left[\sum_{j = 1}^n (r_{j,t} -  \bar r_j)x_j\right]^\pi\right)^{1/\pi}$$
#### 
#### There are no constraints on the quantities $x_j$.
#### The first sum is minus the average return earned by the portfolio.  In the second sum, the quantity inside the square brackets is the excess return earned by the portfolio on day $t$, magnified by the power $\pi$.  The quantity $\theta$ is a risk aversion parameter.


### <span style='color:red'> Task 1. Develop a first-order method to address this computational problem.</span>
#### 
#### Your method should work with values of $T$ at least $100$. Use the data we provide for AMZN, NFLX, TSLA, i.e., $n = 3$. 
###
#### Make sure your code works with $\pi = 0.5, 2, 4, 6$, and $\theta = 0.1, 10, 1000, 10^5, 10^6$.

#### Data Preprocessing:

In [229]:
import pandas as pd
# construct interval data
from datetime import datetime, time
from datetime import date
import warnings

# Suppress the warning
warnings.filterwarnings("ignore")

df_TSLA = pd.read_csv('TSLA.csv', skiprows = 3)
df_TSLA = df_TSLA.loc[:, ['Dates', 'Close']].dropna()
df_TSLA['Dates'] = pd.to_datetime(df_TSLA['Dates'])
start = datetime.strptime('09:30:00', '%H:%M:%S').time()
noon = datetime.strptime('12:00:00', '%H:%M:%S').time()
df_TSLA['Date']=df_TSLA['Dates'].dt.date
df_TSLA = df_TSLA[(df_TSLA['Dates'].dt.time==noon) | (df_TSLA['Dates'].dt.time==start)]



df_AMZN = pd.read_csv('AMZN.csv', skiprows = 3)
#missing value backfill
df_AMZN['Dates'][0]='1/4/21 9:30'
df_AMZN = df_AMZN.loc[:, ['Dates', 'Close']].dropna()
df_AMZN['Dates'] = pd.to_datetime(df_AMZN['Dates'])
start = datetime.strptime('09:30:00', '%H:%M:%S').time()
noon = datetime.strptime('12:00:00', '%H:%M:%S').time()
noon_correction = datetime.strptime('12:01:00', '%H:%M:%S').time()
df_AMZN['Date']=df_AMZN['Dates'].dt.date
df_AMZN = df_AMZN[(df_AMZN['Dates'].dt.time==noon) | (df_AMZN['Dates'].dt.time==start) | ((df_AMZN['Dates'].dt.time==noon_correction) & (df_AMZN['Date'] == date(2021, 4, 20)) )|((df_AMZN['Dates'].dt.time==noon_correction) & (df_AMZN['Date'] == date(2021, 6, 14)) )]



df_NFLX = pd.read_csv('NFLX.csv', skiprows = 3)
#missing value backfill
df_NFLX['Dates'][0]='2/1/21 9:30'
df_NFLX = df_NFLX.loc[:, ['Dates', 'Close']].dropna()
df_NFLX['Dates'] = pd.to_datetime(df_NFLX['Dates'])
start = datetime.strptime('09:30:00', '%H:%M:%S').time()
noon = datetime.strptime('12:00:00', '%H:%M:%S').time()
df_NFLX['Date']=df_NFLX['Dates'].dt.date
df_NFLX = df_NFLX[(df_NFLX['Dates'].dt.time==noon) | (df_NFLX['Dates'].dt.time==start)]


mask1 = df_TSLA['Date'].isin(df_AMZN['Date'])
mask2 = df_TSLA['Date'].isin(df_NFLX['Date'])
intersection_dates = df_TSLA['Date'][mask1&mask2]

AMZN = np.array(df_AMZN[df_AMZN['Date'].isin(intersection_dates)].groupby('Date')['Close'].diff().dropna())[:100]
NFLX = np.array(df_NFLX[df_NFLX['Date'].isin(intersection_dates)].groupby('Date')['Close'].diff().dropna())[:100]
TSLA = np.array(df_TSLA[df_TSLA['Date'].isin(intersection_dates)].groupby('Date')['Close'].diff().dropna())[:100]


In [235]:
# Define your data, parameters, and objective function here
np.random.seed(42)
# Example usage:
n = 3  # Number of assets
T = 100  # Number of days
x_j = np.random.rand(n) # Initial portfolio weights.
#r_jt = np.random.rand(T, n)  # Returns earned by each asset on each day (randomly generated)
r_jt = np.vstack((AMZN,NFLX,TSLA)).T
bar_r_j = np.mean(r_jt, axis=0)  # Average returns for each asset

#### Helper Functions to Compute Objective Function and Gradient:

In [None]:
# Helper Functions

def compute_objective(x_j, r_jt, bar_r_j, pi, theta):
    return -np.sum(bar_r_j * x_j) + theta * (np.sum((np.dot(r_jt - bar_r_j, x_j) ** pi)) ** (1/pi))

def compute_gradient(x_j, r_jt, bar_r_j, pi, theta):
    n = len(x_j)
    T = len(r_jt)
    
    # Initialize gradient vector
    gradient = np.zeros(n)
    
    gradient_first_term = -bar_r_j
    
    # Compute the excess return
    excess_returns = np.sum((r_jt - bar_r_j) * x_j, axis=1) ** pi

    gradient_second_term = np.mean(excess_returns) ** (1/pi - 1) * np.mean([excess_returns[t] * (r_jt[t] - bar_r_j) for t in range(T)], axis=0)
    
    # Combine both terms to get the gradient
    gradient = gradient_first_term + theta * gradient_second_term
    
    return gradient

#### First Order Method:

In [241]:
import numpy as np
##################################################
pi = 2  # Example value for pi
theta = 0.1  # Example value for theta
##################################################

# Gradient Descent hyperparameters
max_iterations = 10000
tolerance = 1e-6


# Gradient Descent optimization with Backtracking Line Search
learning_rate = 1.0  # Initial learning rate (you can adjust this)
alpha = 0.1  # Backtracking parameter (you can adjust this)
beta = 0.95  # Backtracking parameter (you can adjust this)

for iteration in range(max_iterations):
    if iteration%1000 == 0:
        print('interation:', iteration)
    gradient = compute_gradient(x_j, r_jt, bar_r_j, pi, theta)
    
    # Backtracking Line Search
    t = 1.0
    while compute_objective(x_j - t * gradient, r_jt, bar_r_j, pi, theta) > compute_objective(x_j, r_jt, bar_r_j, pi, theta) - alpha * t * np.linalg.norm(gradient) ** 2:
        t *= beta
    
    # Update portfolio weights
    x_j -= t * gradient

    if iteration%1000 == 0:
        print('gradient norm:', np.linalg.norm(gradient))
        print('objective: ', compute_objective(x_j, r_jt, bar_r_j, pi, theta))
    
    # Check for convergence
    if np.linalg.norm(gradient) < tolerance:
        break

# Final portfolio allocation
final_portfolio_weights = x_j


interation: 0
gradient norm: 28.41095574753845
objective:  24.577138214912456
interation: 1000
gradient norm: 28.410955747538434
objective:  24.577138214912456
interation: 2000
gradient norm: 28.410955747538434
objective:  24.577138214912456
interation: 3000
gradient norm: 28.410955747538434
objective:  24.577138214912456
interation: 4000
gradient norm: 28.410955747538434
objective:  24.577138214912456
interation: 5000
gradient norm: 28.410955747538434
objective:  24.577138214912456


KeyboardInterrupt: 

### <span style='color:red'>Task 2: Benchmark your portfolio on the remaining days</span>
#### On each of the remaining days, we proceed as follows.  Denote by $x^*$ your portfolio. At the market open we invest $10^9 x^*_j$ on each asset $j$, and we close the position (by) noon.  You need to use the asset's price to compute the number of shares that you invest in, whether long or short. So the total you invest equals $$ \sum_{j = 1}^n 10^9 |x^*_j|.$$
#### Report the average return earned by your portfolio.