In [7]:
import investpy
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib as mpl
import scipy.stats as scs
import statsmodels.api as sm
import scipy.optimize as sco
import scipy.interpolate as sci
from pandas_datareader import data as web
import warnings
import time
import seaborn as sn

# Silence library warnings for the whole notebook session.
warnings.filterwarnings('ignore')
# plt.style.use('fivethirtyeight')
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old alias, so pick whichever name exists.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
mpl.rcParams['font.family'] = 'sans-serif'

In [8]:
# Read one ticker per line. Trailing newlines and blank lines are removed so
# the tickers can be used directly as column labels and in text reports.
with open("AcoesBrasileiras.txt", "r") as symbols_file:
    symbols = [line.strip() for line in symbols_file if line.strip()]

In [None]:
initial = time.time()

# Select Dates
from_date = '01/01/2014'
to_date = datetime.today().strftime('%d/%m/%Y') # Today

# One historical-price frame per ticker that downloaded successfully.
# `data` and `new_symbols` are kept in the same order and the same length.
data = []
new_symbols = []
failed_symbols = []  # tickers that could not be downloaded, kept for inspection

for ticker in symbols:
    try:
        history = investpy.get_stock_historical_data(
                                             stock=ticker,
                                             country='brazil',
                                             from_date=from_date,
                                             to_date=to_date,
                                             interval='Daily')
    except Exception as exc:
        # Best effort: skip tickers that fail, but say so instead of hiding it.
        # Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
        failed_symbols.append(ticker)
        print("Skipping {!r}: {}".format(ticker, exc))
        continue

    data.append(history)
    new_symbols.append(ticker)

print("It took {:.2f} seconds".format(time.time() - initial))

In [None]:
# `data` holds one frame per entry of `new_symbols` (the tickers that were
# actually downloaded), in the same order.
dataf = data[:len(new_symbols)]
if not dataf:
    raise ValueError("No stock data was downloaded; nothing to concatenate")

# Concatenate all stocks data side by side
df = pd.concat(objs=dataf, axis=1)

# Column labels must follow `new_symbols`, not `symbols`: a ticker whose
# download was skipped contributes no columns to `df`.
columns = [['Open_' + str(ticker), 'High_' + str(ticker), 'Low_' + str(ticker),
            str(ticker), 'Volume_' + str(ticker), 'Curreny_' + str(ticker)]
           for ticker in new_symbols]

# Convert columns list into a flat list
col = [label for group in columns for label in group]

print(df.columns)

# Rename columns in DF
df.columns = col

# Keep only the closing-price column of each stock (labelled with the bare ticker)
close_price = list(new_symbols)
df = df[close_price]
df.to_csv('brazilian_stocks.csv')
# dg = df

df.dtypes

In [None]:
# df = pd.read_csv("brazilian_stocks.csv", parse_dates=True,
#                  verbose=True)
 
# Discard the two right-most columns of the price table.
# NOTE(review): the reason these two tickers are excluded is not visible here — confirm.
# NOTE: this cell is not idempotent; every re-run removes two more columns.
df = df.drop(columns=df.columns[-2:])


# Preview the first rows (the call parentheses are required to get the table).
df.head()
# for code in symbols:
#       df[code] = pd.to_numeric(df[code], downcast='float')
# df.head()

In [None]:
# Log returns: ln(P_t / P_{t-1}); rows containing any NaN are discarded
price_ratio = df / df.shift(1)
log_ret = np.log(price_ratio).dropna()

# Simple returns: P_t / P_{t-1} - 1; rows containing any NaN are discarded
simple_ret = df.pct_change().dropna()

In [None]:
# Volatility Function
def realized_volatility(x):
    """Return the square root of the sum of squared values in ``x``.

    ``x`` is squared element-wise, reduced with ``np.sum`` and the square
    root of that total is returned.
    """
    squared = x ** 2
    total = np.sum(squared)
    return np.sqrt(total)

# Group the log returns by calendar period and apply the volatility function
freq = 'D'      # grouping frequency handed to pd.Grouper
n_days = 252    # scaling factor: the result is multiplied by sqrt(n_days)
period_groups = log_ret.groupby(pd.Grouper(freq=freq))
r_vol = period_groups.apply(realized_volatility)
r_vol = r_vol * np.sqrt(n_days)


In [None]:
# Statistics
def print_statistics(a1):
    """Print a table of descriptive statistics for ``a1``.

    The values come from ``scipy.stats.describe``: sample size, minimum,
    maximum, mean, standard deviation (square root of the reported
    variance), skewness and kurtosis. Nothing is returned.
    """
    summary = scs.describe(a1)
    lowest, highest = summary.minmax

    rows = (
        ('size', summary.nobs),
        ('min', lowest),
        ('max', highest),
        ('mean', summary.mean),
        ('std', np.sqrt(summary.variance)),
        ('skew', summary.skewness),
        ('kurtosis', summary.kurtosis),
    )

    print('%14s %14s' % ('statistic', 'value'))
    print(45 * "-")
    for label, value in rows:
        print('%14s %14.3f' % (label, value))

# Normality Test
def normality_tests(arr):
    """Print skewness, kurtosis and normality-test p-values for ``arr``.

    Five lines are printed, in order: sample skewness, p-value of
    ``scipy.stats.skewtest``, sample kurtosis, p-value of
    ``scipy.stats.kurtosistest`` and p-value of ``scipy.stats.normaltest``.
    Nothing is returned.
    """
    # Each metric is evaluated right before its line is printed.
    checks = (
        ("Skew of data set  %14.3f", lambda sample: scs.skew(sample)),
        ("Skew test p-value %14.3f", lambda sample: scs.skewtest(sample)[1]),
        ("Kurt of data set  %14.3f", lambda sample: scs.kurtosis(sample)),
        ("Kurt test p-value %14.3f", lambda sample: scs.kurtosistest(sample)[1]),
        ("Norm test p-value %14.3f", lambda sample: scs.normaltest(sample)[1]),
    )
    for template, metric in checks:
        print(template % metric(arr))

# Monte Carlo Simulation

In [None]:
start = time.time()

# Annualised mean simple return per stock
returns_annual = simple_ret.mean() * n_days

# Annualised covariance of simple returns
cov_daily = simple_ret.cov()
cov_annual = cov_daily * n_days

# Size the simulation from the return table itself, not from `symbols`:
# the two can differ when tickers were skipped or columns were dropped,
# and a mismatch makes the weight/return dot product fail.
asset_names = list(simple_ret.columns)
num_stocks = len(asset_names)
num_portfolios = 10**6

# set random seed for reproduction
np.random.seed(42)

# Draw every random portfolio at once: one row per portfolio, each row
# normalised so that its weights sum to 1.
# NOTE: this holds num_portfolios x num_stocks floats in memory.
stock_weights = np.random.random((num_portfolios, num_stocks))
stock_weights /= stock_weights.sum(axis=1, keepdims=True)

# Portfolio return: w . mu ; portfolio volatility: sqrt(w' C w), row by row
cov_matrix = cov_annual.to_numpy()
port_returns = stock_weights @ returns_annual.to_numpy()
port_volatility = np.sqrt(
    np.einsum('ij,ij->i', stock_weights @ cov_matrix, stock_weights))
sharpe_ratio = port_returns / port_volatility  # riskfree = 0

# a dictionary for Returns and Risk values of each portfolio
portfolio = {'Returns': port_returns,
             'Volatility': port_volatility,
             'Sharpe Ratio': sharpe_ratio}

# extend the dictionary with one "<ticker> Weight" column per stock
for counter, symbol in enumerate(asset_names):
    portfolio[str(symbol) + ' Weight'] = stock_weights[:, counter]


print(f"It took {time.time() - start} seconds")

portfolio.keys()

In [None]:
# Table with one row per simulated portfolio
dfm = pd.DataFrame(portfolio)

# find min Volatility & max sharpe values in the dataframe
min_volatility = dfm['Volatility'].min()
max_sharpe = dfm['Sharpe Ratio'].max()

# use the min, max values to locate the two special portfolios
sharpe_portfolio = dfm.loc[dfm['Sharpe Ratio'] == max_sharpe]
min_variance_port = dfm.loc[dfm['Volatility'] == min_volatility]

# plotting frontier: every portfolio coloured by its Sharpe ratio, with the
# max-Sharpe (yellow star) and min-volatility (red star) portfolios on top
fig, ax1 = plt.subplots(figsize=(10, 8))
dfm.plot.scatter(x='Volatility', y='Returns', c='Sharpe Ratio',
                 cmap='RdYlBu', grid=True, ax=ax1)
ax1.scatter(x=sharpe_portfolio['Volatility'], y=sharpe_portfolio['Returns'],
            marker=(5, 1, 0), c='y', s=500)
ax1.scatter(x=min_variance_port['Volatility'], y=min_variance_port['Returns'],
            marker=(5, 1, 0), c='r', s=500)
ax1.set_xlabel('Volatility (Std. Deviation)')
ax1.set_ylabel('Expected Returns')
ax1.set_title('Efficient Frontier', fontsize=20)

# Save before showing: once plt.show() has rendered the figure, a following
# plt.savefig() writes an empty canvas.
fig.savefig("Efficient_Frontier.png")
plt.show()

In [None]:

print('Minimun Variance Portfolio:\n')
min_volatility = dfm['Volatility'].min()
min_variance_port = dfm.loc[dfm['Volatility'] == min_volatility]

print(min_variance_port.T)

# Use the first matching row if several portfolios tie on minimum volatility.
best_row = min_variance_port.iloc[0]

# Take the weight columns from the table itself so every stock is written
# (the previous positional loop stopped one stock short) and each weight
# stays paired with its own ticker.
weight_suffix = ' Weight'
weight_columns = [column for column in min_variance_port.columns
                  if str(column).endswith(weight_suffix)]

# `with` guarantees the report file is closed even if a write fails.
with open("minimum_variance_portifolio.txt", "w") as report:
    report.write("Returns: " + str(best_row['Returns'])
                 + "\nVolatility: " + str(best_row['Volatility'])
                 + "\nSharpe Ratio: " + str(best_row['Sharpe Ratio']) + "\n")
    for column in weight_columns:
        # Strip the suffix and stray whitespace so each record is one "ticker,weight" line.
        ticker = str(column)[:-len(weight_suffix)].strip()
        report.write(ticker + "," + str(best_row[column]) + "\n")
    
# np.savetxt('minimum_variance_portifolio.txt', min_variance_port1.T)
# for i in min_variance_port.iterrows(): 
#     variance_result.write(str(i) + "\n")


# print('\n\nMaximum sharpe Portfolio:\n')
# print(sharpe_portfolio.T)

# variance_result.write('Maximum sharpe Portfolio:\n')
# for i in range(len(min_variance_port)):
#     variance_result.write(min_variance_port[i].T + "\n")
    

# variance_result.close()

# Optimization Problem



In [None]:
# Functions for Portfolio Calculations
def statistics(weights):
    ''' Annualised return, volatility and Sharpe ratio of a portfolio.

    Uses the module-level ``simple_ret`` return table and ``n_days`` scaling
    factor. ``weights`` is converted to a NumPy array. The result is
    ``np.array([return, volatility, return / volatility])``, i.e. the Sharpe
    ratio assumes riskfree = 0.
    '''
    w = np.array(weights)
    annual_return = np.sum(simple_ret.mean() * w) * n_days
    annual_cov = simple_ret.cov() * n_days
    annual_vol = np.sqrt(np.dot(w.T, np.dot(annual_cov, w)))
    sharpe = annual_return / annual_vol
    return np.array([annual_return, annual_vol, sharpe])

In [None]:
# Functions to Minimize
def min_func_sharpe(weights):
    """Return the negated Sharpe ratio of ``weights``, so that minimising it maximises the ratio."""
    _, _, sharpe = statistics(weights)
    return -sharpe

def min_func_variance(weights):
    """Return the volatility entry (index 1) of ``statistics(weights)``."""
    _, volatility, _ = statistics(weights)
    return volatility

In [None]:
# Equality constraint: the weights must sum to 1
cons = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}

# Bounds: every weight lies in [0, 1], i.e. no short positions
bnds = tuple((0, 1) for _ in range(num_stocks))

# Starting point: the same weight for every stock
equal_weights = [1. / num_stocks] * num_stocks

In [None]:
# start = time.time()

# # Maximization of Sharpe Ratio 
# opts = sco.minimize(min_func_sharpe, equal_weights, method='SLSQP',
#                        bounds=bnds, constraints=cons)

# print("It took {:.2f} seconds".format(time.time() - start))
# print(opts)

In [None]:
# # # Optimal weights that maximizes the sharpe ratio
# pd.DataFrame([round(x,4) for x in opts['x']],index=symbols).T

In [None]:
# # Optimal Portfolio Metrics [Annual Return, Annual Volatility, Sharpe Ratio]
# statistics(opts['x']).round(5)

In [None]:
# # # Minimun Variance Portfolio
# start = time.time()

# optv = sco.minimize(min_func_variance, equal_weights, method='SLSQP',
#                        bounds=bnds, constraints=cons)

# print("It took {:.2f} seconds".format(time.time() - start))
# optv

In [None]:
# # Optimal Weights in Minumum Variance Portfolio
# pd.DataFrame([round(x,2) for x in optv['x']],index=symbols).T

In [None]:
# # Minimum Variance Porfolio Metrics [Annual Return, Annual Volatility, Sharpe Ratio]
# statistics(optv['x']).round(5)

In [None]:
# # Eficient Frontier Calculations

# cons = ({'type': 'eq', 'fun': lambda x:  statistics(x)[0] - i},
#         {'type': 'eq', 'fun': lambda x:  np.sum(x) - 1})

# bnds = tuple((0, 1) for x in weights)

# target_ret = np.linspace(0.02, 0.142, 50)
# target_vol = []

# for i in target_ret:
#     res = sco.minimize(min_func_variance, equal_weights, method='SLSQP',
#                        bounds=bnds, constraints=cons)
#     target_vol.append(res['fun'])

# target_vol = np.array(target_vol)

In [None]:
# # Plotting frontier
# fig,ax1=plt.subplots()
# # Random Portfolio Composition
# dfm.plot.scatter('Volatility', 'Returns', c='Sharpe Ratio', 
#                 cmap='RdYlBu', figsize=(10, 8), grid=True,ax=ax1)
# # Eficient Frontier
# plt.plot(target_vol, target_ret, c='black', lw=2, linestyle='--')
# # portfolio with highest Sharpe ratio
# # plt.plot(statistics(opts['x'])[1], statistics(opts['x'])[0], 'r*', markersize=28, c='y')    
# # minimum variance portfolio
# plt.plot(statistics(optv['x'])[1], statistics(optv['x'])[0], 'y*', markersize=28, c='r')

# # Plot Stocks
# MARKS = ['o', 'X', 'd', '*','x']

# for i in range(num_stocks):
#     plt.scatter(x=np.sqrt(cov_annual.iloc[i, i]),y=returns_annual[i],marker=MARKS[i],
#              s=150, color='black', label=symbols[i])
# plt.legend(loc = 'upper left')

# plt.xlabel('Volatility (Std. Deviation)')
# plt.ylabel('Expected Returns')
# plt.title('Efficient Frontier',fontsize=20)
# plt.show()

In [None]:
# Pyportfolio 


In [None]:
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns

# Portfolio Optimization

# Inputs for the optimiser, both estimated from the price table `df`
mu = expected_returns.mean_historical_return(df)
s = risk_models.sample_cov(df)

# Optimize for max Sharpe Ratio.
# The risk-free rate is passed explicitly so the optimisation uses the same
# riskfree = 0 assumption as the performance report below, instead of
# whatever default the installed library version applies.
ef = EfficientFrontier(mu, s)
weights = ef.max_sharpe(risk_free_rate=0)
cleaned_weights = ef.clean_weights()
print(cleaned_weights, '\n')

# Riskfree = 0
ef.portfolio_performance(verbose=True, risk_free_rate=0)

In [None]:
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices

# Amount of money to allocate across the optimised portfolio
total_position = 15000

# Most recent price of each stock, taken from the price table
latest_prices = get_latest_prices(df)

# Convert the cleaned weights into whole-share quantities
weights = cleaned_weights
da = DiscreteAllocation(
    weights,
    latest_prices,
    total_portfolio_value=total_position,
)
allocation_result = da.lp_portfolio()
allocation, leftover = allocation_result

print("Discrete Allocation:", allocation)
print('Funds Remaining: ${:.2f}'.format(leftover))

In [None]:
# Total wall-clock time since `initial` was set in the download cell
elapsed = time.time() - initial
print("It took {:.2f} seconds".format(elapsed))