In [1]:
import pandas as pd
import numpy as np
from symbol import *
from datetime import datetime
import os
import matplotlib.pyplot as plt
from bqplot import pyplot as bplt
import seaborn as sns
from dateutil.relativedelta import relativedelta

sns.set(context='notebook', style="dark", palette="muted", color_codes=True)

%matplotlib inline

In [2]:
def num_missing(x, start='beginning'):
    if start == 'valid_index':
        x = x[x.first_valid_index():]
    return sum(x.isnull())
def impute_returns(x):
    x[abs(x) > 25] = np.nan
    x = x.interpolate(method='time',limit=1)
    x = x.round(4)
    return x
def plot_close(hist_data, symbol_list):
    grouped_data = hist_data.groupby('symbol')
    try:
        bplt.close(1)
    except:
        pass
    bplt.figure(1, title='Line Chart')
    bplt.clear()
    for symbol in symbol_list:
        adjclose = grouped_data.get_group(symbol).set_index('date').adj_close
        bplt.plot(x=adjclose.index, y=adjclose, marker_str=symbol)
        bplt.legend()
    
    bplt.show()

def plot_returns(returns_data, symbol_list):
    try:
        bplt.close(1)
    except:
        pass
    bplt.figure(1, title='Line Chart')
    bplt.clear()
    for symbol in symbol_list:
        bplt.plot(x=returns_data.index, y=returns_data[[symbol]])
        bplt.legend()
    
    bplt.show()
def get_hist_data(hist_data, symbol):
    grouped_data = hist_data.groupby('symbol')
    data = grouped_data.get_group(symbol)
    data = data.set_index('date')
    data = data.drop(['symbol'], axis=1)
    return data

In [3]:
s= Symbol()
i = Index()

In [4]:
ret = s.get_symbol_returns(start='2016',null_count=5, volume=1000)
ret = ret.interpolate(method='time', limit=5, limit_direction='backward')
iret = i.get_index_returns('nifty_50', start='2016')
iret = iret.interpolate(method='time', limit=5, limit_direction='backward')
infy_ret = ret.infy
iret = iret.nifty_50
# column_list = s.returns.columns[s.returns['2000':].interpolate(method='time', limit=1).apply(num_missing) == 0]
len(ret), len(iret)

(255, 255)

In [5]:

def cal_beta(returns, bench):
    if isinstance(returns, pd.Series):
        returns = pd.Dataframe(returns)
    if isinstance(bench, pd.DataFrame):
        bench = bench.ix[:, 0]
    returns = returns.join(bench)
    cov = returns.cov().ix[-1, :]
    var = bench.var()
    return cov/var
cal_beta(ret, iret).head()

def var(returns, alpha):
    # This method calculates the historical simulation var of the returns
    sorted_returns = np.sort(returns)
    # Calculate the index associated with alpha
    index = int(alpha * len(sorted_returns))
    # VaR should be positive
    return abs(sorted_returns[index])
var(infy_ret, 0.05)

def cvar(returns, alpha):
    # This method calculates the condition VaR of the returns
    sorted_returns = np.sort(returns)
    # Calculate the index associated with alpha
    index = int(alpha * len(sorted_returns))
    # Calculate the total VaR beyond alpha
    sum_var = sorted_returns[0]
    for i in range(1, index):
        sum_var += sorted_returns[i]
    # Return the average VaR
    # CVaR should be positive
    return abs(sum_var / index)
cvar(infy_ret, 0.05)

0.034413887525502874

In [16]:
def beta(returns, market):
    # Create a matrix of [returns, market]
    cova = (pd.DataFrame(returns.rename('symbol')).join(market.rename('bench')).cov())
    # Return the covariance of m divided by the standard deviation of the market returns
    return cova.ix[0, 1] / market.var()
print('Beta', beta(infy_ret, iret))

def lower_partial_moment(returns, threshold=0, order=1):
    # Calculate the difference between the threshold and the returns
    diff = threshold - returns
    # Set the minimum of each to 0
    diff = diff.clip(lower=0)

    # Return the mean of the different to the power of order
    return diff.pow(order).mean()
print(lower_partial_moment(infy_ret, 0, 1))

def higher_partial_moments(returns, threshold=0, order=1):
    # Calculate the difference between the threshold and the returns
    diff = returns - threshold
    # Set the minimum of each to 0
    diff = diff.clip(lower=0)

    # Return the mean of the different to the power of order
    return diff.pow(order).mean()
print(hpm(infy_ret, 0, 1))

def prices(returns, base=None):
    if base is None:
        base=1000
    prices = pd.Series(index=returns.index.copy())
    prices = base * np.exp((returns.cumsum()))
    return prices
print(prices(infy_ret).head())




Beta 0.753024298618
0.005524335865392561
0.005008278086339003
date
2016-01-01    999.864303
2016-01-04    976.026778
2016-01-05    971.639226
2016-01-06    967.387371
2016-01-07    950.606115
Name: infy, dtype: float64


In [7]:
def draw_down(returns, tau=5):
    # Returns the draw-down given time period tau
    values = prices(returns)
    pos = len(values) - 1
    pre = pos - tau
    drawdown = float('+inf')
    # Find the maximum drawdown given tau
    while pre >= 0:
        dd_i = (values[pos] / values[pre]) - 1
        if dd_i < drawdown:
            drawdown = dd_i
        pos, pre = pos - 1, pre - 1
    # Drawdown should be positive
    return abs(drawdown)
print('dd', dd(infy_ret, 5))

def max_dd(returns):
    # Returns the maximum draw-down for any tau in (0, T) where T is the length of the return series
    max_drawdown = float('-inf')
    for i in range(0, len(returns)):
        drawdown_i = dd(returns, i)
        if drawdown_i > max_drawdown:
            max_drawdown = drawdown_i
    # Max draw-down should be positive
    return abs(max_drawdown)
print('max_dd', max_dd(infy_ret))

def average_dd(returns, periods):
    # Returns the average maximum drawdown over n periods
    drawdowns = []
    for i in range(0, len(returns)):
        drawdown_i = dd(returns, i)
        drawdowns.append(drawdown_i)
    drawdowns = sorted(drawdowns)
    total_dd = abs(drawdowns[0])
    for i in range(1, periods):
        total_dd += abs(drawdowns[i])
    return total_dd / periods
print('average_dd', average_dd(infy_ret, periods=2))


def average_dd_squared(returns, periods):
    # Returns the average maximum drawdown squared over n periods
    drawdowns = []
    for i in range(0, len(returns)):
        drawdown_i = np.power(dd(returns, i), 2.0)
        drawdowns.append(drawdown_i)
    drawdowns = sorted(drawdowns)
    total_dd = abs(drawdowns[0])
    for i in range(1, periods):
        total_dd += abs(drawdowns[i])
    return total_dd / periods
print('average_dd_squared', average_dd_squared(infy_ret, periods=2))

dd 0.0927291003518
max_dd 0.281240138845
average_dd 0.0440190986765
average_dd_squared 0.00387536209658


In [8]:
def treynor_ratio(returns, market, rf):
    rf = np.log(1+rf)/len(returns)
    ex_returns = returns - rf
    return np.sqrt(len(ex_returns)) * ex_returns.mean() / beta(returns, market)
print('treynor_ratio', treynor_ratio(infy_ret, iret, 0.075))
 
def sharpe_ratio(returns, rf):
    rf = np.log(1+rf)/len(returns)
    ex_returns = returns - rf
    return np.sqrt(len(ex_returns)) * ex_returns.mean() / ex_returns.std()
print('Sharpe ratio', sharpe_ratio(infy_ret, 0.075))

def information_ratio(returns, benchmark):
    diff = returns - benchmark
    return diff.mean() / diff.std()
print('information_ratio', information_ratio(infy_ret, iret))

def modigliani_ratio(returns, benchmark, rf):
    rf = np.log(1+rf)/len(returns)
    ex_returns = returns - rf
    np_rf = np.empty(len(returns))
    np_rf.fill(rf)
    rdiff = returns - np_rf
    bdiff = benchmark - np_rf
    return np.sqrt(len(ex_returns)) * ex_returns.mean() * ((rdiff).std() / (bdiff).std()) + np.exp(rf)**len(returns)
print('modigliani_ratio', modigliani_ratio(infy_ret, iret, 0.075))


treynor_ratio -0.0169578556839
Sharpe ratio -0.862650881805
information_ratio -0.054957408090781736
modigliani_ratio 1.05500368186


In [9]:
def omega_ratio(returns, rf, target=0):
    rf = np.log(1+rf)/len(returns)
    ex_returns = returns - rf
    return ex_returns.mean() / lpm(returns, target, 1)
print('omega_ratio', omega_ratio(infy_ret, 0.075))
    
def sortino_ratio(returns, rf, target=0):
    rf = np.log(1+rf)/len(returns)
    ex_returns = returns - rf
    return ex_returns.mean() / np.sqrt(lpm(returns, target, 2))
print('sortino_ratio', sortino_ratio(infy_ret, 0.075))

def kappa_three_ratio(returns, rf, target=0):
    rf = np.log(1+rf)/len(returns)
    ex_returns = returns - rf
    return ex_returns.mean() / np.power(lpm(returns, target, 3), float(1/3))
print('kappa_three_ratio', kappa_three_ratio(infy_ret, 0.075))

def gain_loss_ratio(returns, target=0):
    return hpm(returns, target, 1) / lpm(returns, target, 1)
print('gain_loss_ratio', gain_loss_ratio(infy_ret))

def upside_potential_ratio(returns, target=0):
    return hpm(returns, target, 1) / np.sqrt(lpm(returns, target, 2))
print('upside_potential_ratio', upside_potential_ratio(infy_ret))

omega_ratio -0.14475372897406164
sortino_ratio -0.0736708578639
kappa_three_ratio -0.0468879542863
gain_loss_ratio 0.9065846480684813
upside_potential_ratio 0.461396533428


In [10]:
def calmar_ratio(returns, rf):
    rf = np.log(1+rf)/len(returns)
    ex_returns = returns - rf
    return ex_returns.mean() / max_dd(returns)
print('calmar_ratio', calmar_ratio(infy_ret, 0.075))

def sterling_ratio(returns, rf, periods):
    rf = np.log(1+rf)/len(returns)
    ex_returns = returns - rf
    return ex_returns.mean() / average_dd(returns, periods)
print('sterling_ration', sterling_ration(infy_ret, 0.075, 2))
 
def burke_ratio(returns, rf, periods):
    rf = np.log(1+rf)/len(returns)
    ex_returns = returns - rf
    return ex_returns.mean() / np.sqrt(average_dd_squared(returns, periods))
print('burke_ratio', burke_ratio(infy_ret, 0.075, 2))

calmar_ratio -0.00284336446392
sterling_ration -0.0181663923311
burke_ratio -0.012845579207


In [12]:
def set_risk_free_rate(returns, risk_free_rate=RISK_FREE_RATE):
    index = returns.index
    if isinstance(index, pd.DatetimeIndex):
        pass
    else:
        raise ValueError('Invalid index of returns')
    if index.inferred_freq is None:
        if (returns.index[1] - returns.index[0]).days < 10:
            n = returns.resample('A').count().max(axis=1).max()
            risk_free_rate = np.log(1 + risk_free_rate) / n
        else:
            pass
    elif str(index.inferred_freq)[0] == 'W':
        risk_free_rate = np.log(1 + risk_free_rate) / 54
    elif str(index.inferred_freq)[0] == 'M':
        risk_free_rate = np.log(1 + risk_free_rate) / 12
    elif str(index.inferred_freq)[0] == 'Q':
        risk_free_rate = np.log(1 + risk_free_rate) / 4
    return risk_free_rate

def calculate_apm(returns, benchmark_returns,
                   risk_free_rate=0.075):
    capm_schema = BETA_SCHEMA.copy()
    capm_schema.remove('interval')
    if isinstance(benchmark_returns, pd.Series):
        benchmark = benchmark_returns.name
    elif isinstance(benchmark_returns, pd.DataFrame):
        capm_variables = pd.DataFrame(columns=capm_schema)
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            for benchmark in benchmark_returns.columns:
                p = executor.submit(
                    calculate_apm, returns=returns,
                    benchmark_returns=benchmark_returns[benchmark],
                    risk_free_rate=risk_free_rate
                )
                capm = p.result()
                capm_variables = capm_variables.append(capm)
            capm_variables = capm_variables.reset_index(drop=True)
            capm_variables = capm_variables.sort_values(['symbol'])
            return capm_variables
    else:
        print(returns)
        raise ValueError(
            'Index returns must be a series or dataframe only'
        )
    if isinstance(returns, pd.Series):
        symbol = returns.name
        returns = pd.DataFrame(returns)
        returns = returns.join(benchmark_returns)
        returns = returns.dropna(subset=[benchmark])
        risk_free_rate = set_risk_free_rate(returns, risk_free_rate)
        returns = returns - risk_free_rate
        slope, intercept, r_value, p_value, std_err = stats.linregress(y=returns[symbol], x=returns[benchmark])
        std_dev = returns[symbol].std()
        capm_variables = pd.DataFrame(
            [[symbol, benchmark, intercept, slope, std_dev, r_value**2, p_value, std_err]],
            columns=capm_schema
        )
        return capm_variables
    elif isinstance(returns, pd.DataFrame):
        capm_variables = pd.DataFrame(
            columns=capm_schema
        )
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            for symbol in returns.columns:
                p = executor.submit(
                    calculate_apm, returns=returns[symbol],
                    benchmark_returns=benchmark_returns, risk_free_rate=risk_free_rate
                )
                capm = p.result()
                capm_variables = capm_variables.append(capm)
            capm_variables = capm_variables.sort_values(['symbol'])
            capm_variables = capm_variables.reset_index(drop=True)
            return capm_variables
    else:
        print(returns)
        raise ValueError(
            'Index returns must be a series or dataframe only'
        )
def calculate_capm_interval(returns, benchmark_returns,
                            risk_free_rate=0.075,frequency=None):
    if frequency is not None:
        if isinstance(returns, pd.Series):
            returns = pd.DataFrame(returns)
        if frequency == 'A':
            years = returns.index.year
            interval_beta = returns.groupby(years).apply(
                calculate_apm, benchmark_returns=benchmark_returns, risk_free_rate=risk_free_rate
            )
            interval_beta.index = interval_beta.index.droplevel(1)
            interval_beta.index.rename('year', inplace=True)
            interval_beta = interval_beta.reset_index()
        elif frequency == 'M':
            years = returns.index.year
            months = returns.index.month
            interval_beta = returns.groupby([years, months]).apply(
                calculate_apm, benchmark_returns=benchmark_returns, risk_free_rate=risk_free_rate
            )
            interval_beta.index = interval_beta.index.droplevel(2)
            interval_beta.index.rename(['year', 'month'], inplace=True)
            interval_beta = interval_beta.reset_index()
            interval_beta = interval_beta.sort_values(['symbol', 'year', 'month'])
            interval_beta['day'] = 1
            interval_beta.index = pd.to_datetime(interval_beta[['year', 'month', 'day']])
            interval_beta = interval_beta.drop(['year', 'month', 'day'], axis=1)
    return interval_beta
infy_ret = ret.infy
calculate_capm_interval(ret, iret, frequency='M')

Unnamed: 0,symbol,benchmark,alpha,beta,std_dev,r_square,p_value,std_error
2016-01-01,20microns,nifty_50,-0.010382,1.274596,0.039955,0.136554,0.108814,0.755444
2016-02-01,20microns,nifty_50,0.014011,1.157103,0.026186,0.349515,0.004766,0.362144
2016-03-01,20microns,nifty_50,-0.000945,0.744497,0.018439,0.193125,0.052533,0.358683
2016-04-01,20microns,nifty_50,0.003466,0.274577,0.037991,0.005186,0.776439,0.950734
2016-05-01,20microns,nifty_50,-0.097576,-0.243293,0.021663,0.010370,0.652038,0.531452
2016-06-01,20microns,nifty_50,0.007833,1.090774,0.015702,0.297736,0.008621,0.374588
2016-07-01,20microns,nifty_50,-0.003338,0.619472,0.014554,0.062247,0.288774,0.566723
2016-08-01,20microns,nifty_50,0.009305,0.212964,0.043632,0.001108,0.883113,1.430122
2016-09-01,20microns,nifty_50,0.003423,2.030124,0.030240,0.308209,0.011055,0.716888
2016-10-01,20microns,nifty_50,0.003967,0.546902,0.021745,0.035540,0.439565,0.690982


In [14]:
n=10
n = int(np.maximum(n, 252))
n

252

In [None]:
def set_risk_free_rate(returns, risk_free_rate=RISK_FREE_RATE):
    index = returns.index
    if isinstance(index, pd.DatetimeIndex):
        pass
    else:
        raise ValueError('Invalid index of returns')
    if index.inferred_freq is None:
        if (returns.index[1] - returns.index[0]).days < 10:
            n = returns.resample('A').count().max(axis=1).max()
            risk_free_rate = np.log(1 + risk_free_rate) / n
        else:
            pass
    elif str(index.inferred_freq)[0] == 'W':
        risk_free_rate = np.log(1 + risk_free_rate) / 54
    elif str(index.inferred_freq)[0] == 'M':
        risk_free_rate = np.log(1 + risk_free_rate) / 12
    elif str(index.inferred_freq)[0] == 'Q':
        risk_free_rate = np.log(1 + risk_free_rate) / 4
    return risk_free_rate

def calculate_apm(returns, benchmark_returns,
                   risk_free_rate=0.075, frequency=None):

   
    if isinstance(benchmark_returns, pd.Series):
        benchmark_returns = pd.DataFrame(benchmark_returns)
    if frequency is not None:
        if isinstance(returns, pd.Series):
            returns = pd.DataFrame(returns)
        if frequency == 'A':

            interval = returns.index.year
        annual_beta = returns.groupby(interval).apply(
            calculate_apm, benchmark_returns=benchmark_returns, risk_free_rate=risk_free_rate
        )
        annual_beta.index = annual_beta.index.droplevel(1)
        annual_beta.index.rename('year', inplace=True)
        annual_beta = annual_beta.reset_index()
        print(annual_beta)
            
    capm_schema = BETA_SCHEMA.copy()
    capm_schema.remove('interval')
    if isinstance(benchmark_returns, pd.DataFrame):
        capm_variables = pd.DataFrame(columns=capm_schema)
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            for benchmark in benchmark_returns.columns:
                p = executor.submit(
                    calculate_apm, returns=returns,
                    benchmark_returns=benchmark_returns[benchmark],
                    risk_free_rate=risk_free_rate
                )
                capm = p.result()
                capm_variables = capm_variables.append(capm)
            capm_variables = capm_variables.reset_index(drop=True)
            capm_variables = capm_variables.sort_values(['symbol'])
            return capm_variables
    else:
        raise ValueError(
            'Index returns must be a series or dataframe only'
        )
    if isinstance(returns, pd.Series):
        symbol = returns.name
        returns = pd.DataFrame(returns)
        returns = returns.join(benchmark_returns)
        returns = returns.dropna(subset=[benchmark])
        risk_free_rate = set_risk_free_rate(returns, risk_free_rate)
        returns = returns - risk_free_rate
        slope, intercept, r_value, p_value, std_err = stats.linregress(y=returns[symbol], x=returns[benchmark])
        std_dev = returns[symbol].std()
        capm_variables = pd.DataFrame(
            [[symbol, benchmark, intercept, slope, std_dev, r_value**2, p_value, std_err]],
            columns=capm_schema
        )
        return capm_variables
    elif isinstance(returns, pd.DataFrame):
        capm_variables = pd.DataFrame(
            columns=capm_schema
        )
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            for symbol in returns.columns:
                p = executor.submit(
                    calculate_apm, returns=returns[symbol],
                    benchmark_returns=benchmark_returns, risk_free_rate=risk_free_rate
                )
                capm = p.result()
                capm_variables = capm_variables.append(capm)
            capm_variables = capm_variables.sort_values(['symbol'])
            capm_variables = capm_variables.reset_index(drop=True)
            return capm_variables
    else:
        print(returns)
        raise ValueError(
            'Index returns must be a series or dataframe only'
        )
infy_ret = ret.infy
calculate_apm(ret.ix[:, ['infy','wipro']], iret, frequency='A')

In [None]:
len(s.get_symbol_returns().resample('A'))
s.get_symbol_returns().resample('A').count().max(axis=1).max()

In [None]:
iport = s.returns[['INFY', 'BPCL']]['2000':].fillna(0)/100
start_value = 10000
weights = np.ones(len(iport.columns))/len(iport.columns)
myport = pd.DataFrame(0, index=iport.index.copy(), columns = iport.columns.copy())
myport.ix[0, :] = start_value/(len(iport.columns))
myport = myport.ix[0] * np.exp(iport.cumsum())
myport['value'] = myport.sum(axis=1)
myport.tail()
# # myport = myport/myport.ix[0]

In [None]:
def calc_portfolio_var(returns, weights=None):
    if weights is None: 
        weights = np.ones(returns.columns.size) / \
        returns.columns.size
    sigma = np.cov(returns.T,ddof=0)
    var = (weights * sigma * weights.T).sum()
    return var

In [None]:
def sharpe_ratio(returns, weights = None, risk_free_rate = 0.045):
    n = returns.columns.size
    if weights is None: weights = np.ones(n)/n
    # get the portfolio variance
    var = calc_portfolio_var(returns, weights)
    # and the means of the stocks in the portfolio
    means = returns.mean()
    # and return the sharpe ratio
    risk_free_rate = np.log(1+risk_free_rate)/365
    return (means.dot(weights) - risk_free_rate)/np.sqrt(var)
sharpe_ratio(iport)
# iport.mean()

In [None]:
def y_f(x): return 2+x**2

import scipy as sp
import scipy.optimize as scopt
import scipy.stats as spstats
scopt.fmin(y_f, 1000)

In [None]:
def negative_sharpe_ratio_n_minus_1_stock(weights, 
                                          returns, 
                                          risk_free_rate):
    """
    Given n-1 weights, return a negative sharpe ratio
    """
    weights2 = sp.append(weights, 1-np.sum(weights))
    return -sharpe_ratio(returns, weights2, risk_free_rate)
def optimize_portfolio(returns, risk_free_rate):
    """ 
    Performs the optimization
    """
    # start with equal weights
    w0 = np.ones(returns.columns.size-1, 
                 dtype=float) * 1.0 / returns.columns.size
    # minimize the negative sharpe value
    w1 = scopt.fmin(negative_sharpe_ratio_n_minus_1_stock, 
                    w0, args=(returns, risk_free_rate))
    # build final set of weights
    final_w = sp.append(w1, 1 - np.sum(w1))
    # and calculate the final, optimized, sharpe ratio
    final_sharpe = sharpe_ratio(returns, final_w, risk_free_rate)
    return (final_w, final_sharpe)
optimize_portfolio(iport, 0.045)

In [None]:
def objfun(W, R, target_ret):
    stock_mean = np.mean(R,axis=0)
    port_mean = np.dot(W,stock_mean) # portfolio mean
    cov=np.cov(R.T) # var-cov matrix
    port_var = np.dot(np.dot(W,cov),W.T) # portfolio variance
    penalty = 2000*abs(port_mean-target_ret)# penalty 4 deviation
    return np.sqrt(port_var) + penalty # objective function
def calc_efficient_frontier(returns):
    result_means = []
    result_stds = []
    result_weights = []
    
    means = returns.mean()
    min_mean, max_mean = means.min(), means.max()
    
    nstocks = returns.columns.size
    
    for r in np.linspace(min_mean, max_mean, 100):
        weights = np.ones(nstocks)/nstocks
        bounds = [(0,1) for i in np.arange(nstocks)]
        constraints = ({'type': 'eq', 
                        'fun': lambda W: np.sum(W) - 1})
        results = scopt.minimize(objfun, weights, (returns, r), 
                                 method='SLSQP', 
                                 constraints = constraints,
                                 bounds = bounds)
        if not results.success: # handle error
            raise Exception(result.message)
        result_means.append(np.round(r,4)) # 4 decimal places
        std_=np.round(np.std(np.sum(returns*results.x,axis=1)),6)
        result_stds.append(std_)
        
        result_weights.append(np.round(results.x, 5))
    return {'Means': result_means, 
            'Stds': result_stds, 
            'Weights': result_weights}
frontier_data = calc_efficient_frontier(iport)
# first five risk levels
frontier_data['Stds'][:5]
# first five mean returns
frontier_data['Means'][:5]
# first five sets of optimal weights
frontier_data['Weights'][:5]

In [None]:
def plot_efficient_frontier(frontier_data):
    plt.figure(figsize=(12,8))
    plt.title('Efficient Frontier')
    plt.xlabel('Standard Deviation of the porfolio (Risk))')
    plt.ylabel('Return of the portfolio')
    plt.plot(frontier_data['Stds'], frontier_data['Means'], '--'); 
    
plot_efficient_frontier(frontier_data)

In [None]:
z = spstats.norm.ppf(0.95)
VaR = 10000 * (z * iport.INFY.std())
VaR

In [None]:
import matplotlib.mlab as mlab
# draw a 99% one-tail confidence interval
x = np.linspace(-4,4,101)
y = np.exp(-x**2/2) / np.sqrt(2*np.pi)
x2 = np.linspace(-4,-2.33,101)
y2 = np.exp(-x2**2/2) / np.sqrt(2*np.pi)
f = plt.figure(figsize=(12,8))
plt.plot(x,y*100, linewidth=2)
xf, yf = mlab.poly_between(x2, 0*x2, y2*100)
plt.fill(xf, yf, facecolor='g', alpha=0.5)
plt.gca().set_xlabel('z-score')
plt.gca().set_ylabel('Frequency %')
plt.title("VaR based on the standard normal distribution")
bbox_props = dict(boxstyle="rarrow,pad=0.3", fc="w", ec="b", lw=2)
t = f.text(0.25, 0.35, "99% VaR confidence level", ha="center", va="center", 
           rotation=270,
            size=15,
            bbox=bbox_props)
plt.savefig('5104OS_09_21.png', bbox_inches='tight', dpi=300)

In [None]:
ret[['GALLISPAT', 'INFY']].plot()

In [None]:
get_hist_data(hist_data, 'INFY')['2001-04']

In [None]:
def rand_weights(n):
    ''' Produces n random weights that sum to 1 '''
    k = np.random.rand(n)
    return k / sum(k)

def random_portfolio(returns):
    ''' 
    Returns the mean and standard deviation of returns for a random portfolio
    '''

    p = np.asmatrix(np.mean(returns, axis=1))
    w = np.asmatrix(rand_weights(returns.shape[0]))
    C = np.asmatrix(np.cov(returns))
    
    mu = w * p.T
    sigma = np.sqrt(w * C * w.T)
    
    # This recursion reduces outliers to keep plots pretty
    if sigma > 2:
        return random_portfolio(returns)
    return mu, sigma

In [None]:
ret.dropna(axis=1)

In [None]:
symbol_list['INFY']