In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from dateutil.relativedelta import relativedelta
from symbol import Symbol
from index import Index
from portifolio import Portifolio
import seaborn as sns
from sklearn.preprocessing import scale
from scipy import stats


sns.set_style("dark")

%matplotlib inline

In [2]:
def calculate_variance(returns, weights=None):
    if weights is None: 
        weights = np.ones(returns.columns.size) / \
        returns.columns.size
    sigma = np.cov(returns.T,ddof=0)
    var = (weights * sigma * weights.T).sum()
    return var
def sharpe_ratio(returns, weights = None, risk_free_rate = 0.075):
    if isinstance(returns, pd.Series):
        returns = pd.DataFrame(returns)
        return sharpe_ratio(returns, risk_free_rate=risk_free_rate)
    n = returns.columns.size
    if weights is None: weights = np.ones(n)/n
    # get the portfolio variance
    var = abs(calculate_variance(returns, weights))
    # and the means of the stocks in the portfolio
    means = returns.mean()
    # and return the sharpe ratio
    risk_free_rate = np.log(1+risk_free_rate)/252
    sharpe = (means.dot(weights) - risk_free_rate)/np.sqrt(var)
    return sharpe

In [3]:
sym = Symbol()
ind = Index()
port = Portifolio()

In [18]:
infy_data = sym.get_symbol_hist('infy')
infy_div = sym.get_dividend_data('infy')
index_data = ind.get_index_close(index_list='nifty_50')
adj_infy = adjust_closing_prices(infy_data.set_index('date'), infy_div)

Exception occurred at Cannot add integral value to Timestamp without freq.
Pandas(Index=5995, symbol='infy', date=Timestamp('1996-04-30 00:00:00'), action='DIVIDEND', value=3.5)
Exception occurred at Cannot add integral value to Timestamp without freq.
Pandas(Index=5996, symbol='infy', date=Timestamp('1996-10-28 00:00:00'), action='DIVIDEND', value=1.5)
Exception occurred at Cannot add integral value to Timestamp without freq.
Pandas(Index=5997, symbol='infy', date=Timestamp('1997-05-23 00:00:00'), action='DIVIDEND', value=4.0)
Exception occurred at Cannot add integral value to Timestamp without freq.
Pandas(Index=5998, symbol='infy', date=Timestamp('1997-08-19 00:00:00'), action='SPLIT', value=0.5)
Exception occurred at Cannot add integral value to Timestamp without freq.
Pandas(Index=5999, symbol='infy', date=Timestamp('1997-12-17 00:00:00'), action='DIVIDEND', value=1.5)
Exception occurred at Cannot add integral value to Timestamp without freq.
Pandas(Index=6000, symbol='infy', date

In [35]:
def adjust_closing_prices(hist_data, dividend_data):
    # Make a copy of hist_data before proceeding
    hist_data = hist_data.copy()
    hist_data['adj_close'] = hist_data.close
    dividend_data = dividend_data.copy()
    if type(dividend_data.index[0]) == pd.tslib.Timestamp:
        dividend_data = dividend_data.reset_index()

    for i in dividend_data.itertuples():
        adj_fact = 1
        if i.value == 0:
                continue
        try:
            date = hist_data.ix[: i.date].index[-1]
            close = hist_data.close.ix[date]
        except Exception as e:
            print('Exception occurred at during date {0}'.format(e))
            print(i)
            continue
        if i.action == 'DIVIDEND':
            adj_fact = (close + i.value) / close
        if i.action == 'SPLIT':
            adj_fact = 1 / i.value

        try:

            hist_data.ix[:date, 'adj_close'] = round((hist_data.adj_close[:date] /
                                                     adj_fact), 4)
            hist_data.ix[date, 'adj_close'] = round((hist_data.adj_close[date] *
                                                    adj_fact), 4)
        except Exception as e:
            print('Exception occurred at {0}'.format(e))
            print(i)

    return hist_data

In [36]:
adj_infy = adjust_closing_prices(infy_data.set_index('date'), infy_div)
infy_adata = pd.DataFrame(adj_infy.adj_close).join(index_data).dropna()
# infy_data.to_csv('Daily_Close.csv')
# infy_data.resample('W-FRI').last().to_csv('Weekly_Close.csv')
# infy_data.resample('M').last().to_csv('Monthly_Close.csv')
# infy_data.resample('Q-MAR').last().to_csv('Quarterly_Close.csv')
# infy_data.resample('A').last().to_csv('Annual_Close.csv')
infy_adata['2015-06']

Unnamed: 0_level_0,adj_close,nifty_50
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-06-01,977.4718,8433.4
2015-06-02,958.7843,8236.45
2015-06-03,964.6317,8135.1
2015-06-04,968.0445,8130.65
2015-06-05,960.2164,8114.7
2015-06-08,954.6793,8044.15
2015-06-09,950.8845,8022.4
2015-06-10,967.3046,8124.45
2015-06-11,953.5338,7965.35
2015-06-12,956.8273,7982.9


In [None]:
port = s.Portifolio(index='nifty_500', start=2010, null_count=5, volume=1000, benchmark='nifty_50')
len(port.symbol_list)

In [None]:
hdata = port.get_symbol_hist()
hret = port.get_symbol_returns().interpolate(limit=5)
iret = port.get_index_returns()
hret = hret.dropna(axis=1)
scaled_hret = pd.DataFrame(scale(hret), columns=hret.columns.copy(), index=hret.index.copy())
hclose = port.get_symbol_close()
pref_symbols = hclose.columns[hclose.ix[-1, :] <= 1000]
pref_symbols = pref_symbols.intersection(hret.columns)
hret = hret[pref_symbols]
hclose = hclose[pref_symbols]
len(hret), len(hret.columns), len(iret)

In [None]:
bench_port = port.create_portifolio(ret=hret['2010':'2015'])
new_port = port.create_portifolio(ret=hret['2016'])
sharpe_ratio(bench_port)

In [None]:
import scipy as sp
import scipy.optimize as scopt
import scipy.stats as spstats
def negative_sharpe_ratio_n_minus_1_stock(weights, 
                                          returns, 
                                          risk_free_rate):
    """
    Given n-1 weights, return a negative sharpe ratio
    """
    weights = weights.round()
    return -sharpe_ratio(returns, weights, risk_free_rate)
def optimize_portfolio(returns, risk_free_rate):
    """ 
    Performs the optimization
    """
    # start with equal weights
    w0 = [100/returns.columns.size for i in range(returns.columns.size)]
    cons = ({'type': 'eq', 'fun': lambda x:  x.sum()-100},
           {'type':'eq', 'fun': lambda x: (x<0).sum()})
    # minimize the negative sharpe value
    w1 = scopt.minimize(negative_sharpe_ratio_n_minus_1_stock, 
                        w0, args=(returns, risk_free_rate),method='COBYLA',
                        options={'disp': True})
    # build final set of weights
    final_w = (w1.x).round()
    # and calculate the final, optimized, sharpe ratio
    final_sharpe = sharpe_ratio(returns, final_w, risk_free_rate)
    return (final_w.round(), final_sharpe)
optimize_portfolio(bench_port.ix[:, 0:10], 0.075)

In [None]:
sharpe_ratio(bench_port.ix[:, 0:6])
sh_w = []
def helsf(returns, hclose):
    max_sh = -np.inf
    i = 0
    n = returns.columns.size
    hclose = hclose[returns.columns].copy()
#     weights = [0 for p in range(n)]
    while i < 1000:
        weights =(constrained_sum_sample_pos(n, 100 + n))
        if hclose.dot(weights).ix[-1] > 10000:
            i = i+0.5
            continue
        if weights not in sh_w:
            sh_w.append((weights))
        else:
            i = i+0.5
            continue
        weights = np.array(weights)
        sharpe_rat = sharpe_ratio(returns, weights)
        if sharpe_rat > max_sh:
            max_i = i
            max_sh = sharpe_rat
            max_weights = weights
        i = i+1
    return max_i, max_sh, max_weights
helsf(hret.ix[:, 0:10], hclose)

In [None]:
hret

In [None]:
(8879.0, 8.6029805156065411, array([9, 3, 9, 2, 5, 2]))
(5766, 253.56645536686179, array([14,  0,  0,  3,  1,  4,  3,  3,  1,  1]))

In [None]:
[ 24.0438623 ,  12.02346529,  18.95166979,   5.76427132,
          7.8546917 ,   9.34415854,   0.37967011,  11.13532162,
          8.3874864 ,   6.33986876]

In [None]:
import random

def constrained_sum_sample_pos(n, total):
    """Return a randomly chosen list of n positive integers summing to total.
    Each such list is equally likely to occur."""

    dividers = sorted(random.sample(range(1, total), n - 1))
    return [a - b - 1 for a, b in zip(dividers + [total], [0] + dividers)]
constrained_sum_sample_pos(10, 30)

In [None]:
def prob(ret):
    vals = pd.cut(ret*100, bins=[-20, -10, -5, -2,-0.5, 0.5, 2, 5, 10, 20], labels=[-4, -3, -2, -1, 0, 1, 2, 3, 4]).value_counts()
    prob = (vals/vals.sum()).sort_index()
    return prob
prob_ret = hret.dropna(how='all', axis=1).apply(prob)
prob_ret.index = prob_ret.index.as_ordered()
prob_ret=prob_ret.T
prob_cond =(prob_ret.loc[:, [-4,-3]] == 0)
prob_ret[prob_cond.sum(axis=1) == len(prob_cond.columns)]

In [None]:

sharpe = hret['2010':'2015'].apply(sharpe_ratio)
beta = port.calculate_capm(returns=hret['2010':'2015'], benchmark_returns=iret['2010':'2015']).reset_index(drop=True).set_index('symbol')

In [None]:
total_returns = pd.DataFrame(hret.sum(), columns=['tot_returns'])
avg_returns = pd.DataFrame(hret.mean(), columns=['avg_returns'])
std_dev = pd.DataFrame(hret.std(), columns=['std_dev'])
returns = total_returns.join(avg_returns).join(std_dev)
returns = returns.join(pd.DataFrame(sharpe, columns=['sharpe'])).join(beta.beta)
returns.sort_values('tot_returns', ascending=False).head(10)

In [None]:
returns.plot(kind='scatter',x='sharpe', y='beta')

In [None]:
sharpe = sharpe.sort_values(ascending=False)
symbol_list = sharpe.index[0:20]
print(symbol_list)
new_sharpe = hret['2016':].apply(sharpe_ratio).sort_values(ascending=False)
hret['2016'][symbol_list].sum()

In [None]:
sym_list = returns.sort_values('sharpe', ascending=False).head(30).index.tolist()
opt_port = s.Portifolio(start=2016,index='nifty_50', benchmark='nifty_50')
hdata = opt_port.get_symbol_hist()
hret = opt_port.get_symbol_returns()
iret = opt_port.get_index_returns()
hret = hret.dropna(axis=1)
scaled_hret = pd.DataFrame(scale(hret), columns=hret.columns.copy(), index=hret.index.copy())
sharpe = hret.apply(sharpe_ratio)
beta = port.calculate_capm(returns=hret, benchmark_returns=iret).reset_index(drop=True).set_index('symbol')
total_returns = pd.DataFrame(hret.sum(), columns=['tot_returns'])
avg_returns = pd.DataFrame(hret.mean(), columns=['avg_returns'])
std_dev = pd.DataFrame(hret.std(), columns=['std_dev'])
opt_returns = total_returns.join(avg_returns).join(std_dev)
opt_returns = opt_returns.join(pd.DataFrame(sharpe, columns=['sharpe'])).join(beta.beta)

In [None]:
# opt_returns.plot(kind='scatter',x='tot_returns', y='std_dev')
ps = sns.regplot(data=opt_returns, x='std_dev', y='sharpe')

In [None]:
infy_ret = ret.infy.copy()
mean = infy_ret.mean()
std = infy_ret.std()
new = pd.Series(np.random.normal(loc=mean, scale=std, size=len(infy_ret)), index=infy_ret.index.copy())
new[abs(new) > mean+2*std] = 0
infy = pd.DataFrame(index=infy_ret.index.copy())
infy['original'] = 1000
infy['original'] = infy.original * np.exp(infy_ret.cumsum())
infy['random'] = 1000
infy['random'] = infy.random * np.exp(new.cumsum())
print(infy.mean())
infy.plot()

In [None]:
infy_ret = ret.infy['2007'].copy()
mean = infy_ret.mean()
std = infy_ret.std()
infy_ret = ret.infy['2007'].copy()
monte = pd.DataFrame(index=infy_ret.index.copy())
for i in range(0, 100):
    new = pd.Series(np.random.normal(loc=mean, scale=std, size=len(infy_ret)), index=infy_ret.index.copy(), name=str(i))
    monte[str(i)] = 1000
    monte[str(i)] = monte[str(i)] * np.exp(new.cumsum())
monte['2007-01':'2007-01-10'].plot(legend=None)

In [None]:
sns.heatmap(ret['2016'].corr(), vmax=.8, square=True)

In [None]:
sns.lmplot(x='infy', y='wipro', data=ret)

In [None]:
gammas = sns.load_dataset("gammas")

# # Plot the response with standard error
# sns.tsplot(data=gammas, time="timepoint", unit="subject",
#            condition="ROI", value="BOLD signal")
# prob_pivot = pd.pivot(prob_ret, index=)
gammas

In [None]:
smeta[(smeta.tech_strength >= 7) & (smeta.mcap > np.mean(smeta.mcap))].sort_values('tech_strength', ascending=False)

In [None]:
sps = pd.read_html('http://techpaisa.com/stock/20microns')
sps[0]
sps[3]
sps[7]