In [286]:
from sqlalchemy import create_engine
import pandas
import tushare as ts
import numpy as np
import collections
from sklearn import cluster, covariance, manifold

In [247]:
class BankAccount:
    def __init__(self, deposit,interest_rate):
        self.deposit = deposit
        
    def show_balance(self):
        print self.deposit
        
    def take_loans(self,amount_of_loans):
        self.amount_of_loans = amount_of_loans

In [282]:
class StockAccount(BankAccount):
    def __init__(self, deposit, interest_rate=3,trading_expense=0.2, initial_holdings={}):
        BankAccount.__init__(self, deposit, interest_rate)
        self.trading_expense = trading_expense/100
        self.holdings = initial_holdings
        self.trading_history = {}

    def check_account(self):
        print 'balance:',self.deposit, 'holdings:',self.holdings,
    
    def record_trading_history(function):
        def add_trading_history(self, stock_code, price, shares, date):
            self.trading_history[date] = self.trading_history.get(date, [])
            self.trading_history[date].append((function.func_name,price,stock_code, shares))
            return function(self, stock_code, price, shares, date)
        return add_trading_history
    
    @record_trading_history
    def buy(self, stock_code, price, shares, date):
        cost = price*shares*(1+self.trading_expense)
        if self.deposit > cost:
            self.deposit = self.deposit - cost
            self.holdings[stock_code] = self.holdings.get(stock_code, 0) + shares
        else:
            print 'not enough balance'
    
    @record_trading_history
    def sell(self, stock_code, price,shares, date):
        self.holdings[stock_code] = self.holdings.get(stock_code, 0)
        if self.holdings[stock_code] > shares:
            self.holdings[stock_code] = self.holdings[stock_code] - shares
            self.deposit = self.deposit + price*shares*(1-self.trading_expense) 
        else:
            print 'no stocks to sell'
    
    @record_trading_history
    def borrow_stocks(self, stock_code, price,shares, date):
        pass

In [283]:
new_account = StockAccount(10000)

In [268]:
new_account.buy('1234',19.2, 123, '09/12')

In [269]:
new_account.sell('1234',20.1,100,'09/13')

In [270]:
new_account.buy('234',30,100,'09/14')

In [271]:
new_account.trading_history

{'09/12': [('buy', 19.2, '1234', 123)],
 '09/13': [('sell', 20.1, '1234', 100)],
 '09/14': [('buy', 30, '234', 100)]}

In [272]:
new_account.check_account()

balance: 6633.6568 holdings: {'1234': 23, '234': 100}


In [273]:
new_account.trading_history

{'09/12': [('buy', 19.2, '1234', 123)],
 '09/13': [('sell', 20.1, '1234', 100)],
 '09/14': [('buy', 30, '234', 100)]}

In [274]:
new_account.check_account()

balance: 6633.6568 holdings: {'1234': 23, '234': 100}


In [275]:
new_account.sell('1234',20, 100, '09/14')

no stocks to sell


In [278]:
new_account.sell('234',20, 100, '09/14')

no stocks to sell


In [277]:
new_account.check_account()

balance: 6633.6568 holdings: {'1234': 23, '234': 100}


In [56]:
hs300 = ts.get_hs300s()
for code in [i for i in hs300['code']]:
    ts.get_k_data(code).to_csv('hs300.csv', mode='a', header=False)

NameError: name 'ts' is not defined

In [287]:
hs300 = pandas.read_csv('hs300.csv')

In [305]:
def train_data(hs300,date_start, date_end):
    hs300 = hs300[(hs300['date'] > date_start) & (hs300['date'] <= date_end)]
    hs300_group = hs300.groupby('code')
    value_count = collections.Counter([len(hs300_group.get_group(x)) for x in hs300_group.groups])
    most_common_stock_frequency = max(value_count, key=value_count.get)
    lst = [hs300_group.get_group(x) for x in hs300_group.groups if len(hs300_group.get_group(x))==most_common_stock_frequency]
    return lst

In [303]:
def train_model(list_stocks):
    codes = np.asarray([i['code'].values[1] for i in list_stocks])
    open = np.array([i.open.values for i in list_stocks]).astype(np.float)
    close = np.array([i.close.values for i in list_stocks]).astype(np.float)
    variation = close - open
    edge_model = covariance.GraphLassoCV()
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    l1 = []
    for i in range(n_labels + 1):
        l1.append(codes[labels == i])
    return l1

In [307]:
train_data = train_data(hs300, '2016-01-01', '2016-06-30')

In [310]:
import time

In [311]:
start = time.time()
clustering = train_model(train_data)
end = time.time()
print end - start

55.4599030018


In [314]:
clustering

[array([601088,     63, 601898, 600188, 601225, 600718, 600208,    709,
           712, 600008]),
 array([600066]),
 array([601601]),
 array([600074]),
 array([601618, 300015]),
 array([  2081, 600029]),
 array([600104, 600741, 601288, 600406,    876,   2415]),
 array([46]),
 array([     9,   2183,    792, 603993]),
 array([600737]),
 array([600582, 600642, 600157, 600705, 601808, 600837, 601866,    826,
        601919,    778, 600485, 600028]),
 array([600674, 600886,   2470]),
 array([601166]),
 array([601628, 601169, 300251]),
 array([600606, 600660, 600340]),
 array([600663]),
 array([601009,   2142, 600015, 601988, 601998, 600011]),
 array([600588, 300059, 300070, 300085,     69, 300133,   2153, 601216,
          2195,    156,    686, 600446, 603000,   2292, 300315, 601928,
          2399, 601992,   2500,    977, 600570, 300027]),
 array([   538, 600688, 600196, 300058, 600037, 600998,   2475]),
 array([600518]),
 array([601111,   2202,   2736, 600804, 601555, 600362,    423, 6000

In [None]:
def trade_sim(stock_account, start_date, end_date):
    