In [1]:
import numpy  as np
import pandas as pd
import matplotlib.pyplot as plt
import sys

In [24]:
def readQuote( inpFileName ):
    """Read a CSV quote table into a DataFrame.

    The first row of the file supplies the column names and the first
    column becomes the row index.

    Parameters
    ----------
    inpFileName : path or file-like object handed straight to pd.read_csv.

    Returns
    -------
    The DataFrame produced by pd.read_csv.
    """
    quoteTable = pd.read_csv( inpFileName, index_col=0, header=0 )
    return quoteTable

In [89]:
# Calculates running average over a number of days, including that day!
def calcRunAvg( myDf, col, inpNumDays ):
    """Add a forward-looking running average of one column to myDf, in place.

    Row i of the new column holds the mean of rows i .. i+numDays-1 of
    `col`, counted by position, so the window includes row i itself.  The
    last numDays-1 rows have no complete window and are left as NaN.  The
    result is stored in a new column named 'avg_<col>_<numDays>'.

    Parameters
    ----------
    myDf       : pandas DataFrame; a column is added to it in place.
    col        : label of an existing column of myDf to average.
    inpNumDays : window length; converted with int() and required to lie
                 between 2 and (number of rows - 1), inclusive.

    Raises
    ------
    ValueError : if the window length is out of range, or if `col` is not
                 a column of myDf.
    """
    numRows = myDf.shape[0]

    # Make sure we are accessing valid average numbers
    numDays = int( inpNumDays )
    if ( numDays < 2 or numDays > numRows-1 ):
        raise ValueError( 'Invalid number of days for running average: %s '
                          '(valid numbers: 2 to %i)'
                          % ( inpNumDays, numRows-1 ) )

    # Make sure we are accessing valid columns
    if ( col not in myDf.columns ):
        raise ValueError( 'Invalid column for running average: %s '
                          '(valid columns: %s)'
                          % ( col, list( myDf.columns ) ) )

    # Store running average; rows without a full window stay NaN
    runAvg = np.zeros( [numRows] ) + np.nan
    series = myDf[col]

    # Loop until we hit limit for number of days.
    # .iloc is always positional, so the window has exactly numDays rows
    # whatever the type of the index (dates, strings or integers).
    for i in range( 0, numRows-numDays+1 ):
        runAvg[i] = series.iloc[ i:i+numDays ].mean()

    # Name the column after the validated integer window length
    newCol       = 'avg_%s_%i' % ( col, numDays )
    myDf[newCol] = runAvg

In [119]:
# Calculates and adds columns of differences between columns to table
def highLow( myDf ):
    """Add a 'highlow' column (high minus low) to myDf, in place.

    The difference is computed from the 'high' and 'low' columns of the
    DataFrame that was passed in, not from any notebook-level variable.
    """
    myDf['highlow'] = myDf['high'] - myDf['low']
def openClose( myDf ):
    """Add an 'openclose' column (close minus open) to myDf, in place.

    The difference is computed from the 'close' and 'open' columns of the
    DataFrame that was passed in, not from any notebook-level variable.
    """
    myDf['openclose'] = myDf['close'] - myDf['open']

In [120]:
# Calculates spearman correlation coefficients for df broken into N_samples
# Lower numbers more noise, but fixes obvious trend in means
def corrTest( bar, N_samples ):
    """Print Spearman correlations averaged over consecutive row chunks.

    The rows of `bar` are split, in order, into N_samples consecutive
    chunks that together cover every row (chunk sizes differ by at most
    one row when the row count is not divisible by N_samples).  For every
    pair of columns the Spearman coefficient is computed inside each chunk
    and the per-chunk values are averaged, weighted by chunk size.

    Prints the Spearman correlation matrix of the whole table, followed by
    one line per column pair with the chunk-averaged coefficient.

    Parameters
    ----------
    bar       : pandas DataFrame whose columns are to be compared.
    N_samples : number of chunks, an integer >= 1.

    Returns
    -------
    numpy array with one averaged coefficient per column pair, ordered
    (0,1), (0,2), ..., (1,2), ... by column position.

    Raises
    ------
    ValueError : if N_samples is smaller than 1.
    """
    if N_samples < 1:
        raise ValueError( 'N_samples must be at least 1, got %s' % ( N_samples, ) )

    labels     = bar.columns.values
    N_lab      = len( labels )
    # Floor division keeps the pair count an integer under true division
    N_corr     = N_lab * ( N_lab - 1 ) // 2
    N_elements = bar.shape[0]

    corr       = np.zeros( N_corr )
    N_samp     = 0

    # Create subsample copies
    for i in range( 0, N_samples ):

        # Integer boundaries that span all rows, so no trailing rows are lost
        lower   = N_elements *  i    // N_samples
        upper   = N_elements * (i+1) // N_samples
        weight  = upper - lower

        # An empty chunk has no correlation and would only inject NaN
        if weight == 0:
            continue

        N_samp += weight

        subSamp = bar.iloc[ lower:upper ]

        counter = 0

        # Loop over labels
        for     j in range(   0, N_lab ):
            for k in range( j+1, N_lab ):

                # Both operands are Series (single brackets)
                temp = subSamp[ labels[j] ].corr( subSamp[ labels[k] ],
                                                  method='spearman' )

                corr[ counter ] += temp * weight

                counter += 1

    corr = corr / N_samp

    print( bar.corr( method='spearman' ) )

    counter = 0
    for     j in range(   0, N_lab ):
        for k in range( j+1, N_lab ):
            print( '%11s %11s %10.6f' % ( labels[j], labels[k], corr[counter] ) )
            counter += 1

    return corr


In [148]:
# Wraps anything that is not already a list in a one-element list
def checkList( inpL ):
    """Return inpL itself when it is a list, otherwise [inpL]."""
    return inpL if isinstance( inpL, list ) else [ inpL ]
    
# Generates averages based on input array, and cross over points
# Will generate crosses as label_label indexes,
#   1 for short dropping below long average,
#  -1 for long popping over short,
#   0 otherwise
def avgCross( myDf, inpL, inpN ):
    """Add running averages and pairwise cross-over indicator columns to myDf.

    For every label in inpL and every window length in inpN, the column
    'avg_<label>_<num>' is created with calcRunAvg if myDf does not have it
    yet.  Then, for every pair (A, B) among the labels and their averages,
    taken in the order they were collected, a column named '<A>_<B>' is
    added in place holding, per row:

       1.0  when A was not below B on the previous row and is below B now,
      -1.0  when A was below B on the previous row and is not below B now,
       0.0  otherwise (always for the first row).

    Rows where A or B is NaN on the current or the previous row are left
    at 0.0.  A comparison with NaN evaluates to False, which would
    otherwise be reported as a crossing where a running average runs out
    of data.

    Parameters
    ----------
    myDf : pandas DataFrame; modified in place.
    inpL : a column label or a list of column labels.
    inpN : a window length or a list of window lengths.
    """
    # Make sure we are dealing with lists
    labList = checkList( inpL )
    numList = checkList( inpN )

    allLabels = []

    # Check for averages, if none exists generate one
    for label in labList:

        allLabels.append( label )

        for num in numList:

            avgName = 'avg_'+label+'_%i'%num

            allLabels.append( avgName )

            if ( not avgName in myDf ):
                calcRunAvg( myDf, label, num )

    # Find crosses
    for     i in range(   0, len( allLabels ) ):
        for j in range( i+1, len( allLabels ) ):

            first  = myDf[ allLabels[i] ]
            second = myDf[ allLabels[j] ]

            lower  = ( first < second ).values
            valid  = ( first.notnull() & second.notnull() ).values
            trans  = np.zeros( len( lower ) )

            # Compare every row with the one before it, all rows at once
            prev      = lower[:-1]
            curr      = lower[1:]
            bothValid = valid[:-1] & valid[1:]

            # step is a view into trans, so assigning here fills trans[1:]
            step = trans[1:]
            step[ bothValid & ~prev &  curr ] =  1.0
            step[ bothValid &  prev & ~curr ] = -1.0

            myDf[ allLabels[i]+'_'+allLabels[j] ] = trans

In [2]:
# Path of the CSV quote file that is loaded with readQuote below
inpFile = 'quotes/ibm.csv'

In [25]:
# Load the quote file into a DataFrame (first CSV column becomes the index)
myQuote = readQuote( inpFile )

In [91]:
# Work on an independent copy so the loaded quotes in myQuote stay untouched
bar = myQuote.copy()

# Add the derived columns and a 3-row running average of the close
highLow( bar )
openClose( bar )
calcRunAvg( bar, 'close', 3 )
print( bar.head() )

             close     volume    open    high       low  highlow  openclose  \
date                                                                          
2017/03/10  177.83  3100107.0  178.21  179.49  177.4200   2.0700      -0.38   
2017/03/09  177.18  5443665.0  179.15  179.25  175.8800   3.3700      -1.97   
2017/03/08  179.45  3557388.0  180.75  180.95  179.3000   1.6500      -1.30   
2017/03/07  180.38  2977496.0  180.71  181.29  180.1997   1.0903      -0.33   
2017/03/06  180.47  3199175.0  179.72  180.99  179.5700   1.4200       0.75   

            avg_close_3  
date                     
2017/03/10   178.153333  
2017/03/09   179.003333  
2017/03/08   180.100000  
2017/03/07   180.300000  
2017/03/06   180.350000  


In [87]:
# Covariance of all variables
# Break into chunks of ___ days, so average isn't skewed to general trend of rise in the market
# ^ Can use metrics to determine slumps, shooting up, constant growth, volatile
# Plot trends over time
# Test different time-varying models

In [149]:
# Take an explicit copy of the first 20 rows: avgCross adds columns to the
# frame it is given, and those must not be written onto a slice of bar
temptemp = bar.iloc[0:20].copy()
avgCross( temptemp, ['close','open'], [3,5] )

In [150]:
# Show the first rows, including the columns added by avgCross
temptemp.head()

Unnamed: 0_level_0,close,volume,open,high,low,highlow,openclose,avg_close_3,avg_close_5,avg_open_3,...,avg_close_3_avg_close_5,avg_close_3_open,avg_close_3_avg_open_3,avg_close_3_avg_open_5,avg_close_5_open,avg_close_5_avg_open_3,avg_close_5_avg_open_5,open_avg_open_3,open_avg_open_5,avg_open_3_avg_open_5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017/03/10,177.83,3100107.0,178.21,179.49,177.42,2.07,-0.38,178.153333,179.062,179.37,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017/03/09,177.18,5443665.0,179.15,179.25,175.88,3.37,-1.97,179.003333,179.506,180.203333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
2017/03/08,179.45,3557388.0,180.75,180.95,179.3,1.65,-1.3,180.1,180.176,180.393333,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,-1.0,-1.0,1.0
2017/03/07,180.38,2977496.0,180.71,181.29,180.1997,1.0903,-0.33,180.3,180.676,180.156667,...,0.0,0.0,-1.0,0.0,0.0,-1.0,-1.0,0.0,0.0,0.0
2017/03/06,180.47,3199175.0,179.72,180.99,179.57,1.42,0.75,180.35,180.564,180.546667,...,0.0,-1.0,1.0,-1.0,-1.0,0.0,0.0,1.0,1.0,-1.0
