In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd

# Let pandas render up to 100 rows and 100 columns before eliding output.
for option_name in ("display.max_rows", "display.max_columns"):
    pd.set_option(option_name, 100)

In [None]:
# Load the screen's summary table; later cells read its 'SYMBOL' and 'mean' columns.
key_df = pd.read_csv('./data/screens/1yeartopperformers/top100alpha.csv')
# Disabled one-off clean-up, kept for reference: rename the unnamed first
# column to 'SYM' and write the frame back out. Note the write target sits in
# a different directory (volume600k) than the file read above.
#key_df.rename({'Unnamed: 0':'SYM'}, axis=1, inplace=True)
#key_df.to_csv('./data/screens/volume600k/highmeanreturn_lowstd_top_performers_2_months.csv')
key_df.head()

In [None]:
# Show up to the first 100 rows of the summary table.
key_df.head(100)

In [None]:
# Default location and file-name suffix of the per-symbol pickles.
data_dir = './data/screens/1yeartopperformers/'
tail = '190820.pickle'

def load_set(stock, data_dir=data_dir, tail=tail):
    """Load the pickled object stored for one symbol.

    The file read is ``<data_dir>/<stock><tail>``.

    Parameters
    ----------
    stock : str
        Symbol that forms the start of the file name.
    data_dir : str
        Directory containing the pickles. A trailing path separator is
        optional; the path is joined with ``os.path.join``.
    tail : str
        Suffix appended directly after the symbol to form the file name.

    Returns
    -------
    object
        Whatever ``pd.read_pickle`` restores from the file (the rest of this
        notebook uses it as a DataFrame).

    Raises
    ------
    FileNotFoundError
        If no file exists at the resulting path.
    """
    path = os.path.join(data_dir, '{}{}'.format(stock, tail))
    # SECURITY: unpickling can execute arbitrary code. Only load pickle files
    # that were produced locally by a trusted process.
    return pd.read_pickle(path)



In [None]:
# Symbols of the 100 rows with the highest 'mean' value, highest first.
# NOTE(review): the following cell reassigns `top_100` from the unsorted
# frame, so this ordering is discarded when the notebook runs top to bottom.
top_100 = (
    key_df
    .sort_values(by='mean', ascending=False)
    ['SYMBOL']
    .head(100)
)

In [None]:
# Symbols from the first 100 rows, in the file's own row order (no sorting here).
top_100 = key_df['SYMBOL'].iloc[:100]

In [None]:
# Display the selected symbols.
top_100

In [None]:
fig, axs = plt.subplots(10, 10, figsize=(100,100))

# One panel per symbol. `axs.flat` walks the 10x10 grid row by row, so the
# i-th symbol lands in row i // 10, column i % 10.
for ax, sym in zip(axs.flat, top_100):
    df = load_set(data_dir=data_dir, stock=sym, tail=tail)
    # The legend quotes the date and close of the frame's first row.
    first_row = df.iloc[0]
    legend_text = 'Close Price on {}: ${}'.format(first_row['date'], first_row['close'])
    ax.plot(df['date'], df['close'], label=legend_text)
    ax.set(title=sym, ylabel='Close Price in $', xlabel='Date')
    ax.legend()

# Persist the whole grid as one image.
plt.savefig('./data/images/1yeartop100.png', transparent=False)

In [None]:
# Load a single symbol (DOCU) for a closer look in the cells that follow.
df = load_set('DOCU')

In [None]:
# First row only, to see which columns the frame carries.
df.head(1)

In [None]:
# The same first row, selected by position.
df.iloc[0]

In [None]:
# Quick look at the 'pct_change' column; no x values are passed, so it is
# drawn against the frame's index.
plt.plot(df['pct_change'])

In [None]:
# Scale 'pct_change' by 100 (presumably fraction -> percent; confirm against
# how the pickles were built) and summarise the scaled column.
# NOTE(review): this overwrites the column in place, so running the cell a
# second time without reloading `df` multiplies by 100 again.
df['pct_change'] = df['pct_change'].mul(100)
description = df['pct_change'].describe()

In [None]:
# Inspect the column after the previous cell multiplied it by 100.
df['pct_change']

In [None]:
# Broadcast the summary mean and standard deviation to constant columns so
# they can be drawn as horizontal lines against the date axis.
df['mean'] = description['mean']
df['std'] = description['std']

# Band columns at mean +/- 1 and 2 standard deviations, created in the order
# +1, +2, -1, -2.
for column, k in (('+1std', 1), ('+2std', 2), ('-1std', -1), ('-2std', -2)):
    df[column] = df['mean'] + df['std'] * k

In [None]:
# Daily change with its mean (red) and the +/-1 and +/-2 std bands (dashed green).
plt.plot(df['date'], df['pct_change'])
mean_label = 'Mean Return {:.2f}%'.format(description['mean'])
plt.plot(df['date'], df['mean'], c='r', label=mean_label)
for band in ('+1std', '+2std', '-1std', '-2std'):
    plt.plot(df['date'], df[band], c='g', linestyle='dashed')
plt.legend()

In [None]:
fig, axs = plt.subplots(10, 10, figsize=(100,100))

# One panel per symbol. `axs.flat` visits the 10x10 grid row by row, so the
# i-th symbol lands in row i // 10, column i % 10.
for ax, sym in zip(axs.flat, top_100):
    df = load_set(data_dir=data_dir, stock=sym, tail=tail)
    df['pct_change'] = df['pct_change'] * 100
    description = df['pct_change'].describe()

    # Constant columns for the mean and the +/-1 and +/-2 std bands.
    df['mean'] = description['mean']
    df['std'] = description['std']
    for column, k in (('+1std', 1), ('+2std', 2), ('-1std', -1), ('-2std', -2)):
        df[column] = df['mean'] + df['std'] * k

    # The statistics above use every loaded row; only the last 42 rows are drawn.
    df = df.iloc[-42:]

    ax.plot(df['date'], df['pct_change'])
    ax.plot(df['date'], df['mean'], c='r', label='Mean Return {:.2f}%'.format(description['mean']))
    # Only the +1 std line carries a legend entry; the other bands are unlabeled.
    ax.plot(df['date'], df['+1std'], c='g', linestyle='dashed', label='Std Dev = {:.2f}%'.format(description['std']))
    for column in ('+2std', '-1std', '-2std'):
        ax.plot(df['date'], df[column], c='g', linestyle='dashed')

    ax.set(title=sym, ylabel='Daily % Change', xlabel='Date')
    ax.legend()

# Saving is currently disabled.
#plt.savefig('./data/images/1year_top100_dailyreturns.png', transparent=False)

#### Further visualizations

Given the plots above, two questions come to mind.

First, if I subtract the rolling mean from the price in the first chart, what will happen?

Second, if I divide the % change in the second chart by the volume, what will happen?

My goal for both of these questions is to discover if there is a clear sinusoidal relationship between time and price. 

#### Subtract the moving average

In [None]:
# Take the first symbol by position; `top_100[0]` would be a label lookup and
# only matches the first element while the Series index starts at 0.
stock = top_100.iloc[0]
# Load that symbol. The original passed `sym`, a variable left over from the
# plotting loops that holds the LAST symbol, so `stock` was never used.
df = load_set(data_dir=data_dir, stock=stock, tail=tail)

In [None]:
# 30-row rolling mean computed on the reversed series and flipped back, so
# each row's window covers that row and the 29 rows that FOLLOW it in the
# frame's stored order; the last 29 rows are NaN.
# NOTE(review): if rows are stored oldest-first this is a forward-looking
# average. Confirm the row order of the pickles.
reversed_close = df['close'].iloc[::-1]
df['rolling30'] = reversed_close.rolling(30).mean().iloc[::-1]
df['close-rolling30'] = df['close'].sub(df['rolling30'])

# Deviation of the close from that rolling mean over time.
plt.plot(df['date'], df['close-rolling30'])

In [None]:
fig, axs = plt.subplots(10, 10, figsize=(100,100))

# One panel per symbol. `axs.flat` visits the 10x10 grid row by row, so the
# i-th symbol lands in row i // 10, column i % 10.
for ax, sym in zip(axs.flat, top_100):
    df = load_set(data_dir=data_dir, stock=sym, tail=tail)
    # 30-row rolling mean taken over the reversed series: each row's window is
    # that row plus the 29 rows after it in stored order (last 29 rows are NaN).
    df['rolling30'] = df['close'][::-1].rolling(30).mean()[::-1]
    df['close-rolling30'] = df['close'] - df['rolling30']
    df['close-rolling30divolume'] = df['close-rolling30'] / df['volume']
    ax.plot(df['date'], df['close-rolling30divolume'])
    ax.set_title(sym)
    # The plotted series is the deviation divided by volume, so the label says
    # so; it previously claimed a plain dollar deviation.
    ax.set_ylabel('(Close Price Minus 30 Day Rolling Average in $) / Volume')
    ax.set_xlabel('Date')
    # No legend: the single plotted line carries no label.

# Saving is disabled. NOTE: this path is the same file the close-price grid
# writes, so enabling it as-is would overwrite that image.
#plt.savefig('./data/images/1yeartop100.png', transparent=False)

#### Divide % Change by volume

In [None]:
fig, axs = plt.subplots(10, 10, figsize=(100,100))

# One panel per symbol. `axs.flat` visits the 10x10 grid row by row, so the
# i-th symbol lands in row i // 10, column i % 10.
for ax, sym in zip(axs.flat, top_100):
    df = load_set(data_dir=data_dir, stock=sym, tail=tail)
    df['pct_change'] = df['pct_change'] * 100
    df['pct_change_by_volume'] = df['pct_change'] / df['volume']
    description = df['pct_change_by_volume'].describe()

    # Constant columns for the mean and the +/-1 and +/-2 std bands.
    df['mean'] = description['mean']
    df['std'] = description['std']
    for column, k in (('+1std', 1), ('+2std', 2), ('-1std', -1), ('-2std', -2)):
        df[column] = df['mean'] + df['std'] * k

    # The statistics above use every loaded row; only the last 42 rows are drawn.
    df = df.iloc[-42:]
    ax.plot(df['date'], df['pct_change_by_volume'])
    # Scientific notation: a percent change divided by a share volume is
    # expected to be far below 1e-5 (assumes 'volume' is a raw share count;
    # confirm), which the previous '{:.5f}' format rendered as 0.00000.
    ax.plot(df['date'], df['mean'], c='r', label='Mean Return {:.3e}% Per Volume'.format(description['mean']))
    # Only the +1 std line carries a legend entry; the other bands are unlabeled.
    ax.plot(df['date'], df['+1std'], c='g', linestyle='dashed', label='Std Dev = {:.3e}% Per Volume'.format(description['std']))
    for column in ('+2std', '-1std', '-2std'):
        ax.plot(df['date'], df[column], c='g', linestyle='dashed')

    ax.set_title(sym)
    ax.set_ylabel('Daily % Change div volume')
    ax.set_xlabel('Date')
    ax.legend()

# Saving is currently disabled.
#plt.savefig('./data/images/1year_top100_dailyreturns.png', transparent=False)