In [21]:
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import csv
from pandas.tseries.offsets import MonthEnd
from datetime import datetime, timedelta, date
import requests
import json 
import ta




In [22]:
def labelling_multiclass(dataframe):
    """This generates labels for the dataframes we wish to predict price.
    A label of 1 suggests an increase in price between months while a label of 0 indicates
    a decrease in price. If the current price is greater than the previous price then a"""
    dataframe = dataframe.apply(pd.to_numeric)
    #introduce the absolute price change as a variable
    dataframe['ATR'] = ta.volatility.average_true_range(dataframe.High, dataframe.Low, dataframe.Close, n=14, 
                                                        fillna=True)
    
    #calculate the percentage change of ATR between months 
    dataframe['ATR_%_change'] = abs(dataframe['ATR'].pct_change())
    
    #calculate the average ATR pct change
    average_ATR_pct_change = dataframe['ATR_%_change'].mean()
    
    
    
    empty_dict = dict()
    prev_close =0
    counter = 0
    
    #print (dataframe.head(5))
    
    for index,cols in dataframe.iterrows():
        
        counter +=1
        
        current_close = cols['Close']
        ATR_price_change = cols['ATR_%_change']
        
        if (float(prev_close) <= float(current_close)) and (float(ATR_price_change) <= float(average_ATR_pct_change)):
            
            #signals an increase in price between last month and next month
            
            empty_dict[index] = 1
        # 2 represents an increase that was strong greater than the average ATR   
        elif (float(prev_close) <= float(current_close)) and (float(ATR_price_change) >= float(average_ATR_pct_change)):
            empty_dict[index] = 2
        
        elif (float(prev_close) >= float(current_close)and (float(ATR_price_change) <= float(average_ATR_pct_change))):
            empty_dict[index]=0
            
        #3 represents a decrease in the price that had a larger ATR    
        elif (float(prev_close) >= float(current_close)and (float(ATR_price_change) >= float(average_ATR_pct_change))):
            empty_dict[index] = 3
        
        else:
            empty_dict[index]= 0
            

        #reassign prev close to the current close     
        prev_close = current_close
    
    return empty_dict

In [23]:
#read in dataframes with MONTHLY data 
MSFT = pd.read_csv(r"/Users/fahad/project_repo/data/external/MSFT_monthly.csv")
AAPL = pd.read_csv(r"/Users/fahad/project_repo/data/external/AAPL_monthly.csv")
AMZN = pd.read_csv(r"/Users/fahad/project_repo/data/external/AMZN_monthly.csv")

#GIVE COLUMN NAMES 
MSFT.columns = ['Date','Open','High','Low','Close','Volume']
AAPL.columns = ['Date','Open','High','Low','Close','Volume']
AMZN.columns = ['Date','Open','High','Low','Close','Volume']


#set date column as the index
MSFT = MSFT.set_index(['Date'])
AAPL = AAPL.set_index(['Date'])
AMZN = AMZN.set_index(['Date'])



In [24]:
#sort from old to new
MSFT = MSFT.sort_index()
AAPL = AAPL.sort_index()
AMZN = AMZN.sort_index()

In [25]:
#SPLIT THE DATA LEARN FROM 1998 TO 2015 AND PREDICT THE REMAINING 4 YEARS 
#TRAINING DATA AND VALIDATION DATA 
MSFT_98_to_2015 = MSFT[:206]
AAPL_98_to_2015 = AAPL[:206]
AMZN_98_to_2015 = AMZN[:206]

In [26]:
#TEST DATA 
MSFT_2015_to_2019 = MSFT[206:]
AAPL_2015_to_2019 = AAPL[206:]
AMZN_2015_to_2019 = AMZN[206:]

export_MSFT= MSFT_2015_to_2019.to_csv(r'MSFT_15_to_19.csv')

export_AAPL= AAPL_2015_to_2019.to_csv(r'AAPL_15_to_19.csv')

export_AMZN = AMZN_2015_to_2019.to_csv(r'AMZN_15_to_19.csv')



# TRAINING  DATA 1998 TO 2015

In [27]:

#returns the a dictionary object with the dates and corresponding buy/sell signals from 98 to 2015 with labels attached
#takes empty dict as first param, and dataframe object with OHLCV as second argument

#dictionaries with dates and labels for the 
MSFT_98_2015_labelled = labelling_multiclass(MSFT_98_to_2015)
AAPL_98_2015_labelled = labelling_multiclass(AAPL_98_to_2015)
AMZN_98_2015_labelled = labelling_multiclass(AMZN_98_to_2015)



# TEST LABELS

In [7]:
#dicts containing the labels 
MSFT_2015_2019_labelled = labelling_multiclass(MSFT_2015_to_2019)
AAPL_2015_2019_labelled = labelling_multiclass(AAPL_2015_to_2019)
AMZN_2015_2019_labelled = labelling_multiclass(AMZN_2015_to_2019)


In [8]:
#add the label values to the dataframe
MSFT_98_to_2015['Labels'] = MSFT_98_2015_labelled.values()
AAPL_98_to_2015['Labels'] = AAPL_98_2015_labelled.values()
AMZN_98_to_2015['Labels'] = AMZN_98_2015_labelled.values()

MSFT_2015_to_2019['Labels'] = MSFT_2015_2019_labelled.values()
AAPL_2015_to_2019['Labels'] = AAPL_2015_2019_labelled.values()
AMZN_2015_to_2019['Labels'] = AMZN_2015_2019_labelled.values()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cavea

# IMPORT THE DAILY DATAFRAMES 


In [9]:
fields = ['Date', 'Open','High','Low','Close','Volume','RSI','MACD','Ichimoku_Span_A','Ichimoku_Span_B','ATR',
         'BB_High_Indicator','BB_Low_Indicator']

MSFT_df = pd.read_csv(r"/Users/fahad/project_repo/data/interim/MSFT.csv", usecols=fields)
AAPL_df = pd.read_csv(r"/Users/fahad/project_repo/data/interim/AAPL.csv", usecols=fields)
AMZN_df = pd.read_csv(r"/Users/fahad/project_repo/data/interim/AMZN.csv", usecols=fields)

In [10]:
#NEED TO split the dai

In [11]:
from ipynb.fs.full.Neural_Network_Preparation import get_ndays_monthly_flattened

#get the flattened dict for microsoft, appl and amzn that we will train on, 
MSFT_98_to_2015_flattened_dict = get_ndays_monthly_flattened(MSFT_98_to_2015,MSFT_df,30)

AAPL_98_to_2015_flattened_dict = get_ndays_monthly_flattened(AAPL_98_to_2015, AAPL_df,30)

AMZN_98_to_2015_flattened_dict = get_ndays_monthly_flattened(AMZN_98_to_2015, AMZN_df, 30)



MSFT_15_to_2019_flattened_dict = get_ndays_monthly_flattened(MSFT_2015_to_2019,MSFT_df,30)

AAPL_15_to_2019_flattened_dict = get_ndays_monthly_flattened(AAPL_2015_to_2019,AAPL_df,30)

AMZN_15_to_2019_flattened_dict = get_ndays_monthly_flattened(AMZN_2015_to_2019, AMZN_df,30)




In [12]:
#assign the labels to pandas dataframe objects and give label title for the 98 to 2015 data
MSFT_labels_98_to_2015_df = pd.Series(MSFT_98_2015_labelled).to_frame("Label")
AAPL_labels_98_to_2015_df = pd.Series(AAPL_98_2015_labelled ).to_frame("Label")
AMZN_labels_98_to_2015_df = pd.Series(AMZN_98_2015_labelled ).to_frame("Label")

In [13]:
#assign the labels for the dates from 2015 to 2019 to a dataframe 
MSFT_labels_2015_to_2019_df = pd.Series(MSFT_2015_2019_labelled).to_frame("Label")
AAPL_labels_2015_to_2019_df = pd.Series(AAPL_2015_2019_labelled ).to_frame("Label")
AMZN_labels_2015_to_2019_df = pd.Series(AMZN_2015_2019_labelled ).to_frame("Label")




# Create flattened dataframes with labels

In [14]:
#create separate dataframes for the training data and the test data 

#convert the flattened dictionaries to dataframes 
flattened_df_MSFT_98_to_2015 = pd.DataFrame.from_dict(MSFT_98_to_2015_flattened_dict).T
flattened_df_AAPL_98_to_2015 = pd.DataFrame.from_dict(AAPL_98_to_2015_flattened_dict).T
flattened_df_AMZN_98_to_2015 = pd.DataFrame.from_dict(AMZN_98_to_2015_flattened_dict).T



In [15]:
#convert the flattened dictionaries to dataframes 
flattened_df_MSFT_2015_to_2019 = pd.DataFrame.from_dict(MSFT_15_to_2019_flattened_dict).T
flattened_df_AAPL_2015_to_2019 = pd.DataFrame.from_dict(AAPL_15_to_2019_flattened_dict).T
flattened_df_AMZN_2015_to_2019 = pd.DataFrame.from_dict(AMZN_15_to_2019_flattened_dict).T

In [1]:
#merge the dataframes on the index 'Date', and ensure that only the dates present in the flattened dataframe are carried over

flattened_df_MSFT_98_to_2015 = flattened_df_MSFT_98_to_2015.merge(MSFT_labels_98_to_2015_df, left_index=True, right_index=True)
flattened_df_AAPL_98_to_2015 = flattened_df_AAPL_98_to_2015.merge(AAPL_labels_98_to_2015_df, left_index=True, right_index=True)
flattened_df_AMZN_98_to_2015 = flattened_df_AMZN_98_to_2015.merge(AMZN_labels_98_to_2015_df, left_index=True, right_index=True)

flattened_df_MSFT_2015_to_2019 = flattened_df_MSFT_2015_to_2019.merge(MSFT_labels_2015_to_2019_df, left_index=True, right_index=True)
flattened_df_AAPL_2015_to_2019 = flattened_df_AAPL_2015_to_2019.merge(AAPL_labels_2015_to_2019_df, left_index=True, right_index=True)
flattened_df_AMZN_2015_to_2019 = flattened_df_AMZN_2015_to_2019.merge(AMZN_labels_2015_to_2019_df, left_index=True, right_index=True)

flattened_df_MSFT_98_to_2015



NameError: name 'flattened_df_MSFT_98_to_2015' is not defined

In [17]:
#export the dataframes as CSV's 
export_MSFT_98= flattened_df_MSFT_98_to_2015.to_csv(r'MSFT_flat_98_to_2015.csv')
export_AMZN_98= flattened_df_AAPL_98_to_2015.to_csv(r'AMZN_flat_98_to_2015.csv') 
export_AAPL_98= flattened_df_AMZN_98_to_2015.to_csv(r'AAPL_flat_98_to_2015.csv')


export_MSFT_15= flattened_df_MSFT_2015_to_2019.to_csv(r'MSFT_flat_2015_2019.csv')
export_AAPL_15= flattened_df_AAPL_2015_to_2019.to_csv(r'AAPL_flat_2015_2019.csv')
export_AMZN_15= flattened_df_AMZN_2015_to_2019.to_csv(r'AMZN_flat_2015_2019.csv')




