### Fetch daily closing stock price for TSLA, normalize & categorize to correct labels

In [4]:
from alpha_vantage.timeseries import TimeSeries
from pprint import pprint
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [5]:
# TESLA stock data: daily quotes pulled from Alpha Vantage.
import os

# Prefer a key supplied through the ALPHAVANTAGE_API_KEY environment variable
# so the credential does not have to live in the notebook. The literal is kept
# only as a backward-compatible fallback.
# NOTE(review): that literal is committed in source -- rotate it and drop the
# fallback once the environment variable is set wherever this runs.
ts = TimeSeries(
    key=os.environ.get('ALPHAVANTAGE_API_KEY') or 'X5AYBIDIH2EVGNW9',
    output_format='pandas',
)
data, meta_data = ts.get_daily(symbol='TSLA', outputsize='full')

# Drop high/low/volume; the columns left over are relabelled positionally,
# which assumes exactly two remain, in open-then-close order.
data = data.drop(['2. high', '3. low', '5. volume'], axis=1)
data.columns = ['open', 'close']

In [6]:
def calculate_pct_change(open, close):
    """Return the open-to-close change in percent, rounded to 2 decimals.

    Works on plain numbers and element-wise on objects that support
    arithmetic and ``round`` (for example pandas Series).

    Args:
        open: Opening price(s). The name shadows the builtin ``open`` but is
            kept so existing keyword callers keep working.
        close: Closing price(s).

    Returns:
        ``(close - open) / open * 100`` rounded to two decimal places.

    Raises:
        ZeroDivisionError: If ``open`` is a plain number equal to zero.
    """
    # Scale to percent *before* rounding. Rounding the fraction to 4 places
    # and then multiplying by 100 re-introduces binary float noise
    # (e.g. 1.1 * 100 == 110.00000000000001).
    return round((close - open) / open * 100, 2)

In [7]:
# S&P 500 ETF data (ticker SPY), requested through the existing `ts` client.
sdata, smeta_data = ts.get_daily(outputsize='full', symbol='SPY')

# Remove high/low/volume, then relabel the two columns that are left.
sdata = sdata.drop(columns=['2. high', '3. low', '5. volume'])
sdata.columns = ['open', 'close']

In [52]:
# Daily TSLA percentage change minus the SPY percentage change; pandas aligns
# the two series on their index before subtracting.
# NOTE(review): `data` is rebound here, so its open/close columns are no
# longer available to later cells.
data = (
    calculate_pct_change(data['open'], data['close'])
    - calculate_pct_change(sdata['open'], sdata['close'])
)
# One-column frame named price_change, with the index labelled 'Date'.
data = data.to_frame(name='price_change').rename_axis('Date')
# Labelling step, currently disabled:
# data["signal"] = data['price_change'].map(lambda x: "stay" if -1<x<1 else ("up" if x>1 else "down"))
# data = data.drop(['price_change'], axis = 1)

In [19]:
# Keep rows dated after 2012-11-16, up to and including 2017-09-29.
data = data[(data.index > '2012-11-16') & (data.index <= '2017-09-29')]

In [20]:
# Count the rows per signal label.
# NOTE(review): requires a `signal` column on `data`; the lines that create it
# are commented out in the cell that builds `data` -- confirm the run order.
data['signal'].value_counts()

stay    504
down    374
up      347
Name: signal, dtype: int64

In [21]:
# Print the index range, column dtypes and non-null counts of `data`.
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1225 entries, 2012-11-19 to 2017-09-29
Data columns (total 1 columns):
signal    1225 non-null object
dtypes: object(1)
memory usage: 19.1+ KB


In [54]:
# Show the last rows of `data`.
data.tail()

Unnamed: 0_level_0,price_change
Date,Unnamed: 1_level_1
2019-01-16,0.31
2019-01-17,-0.81
2019-01-18,-6.98
2019-01-22,-1.2
2019-01-23,0.89


In [9]:
# data.to_csv(r'/Users/flatironschool/Downloads/Learn/Projects/Stock/project_mod4_AAR/stock_data.csv')

---

### Interpolation update & change 3-categorical label to 2

In [56]:
# Recompute the TSLA-minus-SPY daily percentage change under the name `df`.
# NOTE(review): this reads the open/close columns of `data`, but an earlier
# cell rebinds `data` to a frame that only holds price_change -- presumably
# the TSLA fetch cell must be re-run first; confirm the intended run order.
df = (
    calculate_pct_change(data['open'], data['close'])
    - calculate_pct_change(sdata['open'], sdata['close'])
)
# One-column frame named price_change, with the index labelled 'Date'.
df = df.to_frame(name='price_change').rename_axis('Date')
# Labelling step, currently disabled.
# NOTE(review): these lines read from and drop on `data`, not `df`.
# df["signal"] = data['price_change'].map(lambda x: "stay" if -1<x<1 else ("up" if x>1 else "down"))
# df = data.drop(['price_change'], axis = 1)

In [57]:
# Show the last rows of `df`.
df.tail()

Unnamed: 0_level_0,price_change
Date,Unnamed: 1_level_1
2019-01-16,0.31
2019-01-17,-0.81
2019-01-18,-6.98
2019-01-22,-1.2
2019-01-23,0.21


In [58]:
# Keep rows dated after 2012-11-16, up to and including 2017-09-29.
df = df[(df.index > '2012-11-16') & (df.index <= '2017-09-29')]

In [59]:
# Print the index range, column dtypes and non-null counts of `df`.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1225 entries, 2012-11-19 to 2017-09-29
Data columns (total 1 columns):
price_change    1225 non-null float64
dtypes: float64(1)
memory usage: 19.1+ KB


In [60]:
# Every calendar day from 2012-11-19 through 2017-09-29, both ends included.
idx = pd.date_range(start='2012-11-19', end='2017-09-29', freq='D')

In [63]:
# Convert the index labels to timestamps, then expand the frame to every day
# in `idx`; days that had no row are filled with NaN.
df.index = pd.to_datetime(df.index)
df = df.reindex(index=idx, fill_value=np.nan)

In [64]:
# Print the index range, column dtypes and non-null counts of `df`.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1776 entries, 2012-11-19 to 2017-09-29
Freq: D
Data columns (total 1 columns):
price_change    1225 non-null float64
dtypes: float64(1)
memory usage: 27.8 KB


In [40]:
# df = df.astype(float)

In [70]:
# Add a gap-free copy of price_change: missing values are filled by linear
# interpolation between the neighbouring known values.
df = df.assign(update_price_change=df['price_change'].interpolate(method='linear'))

In [76]:
# Print the index range, column dtypes and non-null counts of `df`.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1776 entries, 2012-11-19 to 2017-09-29
Freq: D
Data columns (total 2 columns):
price_change           1225 non-null float64
update_price_change    1776 non-null float64
dtypes: float64(2)
memory usage: 41.6 KB


In [71]:
# Show the first rows of `df`.
df.head()

Unnamed: 0,price_change,update_price_change
2012-11-19,1.76,1.76
2012-11-20,0.41,0.41
2012-11-21,-0.53,-0.53
2012-11-22,,-1.42
2012-11-23,-2.31,-2.31


In [78]:
# Discard the raw price_change column (the one that still contains gaps).
df = df.drop(labels=['price_change'], axis='columns')

In [81]:
# Relabel the frame's columns positionally with the single name price_change;
# the assignment only succeeds when `df` has exactly one column.
df.columns = ['price_change']

In [85]:
# Show the first rows of `df`.
# NOTE(review): the recorded output below lists a `signal` column, which is
# only created by the labelling cell further down -- this cell was run after it.
df.head()

Unnamed: 0,signal
2012-11-19,up
2012-11-20,stay
2012-11-21,stay
2012-11-22,down
2012-11-23,down


In [83]:
# Turn the numeric change into a three-way label:
#   x >= 1 -> "up",   x <= -1 -> "down",   -1 < x < 1 -> "stay".
# Both thresholds are inclusive so a change of exactly +1 is labelled "up";
# with a strict `x > 1` test it fell through to "down".
# na_action='ignore' leaves missing values unlabeled rather than treating
# them as "down".
df["signal"] = df['price_change'].map(
    lambda x: "up" if x >= 1 else ("down" if x <= -1 else "stay"),
    na_action='ignore',
)
# Only the label is kept from here on.
df = df.drop(['price_change'], axis=1)

In [86]:
# Show the last rows of `df`.
df.tail()

Unnamed: 0,signal
2017-09-25,down
2017-09-26,down
2017-09-27,down
2017-09-28,stay
2017-09-29,stay


In [87]:
# Write `df` to a CSV file.
# NOTE(review): absolute, user-specific path -- presumably fails on machines
# where this directory does not exist; consider a project-relative path.
df.to_csv(r'/Users/sproul/Desktop/ds-projects/project_mod4_AAR/interpolated_stock_data.csv')