In [1]:
!python --version

Python 3.8.5


In [2]:
import pandas as pd
import numpy as np
import datetime
import plotly as plt
from IPython.core.display import display, HTML
display(HTML("<style>.container { width: 100% !important;}</style>"))
from plotly import graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

## Read CSV
df = pd.read_csv('HistoricalQuotes-2.csv', parse_dates=['Date'])

In [3]:
# this gives the dimensions/shape of the df
df.shape

(2518, 6)

In [4]:
# sample
df.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
0,2020-12-14,$123.33,2774089,$124.45,$125,$122.2
1,2020-12-11,$123.02,4987934,$129.525,$129.6599,$122.74
2,2020-12-10,$130.47,4567854,$125.69,$134.085,$125.69
3,2020-12-09,$128.84,3469307,$128.24,$132.92,$125.75
4,2020-12-08,$126.28,1693465,$125.82,$127.4,$124.05


In [5]:
df[1:5]

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
1,2020-12-11,$123.02,4987934,$129.525,$129.6599,$122.74
2,2020-12-10,$130.47,4567854,$125.69,$134.085,$125.69
3,2020-12-09,$128.84,3469307,$128.24,$132.92,$125.75
4,2020-12-08,$126.28,1693465,$125.82,$127.4,$124.05


In [6]:
df.columns = ['date', 'close_price', 'vol', 'open', 'high' ,'low']
df = df.sort_values(by='date', ascending=True)

In [7]:
float('1')

1.0

In [8]:
df_closing_price = df[['close_price', 'vol']]
df_closing_price.index = df.date

def to_number(x):
    x = x.strip()
    #print(x)
    if len(x) != 0:
        #print(x[1:])
        return float(x[1:])
    else:
        return 0

In [9]:
pd.options.mode.chained_assignment = None
df_closing_price.loc[:,'close_price'] = df_closing_price['close_price'].apply(lambda x: to_number(x))

In [10]:
df_closing_price[:5]

Unnamed: 0_level_0,close_price,vol
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-12-14,25.0537,5810880
2010-12-15,25.1762,5818330
2010-12-16,25.4604,3082137
2010-12-17,26.2934,8688177
2010-12-20,26.362,4615019


In [11]:
# print types
df_closing_price.dtypes

close_price    float64
vol              int64
dtype: object

In [12]:
def plotly_df(df, title='', annotations=None):
    '''Visualize all the dataframe columns as line plots'''
    xaxis = dict(title='Timesteps')
    common_kw = dict(x=df.index, mode='lines + markers')
    data = [go.Scatter(y=df[c], name=c, **common_kw) for c in df.columns]
    layout = dict(title=title, showlegend=True, annotations=annotations,xaxis=xaxis)
    fig = dict(data=data, layout=layout)
    iplot(fig, show_link=False)

In [15]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x= df_closing_price.index, y=df_closing_price.close_price, name="Price"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=df_closing_price.index, y=df_closing_price.vol, name="Volume"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Expedia Price, Volume Plots"
)

# Set x-axis title
fig.update_xaxes(title_text="Date")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Price", secondary_y=False)
fig.update_yaxes(title_text="<b>Volume", secondary_y=True)

fig.show()

In [14]:
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt 
from sklearn.impute import SimpleImputer

data = df_closing_price[['close_price']]
for v in data.values:
    print(v[0])

25.0537
25.1762
25.4604
26.2934
26.362
26.6462
26.0876
25.8034
25.0586
24.9704
25.0684
24.7744
24.5882
24.2942
23.9316
24.9214
24.598
25.1076
24.9998
25.8328
26.117
26.5776
26.754
26.1562
25.8132
25.7642
25.4604
25.4604
25.2448
24.8332
24.8822
24.4804
24.6568
24.8136
24.7254
24.4804
24.745
24.794
25.4604
24.9214
25.1762
20.8838
21.2562
21.2758
20.7662
20.5898
20.5408
19.9528
19.9136
19.8646
19.7372
19.4628
19.2986
19.502
19.796
20.3644
20.58
21.168
21.021
21.0406
21.364
21.266
21.2562
21.0602
21.1092
20.8936
21.168
21.1435
21.0994
21.4228
21.3934
21.1876
21.5502
22.3734
22.2068
21.8148
21.9814
22.2656
22.0304
21.952
24.794
24.0296
23.7258
23.5102
23.177
23.1868
22.9418
22.8242
23.1182
23.3632
23.8924
23.8924
23.765
24.1864
24.5196
24.8626
24.4804
24.2256
24.2354
24.7548
24.892
24.843
24.5784
24.6176
24.6666
24.451
25.0488
26.1072
26.7099
27.0186
26.999
26.8128
26.7834
26.8128
27.1362
27.4498
27.244
27.3567
27.0774
26.3914
26.3522
26.607
26.7736
26.46
26.9304
27.1656
26.6854
26.4698
26.

In [None]:
df_closing_price