# Importing Libraries 

In [1]:
import pandas as pd
import numpy as np
import sys

### Importing Data as pandas dataframe and preprocessing it

In [2]:
df = pd.read_csv("AMZN 2010-2020.csv",error_bad_lines=False)

In [3]:
df = df.drop('High',axis=1)
df = df.drop('Low',axis=1)
df = df.drop('Adj Close',axis=1)
df = df.drop('Volume',axis=1)

In [4]:
df.describe()

Unnamed: 0,Open,Close
count,2517.0,2517.0
mean,755.984549,755.999992
std,633.737848,633.711304
min,105.93,108.610001
25%,253.899994,253.369995
50%,430.070007,429.920013
75%,1141.0,1152.349976
max,2500.0,2497.939941


In [5]:
df.head()

Unnamed: 0,Date,Open,Close
0,2010-06-01,124.970001,123.239998
1,2010-06-02,124.019997,126.309998
2,2010-06-03,126.25,128.759995
3,2010-06-04,126.330002,122.769997
4,2010-06-07,125.839996,122.010002


### Setting up the parameters for the model 

In [6]:
df['Difference'] = df['Close'] - df['Open']

In [7]:
df.head()

Unnamed: 0,Date,Open,Close,Difference
0,2010-06-01,124.970001,123.239998,-1.730003
1,2010-06-02,124.019997,126.309998,2.290001
2,2010-06-03,126.25,128.759995,2.509995
3,2010-06-04,126.330002,122.769997,-3.560005
4,2010-06-07,125.839996,122.010002,-3.829994


In [8]:
df.describe()

Unnamed: 0,Open,Close,Difference
count,2517.0,2517.0,2517.0
mean,755.984549,755.999992,0.015443
std,633.737848,633.711304,15.161938
min,105.93,108.610001,-121.119995
25%,253.899994,253.369995,-3.679993
50%,430.070007,429.920013,0.030029
75%,1141.0,1152.349976,4.059997
max,2500.0,2497.939941,128.870117


### Setting up the range  

In [9]:
criteria = [df['Difference'].between(-140, -1.5), df['Difference'].between(-1.5, 1.5), df['Difference'].between(1.5,190)]
values = ['Down', 'Stagnant', 'Up']

df['Movement'] = np.select(criteria, values, 'Stagnant')

In [10]:
df.head()

Unnamed: 0,Date,Open,Close,Difference,Movement
0,2010-06-01,124.970001,123.239998,-1.730003,Down
1,2010-06-02,124.019997,126.309998,2.290001,Up
2,2010-06-03,126.25,128.759995,2.509995,Up
3,2010-06-04,126.330002,122.769997,-3.560005,Down
4,2010-06-07,125.839996,122.010002,-3.829994,Down


### Showing number of States  

In [11]:
df['Movement'].value_counts()

Up          1003
Down         949
Stagnant     565
Name: Movement, dtype: int64

In [12]:
z = list(df['Movement'])

### Transition Probability Matrix 

In [13]:
pd.crosstab(pd.Series(z[1:],name='Tomorrow'),
            pd.Series(z[:-1],name='Today'),normalize=0)

Today,Down,Stagnant,Up
Tomorrow,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Down,0.392405,0.203586,0.404008
Stagnant,0.320354,0.302655,0.376991
Up,0.394816,0.200399,0.404786


In [14]:
from itertools import islice

def window(seq, n=2):
    "Sliding window width n from seq.  From old itertools recipes."""
    it = iter(seq)
    result = tuple(islice(it, n))
    if len(result) == n:
        yield result
    for elem in it:
        result = result[1:] + (elem,)
        yield result

In [15]:
import pandas as pd

pairs = pd.DataFrame(window(z), columns=['Today', 'Tomorrow'])
counts = pairs.groupby('Today')['Tomorrow'].value_counts()
probs = (counts).unstack()

### Transition Count Matrix

In [16]:
probs

Tomorrow,Down,Stagnant,Up
Today,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Down,372,181,396
Stagnant,193,171,201
Up,383,213,406


In [17]:
df['Difference'].abs().mean()

8.228157069129907

# End