In [14]:
# We import pandas into Python
import pandas as pd

# Load the full Google price history from GOOG.csv in the current working directory
df = pd.read_csv(filepath_or_buffer='./GOOG.csv')

# Preview the first five rows of the loaded frame
df.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2004-08-19,49.676899,51.693783,47.669952,49.845802,49.845802,44994500
1,2004-08-20,50.178635,54.187561,49.925285,53.80505,53.80505,23005800
2,2004-08-23,55.017166,56.373344,54.172661,54.346527,54.346527,18393200
3,2004-08-24,55.260582,55.439419,51.450363,52.096165,52.096165,15361800
4,2004-08-25,52.140873,53.651051,51.604362,52.657513,52.657513,9257400


In [15]:
def _read_adj_close(csv_path):
    """Read one ticker CSV, keeping only 'Adj Close' and using the parsed 'Date' column as index."""
    return pd.read_csv(
        csv_path,
        usecols=["Date", "Adj Close"],
        index_col=["Date"],
        parse_dates=True,
    )


# One single-column, date-indexed frame per company, all loaded the same way
google_stock = _read_adj_close('GOOG.csv')
apple_stock = _read_adj_close("AAPL.csv")
amazon_stock = _read_adj_close("AMZN.csv")

In [16]:
# Peek at the first five rows of google_stock
google_stock.head(n=5)

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2004-08-19,49.845802
2004-08-20,53.80505
2004-08-23,54.346527
2004-08-24,52.096165
2004-08-25,52.657513


In [17]:
# Daily calendar index running from 2000-01-01 through 2016-12-31 (both ends included)
dates = pd.date_range(start='2000-01-01', end='2016-12-31')

# An empty (column-less) DataFrame that uses those calendar dates as its index
all_stocks = pd.DataFrame(index=dates)

In [18]:
# Relabel each frame's 'Adj Close' column with its company name so the three
# price columns can be told apart
google_stock = google_stock.rename({'Adj Close': 'Google'}, axis='columns')
apple_stock = apple_stock.rename({"Adj Close": "Apple"}, axis='columns')
amazon_stock = amazon_stock.rename({"Adj Close": "Amazon"}, axis='columns')

In [19]:
# Show the first five rows of google_stock again to check the new column label
google_stock.head(n=5)

Unnamed: 0_level_0,Google
Date,Unnamed: 1_level_1
2004-08-19,49.845802
2004-08-20,53.80505
2004-08-23,54.346527
2004-08-24,52.096165
2004-08-25,52.657513


In [20]:
# Join each company's prices onto the calendar-indexed frame, one after
# another: Google first, then Apple, then Amazon
all_stocks = (
    all_stocks
    .join(google_stock)
    .join(apple_stock)
    .join(amazon_stock)
)

In [21]:
# Preview the first five rows of the combined all_stocks DataFrame
all_stocks.head(n=5)

Unnamed: 0,Google,Apple,Amazon
2000-01-01,,,
2000-01-02,,,
2000-01-03,,3.596616,89.375
2000-01-04,,3.293384,81.9375
2000-01-05,,3.341579,69.75


In [22]:
# Count the NaN values in each column of all_stocks. The counts are printed
# explicitly: a notebook cell only auto-displays its last expression, so a
# bare expression here would be evaluated and then silently discarded.
print(all_stocks.isnull().sum())

# Remove every row that contains at least one NaN value
all_stocks = all_stocks.dropna(axis=0)
all_stocks.head()

Unnamed: 0,Google,Apple,Amazon
2004-08-19,49.845802,1.97346,38.630001
2004-08-20,53.80505,1.979244,39.509998
2004-08-23,54.346527,1.997236,39.450001
2004-08-24,52.096165,2.053144,39.049999
2004-08-25,52.657513,2.123831,40.299999


In [23]:
# Print summary statistics for each stock. Each section gets a heading that
# names the statistic actually shown, followed by a blank separator line.

# Average stock price for each stock
print("Mean Of Each Stock")
print(all_stocks.mean())
print()

# Median stock price for each stock
print("Median Of Each Stock")
print(all_stocks.median())
print()

# Standard deviation of the stock price for each stock
print("Standard Deviation Of Each Stock")
print(all_stocks.std())
print()

# Pairwise correlation between the stocks
print("Correlation Between Stocks")
print(all_stocks.corr())
print()

Mean Of Each Stock
Google    347.420229
Apple      47.736018
Amazon    216.598177
dtype: float64

Median Of Each Stock
Google    286.397247
Apple      39.461483
Amazon    161.820007
dtype: float64

Median Of Each Stock
Google    187.671596
Apple      37.421555
Amazon    199.129792
dtype: float64

Median Of Each Stock
          Google     Apple    Amazon
Google  1.000000  0.900242  0.952444
Apple   0.900242  1.000000  0.886321
Amazon  0.952444  0.886321  1.000000



In [24]:
# Rolling mean of the Google column over a 150-row window. With the default
# min_periods, the first 149 entries are NaN because their window is not yet full.
rollingMean = all_stocks["Google"].rolling(window=150).mean()

# Show the first five rows of all_stocks
all_stocks.head(n=5)

Unnamed: 0,Google,Apple,Amazon
2004-08-19,49.845802,1.97346,38.630001
2004-08-20,53.80505,1.979244,39.509998
2004-08-23,54.346527,1.997236,39.450001
2004-08-24,52.096165,2.053144,39.049999
2004-08-25,52.657513,2.123831,40.299999


In [25]:
# this allows plots to be rendered in the notebook
%matplotlib inline 

# We import matplotlib into Python
import matplotlib.pyplot as plt


# We plot the Google stock data
plt.plot(all_stocks['Google'])

# We plot the rolling mean ontop of our Google stock data
plt.plot(all_stocks['Apple'])
plt.plot(all_stocks['Amazon'])
plt.legend(['Google Stock Price', 'Rolling Mean','Amazon'])
plt.show()

ModuleNotFoundError: No module named 'matplotlib'