In [8]:
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import mpl_finance as mpl
from mpl_finance import candlestick_ohlc
import matplotlib.dates as mdates
from matplotlib.dates import date2num
from matplotlib import style
import bs4 #beautifulsoup4
import sklearn #scikit_learn
import numpy as np
import datetime as dt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib notebook
from scipy.stats import pearsonr
import seaborn as sns

In [9]:
# Select matplotlib's "ggplot" style sheet for the figures drawn in this notebook.
style.use('ggplot')

In [10]:
# Date window passed to every price download in this notebook:
# 1 January 2008 through 31 December 2017.
start, end = dt.datetime(2008, 1, 1), dt.datetime(2017, 12, 31)

# Pharmaceutical Industry

In [11]:
# Download the price history of the four pharma tickers for the start/end window.
# NOTE(review): the output recorded for this cell is a RemoteDataError saying the
# anonymous Quandl limit of 50 calls per day was exceeded. Presumably a Quandl API
# key has to be configured (e.g. a QUANDL_API_KEY environment variable, if the
# installed pandas-datareader honours it) before this runs reliably - confirm.
df_pfe, df_mrk, df_jnj, df_abbv = (
    web.DataReader(ticker, 'quandl', start, end)
    for ticker in ('PFE', 'MRK', 'JNJ', 'ABBV')
)

RemoteDataError: Unable to read URL: https://www.quandl.com/api/v3/datasets/WIKI/JNJ.csv?start_date=2008-01-01&end_date=2017-12-31&order=asc
Response Text:
b'code,message\n"QELx01","You have exceeded the anonymous user limit of 50 calls per day. To make more calls today, please register for a free Quandl account and then include your API key with your requests."'

In [None]:
# Plot the adjusted closing price of the four pharma tickers on one set of axes.
plt.figure(figsize=(12, 6))
pharma_series = (
    (df_pfe, "Pfizer"),
    (df_mrk, "Merck & Co"),
    (df_jnj, "J&J"),
    (df_abbv, "AbbVie"),  # legend label previously read "AbbVieI" (stray trailing character)
)
for frame, company in pharma_series:
    plt.plot(frame.index, frame["AdjClose"], label=company)
# Title spelling corrected ("Pharmeceutical" -> "Pharmaceutical").
plt.title("Top 4 Pharmaceutical companies stocks (2008-2018)")
plt.ylabel("Stock Price")
plt.xlabel("Year")
plt.legend()
#plt.savefig("Pharmeceutical stocks.png")

In [None]:
# Gather each company's "AdjClose" series into one frame, one column per ticker.
# pandas aligns the series on their indexes, so a ticker with no value for a
# given index label gets NaN in that row.
pharma_sources = (
    ("PFE_Adjclose", df_pfe),
    ("MRK_Adjclose", df_mrk),
    ("JNJ_Adjclose", df_jnj),
    ("ABBV_Adjclose", df_abbv),
)
pharmaceutical_df = pd.DataFrame(
    {column: frame["AdjClose"] for column, frame in pharma_sources}
)

# Copy restricted to the rows where all four columns have a value.
pharmaceutical_df_nona = pharmaceutical_df.dropna()
# NOTE(review): the preview below shows the unfiltered frame even though the
# NaN-free copy was just built - confirm which one is meant to be displayed.
pharmaceutical_df.head()

In [None]:
# Pairwise scatter plots of the pharma adjusted-close columns, with a kernel
# density estimate of each column on the diagonal.
# `pd.scatter_matrix` was a deprecated top-level alias that newer pandas releases
# no longer provide; `pd.plotting.scatter_matrix` is the supported entry point
# and takes the same arguments.
pd.plotting.scatter_matrix(pharmaceutical_df, alpha=0.3, figsize=(14, 8), diagonal='kde')
#plt.savefig("Pharmaceutical.png")

In [None]:
# Pearson correlation coefficient for every pair of pharma adjusted-close columns.
correlation_df = pharmaceutical_df.corr("pearson")
correlation_df.head(n=5)

In [None]:
def calculate_pvalues(df):
    """Return the matrix of p-values for the pairwise Pearson correlations.

    Rows containing any missing value are dropped first, then only the numeric
    (and boolean) columns are kept, so every pair of columns is tested on the
    same set of rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; non-numeric columns are ignored.

    Returns
    -------
    pandas.DataFrame
        Square float frame whose index and columns are the retained column
        names; the cell at ``[c, r]`` holds the p-value reported by
        ``scipy.stats.pearsonr`` for columns ``r`` and ``c``.
    """
    # Public replacement for the private `_get_numeric_data()` helper.
    numeric = df.dropna().select_dtypes(include=["number", "bool"])
    cols = numeric.columns
    pvalues = pd.DataFrame(index=cols, columns=cols, dtype=float)
    for r in cols:
        for c in cols:
            # Write through a single `.loc` call: the chained form
            # `pvalues[r][c] = ...` assigns into an intermediate object and is
            # not guaranteed to update `pvalues` itself.
            pvalues.loc[c, r] = pearsonr(numeric[r], numeric[c])[1]
    return pvalues

In [None]:
# p-values for the pairwise Pearson correlations between the pharma columns.
calculate_pvalues(pharmaceutical_df)

In [None]:
# Annotated heatmap of the correlation matrix currently held in `correlation_df`.
# Open a fresh figure first: this notebook enables the interactive
# `%matplotlib notebook` backend, which keeps earlier figures open, and
# `sns.heatmap` draws on the current axes when no `ax` is given - without a new
# figure the heatmap would be painted onto a previously created plot.
plt.figure()
sns.heatmap(correlation_df, cmap="PuBu", mask=False, annot=True)
#plt.savefig("Pharmeceutical correlation.png")

# Retail Industry

In [None]:
# Download the price history of the four retail tickers for the start/end window.
df_wal, df_target, df_amazon, df_costco = (
    web.DataReader(ticker, 'quandl', start, end)
    for ticker in ('WMT', 'TGT', 'AMZN', 'COST')
)

In [None]:
# Plot the adjusted closing price of the four retail tickers on one set of axes.
plt.figure(figsize=(12, 6))
retail_series = (
    (df_wal, "Walmart"),
    (df_target, "Target"),
    (df_amazon, "Amazon"),
    (df_costco, "Costco"),
)
for frame, company in retail_series:
    plt.plot(frame.index, frame["AdjClose"], label=company)
plt.title("Top 4 Retail companies stocks (2008-2018)")
plt.xlabel("Year")
plt.ylabel("Stock Price")
plt.legend()
#plt.savefig("Retail stocks.png")

Bezos claimed that Amazon.com was not a retailer but a technology company. To underscore the point, in 2002 the company launched Amazon Web Services (AWS), which initially offered data on Internet traffic patterns.<br>
Two AWS services launched in 2006, Simple Storage Service (S3) and Elastic Compute Cloud (EC2), quickly succeeded and helped popularize the idea that companies and individuals do not need to own computing resources; they can rent them as needed over the Internet, or “in the cloud.” For example, in 2007, soon after launch, the S3 service contained more than 10 billion objects, or files; five years later, it held more than 905 billion. AWS is even used by Amazon.com’s rivals, such as Netflix, which uses both S3 and EC2 for its competing video streaming service.



In [None]:
# Gather each retailer's "AdjClose" series into one frame, one column per company.
# pandas aligns the series on their indexes, so a company with no value for a
# given index label gets NaN in that row.
retail_sources = (
    ("Walmart_Adjclose", df_wal),
    ("Target_Adjclose", df_target),
    ("Amazon_Adjclose", df_amazon),
    ("Costco_Adjclose", df_costco),
)
Retail_df = pd.DataFrame(
    {column: frame["AdjClose"] for column, frame in retail_sources}
)

# Copy restricted to the rows where all four columns have a value; preview it.
Retail_df_nona = Retail_df.dropna()
Retail_df_nona.head()

In [None]:
# Scatter plot with a fitted regression line: Target (x) against Amazon (y).
# Open a fresh figure first: this notebook enables the interactive
# `%matplotlib notebook` backend, which keeps earlier figures open, and
# `sns.regplot` draws on the current axes when no `ax` is given.
plt.figure()
# Pass x and y by keyword: newer seaborn releases no longer accept them
# positionally, while the keyword form works on old and new versions alike.
sns.regplot(x=Retail_df_nona["Target_Adjclose"], y=Retail_df_nona["Amazon_Adjclose"])
#plt.savefig("Amazon vs. Target.png")

In [None]:
# Scatter plot with a fitted regression line: Walmart (x) against Amazon (y),
# saved to a PNG in the working directory.
# Open a fresh figure first: this notebook enables the interactive
# `%matplotlib notebook` backend, which keeps earlier figures open, so without a
# new figure the regression would be drawn on - and saved together with -
# whatever the current figure already contains.
plt.figure()
# Pass x and y by keyword: newer seaborn releases no longer accept them
# positionally, while the keyword form works on old and new versions alike.
sns.regplot(x=Retail_df_nona["Walmart_Adjclose"], y=Retail_df_nona["Amazon_Adjclose"])
plt.savefig("Amazon vs. Walmart.png")

In [None]:
# Pearson correlation coefficient for every pair of retail adjusted-close columns.
# NOTE: this rebinds `correlation_df`, which earlier in the notebook held the
# pharma correlation matrix; cells re-run out of order will see whichever
# assignment executed last.
correlation_df = Retail_df.corr("pearson")
correlation_df

In [None]:
# Annotated heatmap of the matrix currently held in `correlation_df`, drawn on a
# new 10x8 figure and written to a PNG in the working directory.
plt.figure(figsize=(10, 8))
sns.heatmap(
    data=correlation_df,
    annot=True,
    mask=False,
    cmap="OrRd",
)
plt.savefig("Retail correlation.png")

In [None]:
# p-values for the pairwise Pearson correlations between the retail columns.
calculate_pvalues(Retail_df)