# **Finance Data Project**
## In this data project we will focus on exploratory data analysis of stock prices.
### We'll focus on bank stocks and see how they progressed throughout the financial crisis.


In [None]:
from pandas_datareader import data, wb
import pandas as pd
import numpy as np
import datetime
import seaborn as sns
%matplotlib inline

## Data
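### We will load stock information for the following banks: Bank of America (BAC), CitiGroup (C), Goldman Sachs (GS), JPMorgan Chase (JPM), Morgan Stanley (MS) and Wells Fargo (WFC).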

In [None]:
# Load the pickled bank price data.
# NOTE(security): unpickling can execute arbitrary code, so only point
# pd.read_pickle at a file that comes from a trusted source.
df = pd.read_pickle('../input/dataset/all_banks')

# Pull out one object per bank, keyed by its ticker symbol in `df`.

# Bank of America
BAC = df['BAC']

# CitiGroup
C = df['C']

# Goldman Sachs
GS = df['GS']

# JPMorgan Chase
JPM = df['JPM']

# Morgan Stanley
MS = df['MS']

# Wells Fargo
WFC = df['WFC']

In [None]:
# Ticker symbols, alphabetical. The order must match the order of the frames
# handed to pd.concat below, because `keys` labels them positionally.
tickers = [
    'BAC',
    'C',
    'GS',
    'JPM',
    'MS',
    'WFC',
]

# Join the per-bank frames column-wise (axis=1) into one frame; each bank's
# columns are grouped under its ticker as the outer column level.
bank_stocks = pd.concat(
    [BAC, C, GS, JPM, MS, WFC],
    keys=tickers,
    axis=1,
)

# Name the two column levels so later cells can select by level name.
bank_stocks.columns.names = ['Bank Ticker', 'Stock Info']

# Preview the first rows of the combined frame.
bank_stocks.head()

# EDA
## Let's explore the data a bit! 

### What is the max Close price for each bank's stock throughout the time period?

In [None]:
# Take the 'Close' column of every bank (cross-section on the inner column
# level), then the column-wise maximum: one max closing price per ticker.
(
    bank_stocks
    .xs(key='Close', axis=1, level='Stock Info')
    .max()
)

Create a new empty DataFrame called `returns`. This DataFrame will contain the returns for each bank's stock. Returns are typically defined by:

# $r_t = \frac{p_t - p_{t-1}}{p_{t-1}}$

In [None]:
# Start from an empty frame and add one '<ticker> Return' column per bank.
returns = pd.DataFrame()

# pct_change() on the Close prices gives (p_t - p_{t-1}) / p_{t-1}, i.e. the
# period-over-period return; the first row has no previous price and is NaN.
for tick in tickers:
    close_prices = bank_stocks[(tick, 'Close')]
    returns[f'{tick} Return'] = close_prices.pct_change()

# Preview the first rows of the returns frame.
returns.head()

In [None]:
# Pairwise scatter/distribution grid of the bank returns.
# The first row is skipped positionally: pct_change() leaves it as NaN.
sns.pairplot(returns.iloc[1:])

# Using the returns DataFrame, find the dates on which each bank's stock had its best and worst single-day returns.
# You should notice that 4 of the banks share the same date for their worst drop.

In [None]:
# Index label (date) of the lowest single-period return in each column.
returns.idxmin(axis=0)

In [None]:
# Index label (date) of the highest single-period return in each column.
returns.idxmax(axis=0)

In [None]:
# Standard deviation of each bank's returns over the whole period, used here
# as a simple risk measure: which stock looks riskiest overall, and which one
# looks riskiest when only 2015 is considered?
returns.std(axis=0)

In [None]:
# Same spread measure restricted to calendar year 2015 (label slice on the
# index; both end dates are inclusive).
(
    returns
    .loc['2015-01-01':'2015-12-31']
    .std()
)

In [None]:
# Distribution of Morgan Stanley's returns during 2015.
# sns.distplot is deprecated (seaborn >= 0.11). histplot with a density-scaled
# histogram, a KDE overlay and cut=3 is the documented equivalent.
# Rows and column are selected in a single .loc call rather than chained indexing.
sns.histplot(
    returns.loc['2015-01-01':'2015-12-31', 'MS Return'],
    bins=100,
    color='green',
    stat='density',
    kde=True,
    kde_kws=dict(cut=3),
)

In [None]:
# Distribution of CitiGroup's returns during 2008.
# sns.distplot is deprecated (seaborn >= 0.11). histplot with a density-scaled
# histogram, a KDE overlay and cut=3 is the documented equivalent.
# Rows and column are selected in a single .loc call rather than chained indexing.
sns.histplot(
    returns.loc['2008-01-01':'2008-12-31', 'C Return'],
    bins=100,
    color='red',
    stat='density',
    kde=True,
    kde_kws=dict(cut=3),
)

In [None]:
# Plotting setup for the visualisation cells that follow.
# NOTE(review): seaborn and the `%matplotlib inline` magic were already
# imported/run in the first cell of this notebook; they are repeated here.
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's 'whitegrid' style to subsequent matplotlib/seaborn figures.
sns.set_style('whitegrid')
%matplotlib inline

# Optional Plotly Method Imports
# NOTE(review): the later `.iplot()` cells presumably rely on cufflinks being
# imported and switched to offline mode here — confirm before removing.
import plotly
import cufflinks as cf
cf.go_offline()

In [None]:
# Line plot of every bank's Close price over the full date index
# (first of the approaches shown: one Series.plot call per ticker, all drawn
# on the same axes and labelled by ticker for the legend).
for ticker in tickers:
    close_series = bank_stocks[(ticker, 'Close')]
    close_series.plot(label=ticker, figsize=(12, 4))
plt.legend()

In [None]:
# Same chart via a cross-section: select every bank's 'Close' column at once
# and let DataFrame.plot draw one line per ticker.
(
    bank_stocks
    .xs(key='Close', axis=1, level='Stock Info')
    .plot()
)

In [None]:
# Interactive version of the Close-price chart, drawn with the `.iplot()`
# method on the same 'Close' cross-section.
(
    bank_stocks
    .xs(key='Close', axis=1, level='Stock Info')
    .iplot()
)

## Moving Averages
Let's analyze the moving averages for these stocks in the year 2008.

Plot the rolling 30-day average against the Close price for Bank of America's stock for the year 2008.

In [None]:
# 30-row rolling mean of Bank of America's Close price vs. the Close price
# itself, for calendar year 2008.
plt.figure(figsize=(12,6))
# Compute the rolling mean on the full series first and slice afterwards:
# rolling over an already-sliced series leaves the first 29 points of the
# year as NaN even when earlier prices are available in the data.
# The slice ends on 2008-12-31 so that it covers exactly the year 2008.
BAC['Close'].rolling(window=30).mean().loc['2008-01-01':'2008-12-31'].plot(label='30 Day Avg')
BAC['Close'].loc['2008-01-01':'2008-12-31'].plot(label='BAC CLOSE')
plt.legend()

In [None]:
# Heatmap of the pairwise correlation between the banks' Close prices,
# with each cell annotated with its correlation value.
sns.heatmap(
    bank_stocks.xs(key='Close', axis=1, level='Stock Info').corr(),
    annot=True,
)

In [None]:
# Clustered version of the Close-price correlation matrix: seaborn's
# clustermap draws the annotated matrix with its rows/columns clustered.
sns.clustermap(
    bank_stocks.xs(key='Close', axis=1, level='Stock Info').corr(),
    annot=True,
)

In [None]:
# Correlation matrix of the banks' Close prices, kept in `close_corr` and
# rendered as an interactive heatmap via `.iplot()`.
close_corr = (
    bank_stocks
    .xs(key='Close', axis=1, level='Stock Info')
    .corr()
)
close_corr.iplot(kind='heatmap', colorscale='rdylbu')