# Stock Correlation

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pandas_datareader as web
from datetime import datetime as dt

# Download daily prices for a basket of tickers and plot the Pearson
# correlation of their closing prices as a heatmap.
# NOTE(review): the "yahoo" source of pandas_datareader and the "FB" ticker
# may no longer resolve upstream -- confirm before relying on this cell.
start = dt(2017, 1, 1)
symbols_list = ["AAPL", "MSFT", "GOOGL", "AMZN", "GE", "IBM", "TSLA", "FB", "AMD", "NVDA"]

# One frame per ticker, each tagged with the ticker it was downloaded for.
symbols = [
    web.DataReader(ticker, "yahoo", start).assign(Symbol = ticker)
    for ticker in symbols_list
]

# Long format: one row per (Date, Symbol) pair.
df = pd.concat(symbols).reset_index()
df = df[["Date", "Close", "Symbol"]]
display(df.head())

# Wide format: one column per ticker. Keyword arguments are used because
# pandas 2.x no longer accepts positional arguments for pivot().
df_pivot = df.pivot(index = "Date", columns = "Symbol", values = "Close").reset_index()
display(df_pivot.head())

# Correlate the ticker columns only; "Date" is a plain column after
# reset_index() and must not take part in the correlation.
corr_df = df_pivot.drop(columns = "Date").corr(method = 'pearson')
display(corr_df.head(10))

plt.figure(figsize = (13, 8))
sns.heatmap(corr_df, annot = True, cmap = "RdYlGn")
plt.show()

# Python for Finance

In [None]:
%pip install yfinance
import yfinance as yf

# Query yfinance for Amazon: print two metadata fields, the available
# metadata keys, and the price history returned for period "max".
Amazon = yf.Ticker("AMZN")
for info_field in ("sector", "trailingPE"):
    print(Amazon.info[info_field])
print(Amazon.info.keys())
amazon_history = Amazon.history(period = "max")
print(amazon_history)

# Same lookup for Tesla, restricted to period "6mo".
GetTesla = yf.Ticker("TSLA")
tesla_history = GetTesla.history(period = "6mo")
print(tesla_history)

In [None]:
%pip install mplfinance
import mplfinance as mpf
from mplfinance.original_flavor import candlestick_ohlc
import matplotlib.dates as mpdates
import pandas as pd
# Draw a candlestick chart from the OHLC columns of a local CSV file.
plt.style.use('dark_background')

# NOTE(review): the next cell reads 'RELIANCE.NS.csv' (no trailing
# underscore) -- confirm that both file names are intended.
df = pd.read_csv('RELIANCE.NS_.csv')[['Date', 'Open', 'High', 'Low', 'Close']]

# candlestick_ohlc receives df.values, so dates are converted to
# matplotlib date numbers first.
df['Date'] = pd.to_datetime(df['Date']).apply(mpdates.date2num)

fig, ax = plt.subplots()
candlestick_ohlc(
    ax,
    df.values,
    width = 0.6,
    colorup = 'green',
    colordown = 'red',
    alpha = 0.8,
)
ax.grid(True)
ax.set(xlabel = 'Date', ylabel = 'Price')

# Render the x axis as day-month-year and tilt the tick labels.
ax.xaxis.set_major_formatter(mpdates.DateFormatter('%d-%m-%Y'))
fig.autofmt_xdate()
fig.tight_layout()
plt.show()

In [None]:
plt.style.use('default')
%matplotlib inline

reliance = pd.read_csv('RELIANCE.NS.csv', index_col = 'Date', parse_dates = True)
reliance.head()

reliance = reliance['Close'].to_frame()
reliance['SMA30'] = reliance['Close'].rolling(30).mean()
reliance.dropna(inplace = True)
print(reliance)

reliance(['Close', 'SMA30']).plot(label = "RELIANCE", figsize = (16, 8))

In [None]:
# Exponentially weighted moving average (span 30) of the RELIANCE close.
# Reducing to the 'Close' column first discards any columns added earlier.
reliance = reliance['Close'].to_frame()
# Column labels are case-sensitive: the column is 'Close', not 'close'.
reliance['EWMA30'] = reliance['Close'].ewm(span = 30).mean()
print(reliance)

reliance[['Close', 'EWMA30']].plot(label = 'RELIANCE', figsize = (16, 8))

# Case Study on Amazon Stock

In [None]:
def plot_close_price(prices, title = "Amazon Stock Price"):
    """Plot the ``Close`` column of ``prices`` against its index.

    Parameters
    ----------
    prices : pandas.DataFrame
        Frame with a ``Close`` column.
    title : str
        Title shown above the chart.
    """
    plt.figure(figsize=(14, 10))
    plt.plot(prices["Close"])
    plt.grid()
    plt.xlabel('Year')
    plt.ylabel('Close Price')
    plt.title(title)
    plt.show()


# Load the Amazon price history and take a first look at it.
amzn = pd.read_csv('AMZN.csv')
display(amzn.head())
display(amzn.describe())
amzn.info()

# Index by date; sorting makes the label-based date slice below well-defined.
amzn["Date"] = pd.to_datetime(amzn["Date"])
amzn = amzn.set_index("Date").sort_index()
amzn["diff"] = amzn["Open"] - amzn["Close"]

plot_close_price(amzn)

# Restrict to 2015-2019 inclusive. A slice (start:end) selects the row range;
# `.loc[start, end]` would instead look up a single row label and a column
# label. `.copy()` lets later cells add columns without touching the original.
amzn = amzn.loc[pd.Timestamp('2015-01-01'):pd.Timestamp('2019-12-31')].copy()
display(amzn.head())

plot_close_price(amzn)

# Daily percentage change of the adjusted close; the first row has no
# predecessor, so its NaN is replaced with 0.
daily_close = amzn[["Adj Close"]]
daily_pct_c = daily_close.pct_change()
daily_pct_c.fillna(value = 0, inplace = True)
print(daily_pct_c)

# Histogram of the daily changes, followed by summary statistics.
daily_pct_c.hist(bins = 50)
plt.show()
returns_summary = daily_pct_c.describe()
print(returns_summary)

# Scatter matrix with kernel-density estimates on the diagonal.
pd.plotting.scatter_matrix(
    daily_pct_c,
    diagonal = 'kde',
    alpha = 0.1,
    figsize = (8, 8),
)
plt.show()

# Simple moving averages (50 and 100 rows) of the adjusted close.
adj_close_px = amzn["Adj Close"]
amzn["50"] = adj_close_px.rolling(window = 50).mean()
amzn["100"] = adj_close_px.rolling(window = 100).mean()

plt.figure(figsize =(16, 12))
plt.plot(amzn["Adj Close"], label = "Adj Close")
plt.plot(amzn["50"], label = "50")
plt.plot(amzn["100"], label = "100")
plt.title("Simple Moving Average", fontsize = 20)
plt.grid()
plt.legend()
plt.show()

# Exponential moving averages with the same spans. The pandas method is
# `ewm`; `ewn` does not exist and raises AttributeError.
plt.figure(figsize = (16, 12))
plt.plot(adj_close_px, label = "Adj Close")
plt.plot(adj_close_px.ewm(span = 50, min_periods = 0, adjust = False).mean(), label = "50")
plt.plot(adj_close_px.ewm(span = 100, min_periods = 0, adjust = False).mean(), label = "100")
plt.title("Exponential Moving Average", fontsize = 20)
plt.legend()
plt.grid()
plt.show()

import numpy as np

# Rolling standard deviation of the daily changes over a 75-row window,
# scaled by the square root of the window length.
min_periods = 75
rolling_std = daily_pct_c.rolling(min_periods).std()
vol = rolling_std * np.sqrt(min_periods)

plt.figure(figsize = (10,8))
plt.plot(vol)
plt.title("Volatility of Amazon Stocks")
plt.show()

# Moving-average crossover signals on the AMZN close.
short_window = 13
long_window = 40

signals = pd.DataFrame(index = amzn.index)
signals["signal"] = 0.0

signals["short_mavg"] = amzn["Close"].rolling(window = short_window, min_periods = 1, center = False).mean()
signals["long_mavg"] = amzn["Close"].rolling(window = long_window, min_periods = 1, center = False).mean()

# signal is 1.0 where the short average is above the long one, evaluated from
# row `short_window` onward; the first `short_window` rows stay 0.0.
# Building the whole column in one assignment avoids chained indexing
# (signals["signal"][...] = ...), and covers the whole tail rather than the
# single row that a missing ':' would address.
past_warmup = np.arange(len(signals)) >= short_window
short_above_long = (signals["short_mavg"] > signals["long_mavg"]).to_numpy()
signals["signal"] = np.where(past_warmup & short_above_long, 1.0, 0.0)

# +1.0 where the signal switches on, -1.0 where it switches off.
signals["positions"] = signals["signal"].diff()

fig = plt.figure(figsize=(14,10))
ax1 = fig.add_subplot(111, ylabel = "Price in $")
amzn["Close"].plot(ax = ax1, lw = 2)
signals[['short_mavg', 'long_mavg']].plot(ax = ax1, lw = 2)

# Red up-triangles mark positions == 1.0, black down-triangles mark -1.0.
ax1.plot(signals.loc[signals.positions == 1.0].index,
         signals.short_mavg[signals.positions == 1.0],
         '^', markersize = 10, color = 'red')
ax1.plot(signals.loc[signals.positions == -1.0].index,
         signals.short_mavg[signals.positions == -1.0],
         'v', markersize = 10, color = 'black')
plt.show()

# Simulate holding 100 shares whenever the crossover signal is 1.0.
initial_capitals = 100000.0
positions = pd.DataFrame(index = signals.index).fillna(0.0)
positions["AMZN"] = 100 * signals["signal"]

# Market value of the held shares per row; at this point the frame has the
# single "AMZN" column, so the row-wise sum below equals the holdings value.
adj_close = amzn["Adj Close"]
portfolio = positions.multiply(adj_close, axis = 0)
pos_diff = positions.diff()
portfolio["holdings"] = portfolio.sum(axis = 1)

# Cash is the starting capital minus the cumulative cost of share changes.
traded_value = pos_diff.multiply(adj_close, axis = 0).sum(axis = 1)
portfolio["cash"] = initial_capitals - traded_value.cumsum()
portfolio["total"] = portfolio["cash"] + portfolio["holdings"]
portfolio["returns"] = portfolio["total"].pct_change()
portfolio.head()

# Close price with both moving averages and the position-change markers.
fig = plt.figure(figsize=(14,10))
ax1 = fig.add_subplot(111, ylabel = "Price in $")
amzn["Close"].plot(ax = ax1, lw = 2)
signals[['short_mavg', 'long_mavg']].plot(ax = ax1, lw = 2)

for change, marker, colour in ((1.0, '^', 'red'), (-1.0, 'v', 'black')):
    ax1.plot(signals.loc[signals.positions == change].index,
             signals.short_mavg[signals.positions == change],
             marker, markersize = 10, color = colour)
plt.show()

# Machine Learning in Finance

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
sns.set_theme(style = 'darkgrid')
%matplotlib inline

df = pd.read_csv('gld_price_data.csv')
df.head()
df.info()
df.isnull().sum()
df.duplicated().sum()

plt.title("SPX Distrution")
sns.histplot(data = df, x = "SPX", kde = True, color = 'green')
plt.show()

plt.title("SPX Distrution")
sns.histplot(data = df, x = "SLV", kde = True, color = 'green')
plt.show()

plt.title("SPX Distrution")
sns.histplot(data = df, x = "EUR/USD", kde = True, color = 'green')
plt.show()

correlation = df.corr()
sns.heatmap(correlation, cbar = True, square = True, fmt = '.1f', annot = True, annot_kws = {"size":8}, cmap = 'Greens')
plt.show()

correlation["GLD"]
x = df.drop(columns = ["Date", "GLD"], axis = 1)
y = df["GLD"]

# Hold out 20% of the rows for testing; the split is seeded for repeatability.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 2)

# The forest is seeded as well so the reported score is stable across runs.
model = RandomForestRegressor(n_estimators = 100, random_state = 2)
model.fit(x_train, y_train)
train_date_predictions = model.predict(x_train)
test_date_predictions = model.predict(x_test)

# r2_score expects (y_true, y_pred); R^2 is not symmetric, so swapping the
# arguments reports a different number.
r2_score(y_test, test_date_predictions)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn import metrics
import seaborn as sb

import warnings
warnings.filterwarnings("ignore")

# Exploratory look at the Tesla price data.
df = pd.read_csv("TSLA.csv")
display(df.head())
print(df.shape)
# df.info() prints its own report and returns None, so it is called on its
# own rather than passed to print().
df.info()
display(df.isnull().sum())
print(df.duplicated().sum())
display(df.describe())

plt.figure(figsize = (15, 5))
plt.plot(df["Close"])
plt.title("Tesla Stock Price", fontsize = 15)
plt.ylabel("Price in dollars")
plt.show()

# Shape of the subset of rows where Close and Adj Close are equal.
display(df[df["Close"] == df["Adj Close"]].shape)

# Distribution of each feature. plt.figure() is used instead of
# plt.subplots() so no extra full-size axes is created behind the grid.
features = ["Open", "High", "Low", "Volume"]
plt.figure(figsize = (20, 10))
for i, col in enumerate(features):
    plt.subplot(2, 3, i+1)
    # histplot with kde/density replaces the deprecated distplot.
    sb.histplot(df[col], kde = True, stat = "density")
plt.show()

# Candidate classifiers. SVC's keyword is `kernel`; the misspelled `kernals`
# raises TypeError. probability=True enables predict_proba on the SVC.
models = [LogisticRegression(), SVC(kernel = 'poly', probability = True), XGBClassifier()]