<h1>ESG Funds Analysis</h1>
<h3>The following analysis will provide the answers to three specific questions:</h3>
<h4>1) Are ESG Funds more volatile than the market?</h4>
<h4>2) Are ESG Funds correlated to the market?</h4>
<h4>3) Which funds are providing higher Risk Adjusted Returns?</h4>

In [None]:
#Import libraries
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from functools import reduce
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import itertools
from datetime import datetime
%matplotlib inline

#Define Ticker Lists
#(the tickers 'XZMU.DE' and 'IESG.L' were disabled in the original list)
TickersList = ['SPY', 'UMLGX', 'VEGN', 'GCEQX', 'VLCGX', 'MIGFX', 'MGK', 'DGAGX', 'AVEGX', 'POLIX', 'VHIAX']

#Define Date Range
start_date = '2019-01-01'
#Swap in a fixed date such as '2020-08-14' to pin the analysis window
end_date = datetime.today().strftime('%Y-%m-%d')

#The style is the same for every plot, so it is selected once instead of on each loop iteration
plt.style.use('seaborn')

#Plot each Fund paired with the market in order to visually compare volatility & returns
for Ticker in TickersList:
    if Ticker == 'SPY':
        #SPY is the benchmark itself; pairing it with SPY would add nothing
        continue
    Tickerlst = Ticker, 'SPY'
    pairedstocksdata = web.DataReader(Tickerlst, 'yahoo', start_date, end_date)['Adj Close']
    pairedstocksdata.plot(figsize=(16, 12), legend=True)


#Function used to set one dataframe for each ticker containing the ESG Fund data
def setdfs():
    """Download the price history of every ticker in ``TickersList``.

    Each result of ``web.DataReader`` (source ``'yahoo'``, between
    ``start_date`` and ``end_date``) is stored as a module-level global
    named after its ticker, e.g. ``SPY`` or ``VEGN``.

    Returns:
        None. The dataframes are published through ``globals()``.
    """
    for Ticker in TickersList:
        globals()[Ticker] = web.DataReader(Ticker, 'yahoo', start_date, end_date)


setdfs()

#Collect the per-ticker dataframes only now that setdfs() has created them.
#Building this list before the download raised a NameError on a fresh run,
#because names such as SPY did not exist yet. The order follows TickersList.
alldfs = [globals()[Ticker] for Ticker in TickersList]


#Function to calculate the Daily Returns for each dataframe loaded in the setdfs() function
def pctchg():
    """Add daily return columns to every dataframe in ``alldfs``, in place.

    Two columns are derived from ``'Adj Close'``:

    * ``'Daily Return'`` - simple percentage change from the previous row.
    * ``'log_ret'`` - difference of the natural logs of consecutive rows.

    The first row of both columns is NaN because it has no previous row.

    Returns:
        None. The dataframes are modified in place.
    """
    for df in alldfs:
        adj_close = df['Adj Close']
        df['Daily Return'] = adj_close.pct_change()
        df['log_ret'] = np.log(adj_close) - np.log(adj_close.shift(1))


pctchg()

#Correlation between all Stocks: fetch the adjusted closes of every ticker in a single frame
close_df = web.DataReader(TickersList, 'yahoo', start_date, end_date)['Adj Close']
rets_df = close_df.pct_change()

#Keep only the days on which every fund has a return. The first day is always
#dropped because pct_change() has no previous value to compare it with.
rets = rets_df.dropna()

#Plot the Correlogram with Seaborn
Correlogram = sns.pairplot(rets)
Correlogram.fig.suptitle("Fig. 11: Correlogram", fontsize=13)

#Plot Heatmap of the Correlations (computed on the unfiltered returns)
fig12 = plt.subplots()
plt.suptitle('Fig. 12: Heat Map', fontsize=13)
sns.heatmap(rets_df.corr(), annot=True, fmt='.2f')

#Visually compare the expected return (mean daily return) with the
#standard deviation of the daily returns
plt.style.use('classic')
plt.figure(figsize=(8, 5))
plt.suptitle('Fig. 13: Compare Expected Returns with Standard Deviation', fontsize=13)
expected_returns = rets.mean()
risks = rets.std()
plt.scatter(expected_returns, risks, s=25)
plt.xlabel('Expected Return')
plt.ylabel('Risk')

plt.style.use('seaborn')

#Label every point of the scatterplot with its ticker
arrow_style = {'arrowstyle': '->', 'connectionstyle': 'arc3,rad=-0.5'}
for label, x, y in zip(rets.columns, expected_returns, risks):
    plt.annotate(
        label,
        xy=(x, y),
        xytext=(-120, 20),
        textcoords='offset points',
        ha='right',
        va='bottom',
        arrowprops=arrow_style,
    )

#Create the full dataset: place all fund dataframes side by side (outer join
#on the index) and mark the cells that have no data with the text 'NA'
df_merged = pd.concat(alldfs, axis=1, join='outer').fillna('NA')

#Display full data set of all the Funds
print("Table 1: Full Dataset")
display(df_merged)

#Display Only Daily Returns for every Fund
print("Table 2: Daily Returns Dataset")
display(rets)

#Predictive Analysis: explain the market's (SPY) daily return from the
#daily returns of a selection of funds using a linear regression.

#1.Instantiate: Set up the model - The Funds selected are the best performing
#in terms of returns.
#NOTE(review): the original comment does not say which analysis this
#selection was derived from.
X = rets[['UMLGX', 'VEGN', 'MIGFX', 'MGK', 'VHIAX']]
y = rets['SPY']

#Hold out 30% of the days for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=42)

#The former `normalize=True` argument is no longer passed: scikit-learn
#deprecated it in 1.0 and removed it in 1.2, where it raises a TypeError.
#Ordinary least squares predictions do not depend on that rescaling.
lm_model = LinearRegression()

#2.Fit Training: Fitting the model on the training data
lm_model.fit(X_train, y_train)

#3.Predict Test Data: Trying to predict the test data
predictions = lm_model.predict(X_test)

#4.Score the model (coefficient of determination R^2 on the test data)
print("Score:", lm_model.score(X_test, y_test))

#Plot the Model: predicted against true SPY returns for the test days
fig3 = plt.subplots()
plt.suptitle('Fig. 14: Plot of the Model', fontsize=13)
plt.scatter(y_test, predictions)
plt.xlabel("True Values")
plt.ylabel("Predictions")