In [None]:
import datetime
import pandas_datareader as data
# Download closing prices for the ETF (TAN) and the three stocks compared to it.
TICKERS = ['FSLR', 'TAN', 'RGSE', 'SCTY']
start = datetime.datetime(2018, 12, 1)
end = datetime.datetime(2019, 11, 1)
energy_df = data.DataReader(TICKERS, 'yahoo', start=start, end=end)['Close']

# Every later cell reads `returns`; define it here so the notebook survives
# Restart Kernel -> Run All instead of relying on leftover kernel state.
# NOTE(review): assumed to be simple period-over-period returns of the closing
# prices -- confirm this matches the definition originally used.
# fill_method=None keeps gaps as NaN instead of forward-filling prices first;
# the first row (no previous price) is all-NaN and is dropped.
returns = energy_df.pct_change(fill_method=None).dropna(how='all')

energy_df

In [None]:
# Visualizing the relationship between each stock and the ETF (TAN).
import matplotlib.pyplot as plt

# One labelled series per stock so the three point clouds can be told apart;
# without a legend the overlaid scatters are indistinguishable.
fig, ax = plt.subplots()
for ticker in ['RGSE', 'FSLR', 'SCTY']:
    ax.scatter(returns[ticker], returns['TAN'], label=ticker)
ax.set_xlabel('Stock returns')
ax.set_ylabel('TAN returns')
ax.set_title('Stock returns vs. TAN returns')
ax.legend()
plt.show()

In [None]:
# Pairwise correlation between the columns of `returns`
# (Pearson is the pandas default; spelled out here for the reader).
energy_corr = returns.corr(method='pearson')
print(energy_corr)

In [None]:
# Basic risk analysis
# Column-wise mean of `returns`: the "return" half of the risk/return profile.
returns.mean(axis=0)

In [None]:
# Risk/return profile: one point per column of `returns`, with the column mean
# on the x-axis and the column standard deviation on the y-axis.
mean_returns = returns.mean()
std_returns = returns.std()

plt.scatter(mean_returns, std_returns)
plt.xlabel('Expected returns')
plt.ylabel('Standard deviations')

# Tag each point with its column name. The loop variables are deliberately not
# called `x`/`y`: this is module-level code, and a leaked `y` would overwrite
# the regression target `y` if cells are re-run out of order.
for label, x_pos, y_pos in zip(returns.columns, mean_returns, std_returns):
    plt.annotate(
        label,
        xy=(x_pos, y_pos), xytext=(20, -20),
        textcoords='offset points', ha='right', va='bottom',
        bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.9),
        arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

# Must be *called*; a bare `plt.show` only echoes the function object.
plt.show()

In [None]:
# Regression: OLS of the TAN series on the FSLR, RGSE and SCTY series.
#
# Steps:
#   1. Construct y (the dependent variable series)
#   2. Construct the matrix (DataFrame) X of independent variable series
#   3. Add an intercept column to X
#   4. Model the regression
#   5. Get the results
#
# Reference: https://www.statsmodels.org/

import numpy as np
import statsmodels.api as sm

# Dependent variable.
y = energy_df['TAN']

# Independent variables with the intercept column added.
X = sm.add_constant(energy_df[['FSLR', 'RGSE', 'SCTY']])

# missing='drop' discards observations that contain NaN before fitting.
model = sm.OLS(y, X, missing='drop')
result = model.fit()

print(result.summary())
        

In [None]:
# Finally, plot the fitted values from the regression against the actual y values.
fig, ax = plt.subplots(figsize=(8, 6))
# Label both lines so actual and fitted can be told apart in the legend.
ax.plot(y, label='Actual TAN')
ax.plot(result.fittedvalues, label='Fitted (OLS)')
ax.set_xlabel('Date')
ax.set_ylabel('TAN close')
ax.set_title('TAN: actual vs. OLS fitted values')
ax.legend()
plt.show()