In [11]:
# Standard Imports
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import chart_studio.plotly as py
#Import plotly.express, naming convention as px
import plotly.express as px
import plotly.graph_objs as go #importing graphical objects
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 
# Plot Settings
from pandas.plotting import register_matplotlib_converters
# Explicitly register pandas' converters with matplotlib so pandas date-like
# values can be used on matplotlib axes.
register_matplotlib_converters()
# Set the Agg backend's path chunk size to 10000.
# NOTE(review): presumably meant to keep very long line plots renderable; no
# such plot is visible in this section -- confirm it is still needed.
plt.rcParams['agg.path.chunksize'] = 10000

Let's work with our happiness dataset again. 

In [3]:
# Read the happiness CSV (path is relative to the notebook's working directory)
# and preview the first five rows.
happiness_csv_path = "../data/happiness.csv"
happiness_data = pd.read_csv(filepath_or_buffer=happiness_csv_path)
happiness_data.head(n=5)

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


### OLS

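**OLS** stands for Ordinary Least Squares. It is the most common linear regression method: it chooses the coefficients that minimize the sum of squared residuals. In scikit-learn it follows the standard estimator API (construct, `fit`, `predict`); use `LinearRegression` as the estimator.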

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Features and target. y is kept as a one-column DataFrame, so coef_ and the
# predictions come back 2-D (one row/column per target).
X = happiness_data[['GDP per capita', 'Generosity']]
y = happiness_data[['Score']]

# Split. random_state pins the shuffle so the coefficients and metrics below
# are identical on every Restart & Run All instead of changing per run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Construct lin_reg object
lin_reg = LinearRegression()

# Fit to data
lin_reg.fit(X_train, y_train)

# Predict on the held-out rows only
y_pred = lin_reg.predict(X_test)

# Check coef (column order matches X: GDP per capita, then Generosity)
print('Coefficients: ', lin_reg.coef_)

# MSE
test_mse = mean_squared_error(y_test, y_pred)
print('Mean squared error: %.2f' % test_mse)

# The coefficient of determination: 1 is perfect prediction
test_r2 = r2_score(y_test, y_pred)
print('Coefficient of determination: %.2f' % test_r2)

Coefficients:  [[2.16785954 1.36377472]]
Mean squared error: 0.44
Coefficient of determination: 0.69


#### Visualizing

In [14]:
def plot_multiple_happiness_regression():
    """Show the happiness data in 3D with the fitted regression plane.

    Draws a 3D scatter of ``happiness_data`` (x = Generosity,
    y = GDP per capita, z = Score) and overlays the surface predicted by the
    already-fitted ``lin_reg`` over a regular grid spanning the range of
    ``X_test``.

    Relies on the notebook globals ``happiness_data``, ``X_test`` and
    ``lin_reg``; the cell that fits the model must have been run first.
    Returns nothing; the figure is displayed via ``fig.show()``.
    """
    fig = px.scatter_3d(data_frame = happiness_data, x = 'Generosity', y = 'GDP per capita', z = 'Score')
    # Smaller markers keep the surface visible behind the points
    fig.update_traces(marker = dict(size = 5))

    # Extent of the grid along each plotted feature axis
    x_min, x_max = X_test['Generosity'].min(), X_test['Generosity'].max()
    y_min, y_max = X_test['GDP per capita'].min(), X_test['GDP per capita'].max()

    # Grid spacing along both feature axes
    mesh_size = 0.03

    # Axis tick values for the surface (x = Generosity, y = GDP per capita)
    generosity_axis = np.arange(x_min, x_max, mesh_size)
    gdp_axis = np.arange(y_min, y_max, mesh_size)

    # xx / yy have shape (len(gdp_axis), len(generosity_axis))
    xx, yy = np.meshgrid(generosity_axis, gdp_axis)

    # The model was fit on columns ['GDP per capita', 'Generosity'] in that
    # order, so the grid must be passed in the same order. Feeding
    # (Generosity, GDP) would apply each coefficient to the wrong feature.
    # A DataFrame with the training column names also keeps scikit-learn's
    # feature-name check quiet.
    grid_features = pd.DataFrame({
        'GDP per capita': yy.ravel(),
        'Generosity': xx.ravel(),
    })
    y_pred = lin_reg.predict(grid_features)

    # Reshape to the mesh so z lines up with the x / y axes of go.Surface
    y_pred = np.asarray(y_pred).reshape(xx.shape)

    # Add the prediction surface, with z being the predicted Score
    fig.add_traces(go.Surface(x= generosity_axis, y = gdp_axis, z=y_pred, name="predictions"))
    fig.show()

In [15]:
# Render the 3D scatter of the happiness data together with the model's prediction surface.
plot_multiple_happiness_regression()

### Ridge Regression

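Ridge regression is OLS with an L2 penalty on the coefficients: instead of minimizing only the sum of squared residuals, it minimizes $\sum_i (y_i - \hat{y}_i)^2 + \alpha \sum_j \beta_j^2$. A useful way to picture this is to plot the sum of squared residuals against candidate slope values while the line through the data is varied; the penalty term moves the minimum of that curve toward smaller slopes.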

## Choosing

In [1]:
![](https://miro.medium.com/max/700/1*_Wx0vKokbXd20HlbLKpj2A.jpeg)

zsh:1: unknown file attribute: h
