In [None]:
import copy
import plotly
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import sklearn
from   sklearn import linear_model


# **Regression revisited: 2 independent, 1 dependent**

Last time we did linear regression with 1 independent variable, x, and 1 dependent variable, y.  Now let's use 2 independent variables, x and y, to predict one dependent variable, z.

### We'll reuse this function from last time, to plot point clouds and their means.



In [None]:
def plot_cloud_with_mean( A, 
                         point_cloud_name = '',
                         plot_title = '', 
                         axis_limits = None, 
                         return_plotting_data = False,
                         show_plot = True):
  """
  Plot a 3D point cloud (blue markers) together with its mean point (red marker).

  :param A: array whose rows are points and whose three columns are the 
    x, y and z coordinates (exactly three columns are required, because the 
    column means are unpacked into three values)
  :param point_cloud_name: text appended to the legend labels of both traces
  :param plot_title: title of the figure
  :param axis_limits: optional range (e.g. [-8, 8]) applied to all three axes,
    together with a cube aspect ratio; if None, no axis ranges are set
  :param return_plotting_data: if True, return the traces and the layout
  :param show_plot: if True, build the figure and display it
  :return: (data, layout) if return_plotting_data is True, otherwise None;
    data is the list [cloud trace, mean trace]
  """
  # COORDINATES OF THE MEAN POINT (ONE AVERAGE PER COLUMN)
  x_bar, y_bar, z_bar = A.mean(axis=0)

  # TRACE FOR THE POINT CLOUD
  cloud_markers = go.Scatter3d(
      x=A[:,0], y=A[:,1], z=A[:,2],
      mode='markers',
      marker=dict(size=10,color='blue'),
      showlegend=True,
      name = "point cloud"+point_cloud_name)

  # TRACE FOR THE MEAN (A SINGLE RED MARKER)
  mean_marker = go.Scatter3d(
      x=[x_bar], y=[y_bar], z=[z_bar],
      mode='markers',
      showlegend=True,
      marker=dict(size=10,color='red'),
      name = f'mean(point cloud{point_cloud_name})')

  data = [cloud_markers, mean_marker]

  # LAYOUT: ALWAYS A TITLE; A FIXED CUBE-SHAPED SCENE ONLY WHEN LIMITS ARE GIVEN
  layout_options = dict(title=plot_title)
  if axis_limits is not None:
    layout_options['scene'] = dict( xaxis=dict(range=axis_limits),
                                    yaxis=dict(range=axis_limits),
                                    zaxis=dict(range=axis_limits),
                                    aspectmode = 'cube')
  layout = go.Layout(**layout_options)

  # DISPLAY AND/OR HAND BACK THE PLOTTING DATA, AS REQUESTED
  if show_plot:
    go.Figure(data = data,layout = layout).show()
  if return_plotting_data:
    return data,layout


## Step 1: Player 1 chooses a point cloud

For convenience let's take the one we've already been working with.

In [None]:
# DEFINE THE POINT CLOUD: ONE ROW PER POINT, COLUMNS ARE (x, y, z).
# EVERY COORDINATE IS SHIFTED BY 2 AND, AS BEFORE, CONVERTED TO FLOAT TYPE
A = (np.array( [ [ 3,  3,  1],
                 [ 2, -1,  4],
                 [-5, -4, -2],
                 [-6,  1, -4]]) + 2).astype(float)

# PLOT THE CLOUD
plot_cloud_with_mean(A, axis_limits = [-8,8])


## Step 2: Player 2 draws a plane parallel to the x-y plane, passing through the center point.

In [None]:

# RANGE SHARED BY ALL THREE AXES; ALSO THE X AND Y EXTENT OF THE PLANE
axis_limits               = [-8,8]

# GET THE PLOTTING DATA AND LAYOUT FOR THE ORIGINAL PLOT (NOTHING IS DISPLAYED YET)
data, layout = plot_cloud_with_mean(A, axis_limits = axis_limits, return_plotting_data=True, show_plot=False)

# CREATE A FLAT PLANE AT THE MEAN HEIGHT: ALL FOUR CORNERS HAVE z = mean_z
mean_x, mean_y, mean_z = np.mean(A,axis=0)
z_coords                  = np.full((2,2), mean_z)

trace_horizontal_line     = go.Surface(x=axis_limits,
                                       y=axis_limits,
                                       z=z_coords,
                                       opacity = 0.5,
                                       name='average height')

# COMBINE + PLOT
data_player1 = data + [trace_horizontal_line]
fig = go.Figure(data=data_player1, layout=layout)
fig.show()

## Step 3: Player 1 draws a vertical line from each point in the cloud to a point on the plane (specifically, the point with the same x- and y-coordinates)

The sum of squares of the lengths of these lines is the "starting" quantity that Player 2 wants to reduce.  In this case it's

$$\text{SumOfSquares} = 1.25^2 + 4.25^2 + 1.75^2 + 3.75^2 = 36.75$$

In [None]:
# CREATE A NEW POINT CLOUD BY REPLACING EACH Z-COORDINATE WITH THE MEAN OF THE
# Z-COORDINATES (THE X AND Y COORDINATES ARE LEFT UNCHANGED)
B = A.copy()
B[:,2] = A[:,2].mean()

# LET'S MAKE A COPY OF THIS FOR LATER USE, CALLED B_baseline
B_baseline = B.copy()

# WRITE A FUNCTION TO GENERATE TRACES FOR THE JOINING LINES
def residual_traces(cloud0, cloud1):
  """
  Build one red line segment per point, joining each point of cloud0 to the 
  point in the same row of cloud1.

  :param cloud0: an m x n matrix whose rows represent points in a point cloud
    (columns 0, 1, 2 are used as the x, y, z coordinates)
  :param cloud1: an m x n matrix whose rows represent points in a point cloud 
  :return: a list of m traces; the q-th trace connects row q of cloud0 to 
    row q of cloud1, and its legend name reports the Euclidean distance 
    between those two rows
  """
  segments = []
  for row in range(cloud0.shape[0]):
    # LENGTH OF THE SEGMENT, SHOWN IN THE LEGEND
    offset = np.linalg.norm(cloud0[row]-cloud1[row])
    segment = go.Scatter3d( x=[cloud0[row,0], cloud1[row,0]],
                            y=[cloud0[row,1], cloud1[row,1]],
                            z=[cloud0[row,2], cloud1[row,2]],
                            textposition='top left',
                            mode='lines',
                            line=dict(color='red'),
                            name = f'vertical offset= {offset}')
    segments.append(segment)
  return segments

# COMBINE THE PLOT WITH THE VISUAL LINES
data_residual = residual_traces(A,B)
data_player1_vlines = data_player1 + data_residual
fig = go.Figure(data = data_player1_vlines, layout=layout)

# CALCULATE THE VERTICAL OFFSET FROM EACH POINT TO ITS "GUESSED" POINT
# (A AND B SHARE THEIR X AND Y COORDINATES, SO ONLY THE Z-COORDINATES DIFFER)
differences = A[:,2] - B[:,2]
differences_sq = np.round(differences**2,4)

# MODIFY TITLE: LIST ONE SQUARED LENGTH PER POINT, SO THAT THE TERMS SHOWN
# ACTUALLY ADD UP TO THE TOTAL (HOWEVER MANY POINTS THE CLOUD HAS)
squared_terms = '+'.join(f'{term}' for term in differences_sq)
fig.update_layout(title=f'SumOfSquaredLengths = {squared_terms} = {np.round(differences_sq.sum(),4)}')

# DISPLAY
fig.show()

# Step 4: Player 2 draws a new plane

The plane is given by equation
\begin{align*}
z &= m_1 x + m_2 y + b \\
\end{align*}
or equivalently by a matrix expression, which we write out below.


**If the data lay perfectly along this plane** then we could think of $m_1$ as the rate at which $z$ increases with $x$ and $m_2$ as the rate at which $z$ increases with $y$.  We think of $b$ as the value of $z$ when $x = y = 0$.

Let's eyeball the values for $m_1, m_2$ and $b$.  Looking at the plot, $z$ seems to increase as $x$ increases, and decrease as $y$ increases.  So let's try $m_1 = 1$ and $m_2 = -1$.  We'll try $b=0$ because it looks about right and we don't feel like thinking about it too hard.

We can rephrase these formulae in terms of matrix  multiplication:

\begin{align*}
\left[\begin{array}{c}z \end{array}\right]
=
\left[\begin{array}{cc} x & y \end{array}\right]
* 
\left[\begin{array}{c} m_1 \\ m_2 \end{array}\right]
+
\left[\begin{array}{c} b\end{array}\right]
\end{align*}

If we have a whole sequence of $(x, y)$ pairs, $(x_0, y_0), \ldots, (x_n, y_n)$, we can write the sequence of z-values in almost exactly the same way **by using matrix multiplication**

\begin{align*}
\left[\begin{array}{c}z_0 \\ \vdots \\ z_n\end{array}\right]
=
\left[\begin{array}{cc}x_0 & y_0 \\ \vdots & \vdots \\ x_n & y_n\end{array}\right]
* 
\left[\begin{array}{c} m_1 \\ m_2\end{array}\right]
+
\left[\begin{array}{c} b \\ \vdots \\ b \end{array}\right]
\end{align*}

or more specifically, in our case,

\begin{align*}
\left[\begin{array}{c}z_0 \\ \vdots \\ z_n\end{array}\right]
=
\left[\begin{array}{cc}x_0 & y_0 \\ \vdots & \vdots \\ x_n & y_n\end{array}\right]
* 
\left[\begin{array}{c} 1 \\ -1\end{array}\right]
+
\left[\begin{array}{c} 0 \\ \vdots \\ 0 \end{array}\right]
\end{align*}

### Let's write a function to do this so we don't have to
Let's write a function that will take a sequence of (x, y) pairs, and complete it to a sequence of (x, y, z) points (recording the output as the rows of a matrix)

In [None]:


# WRITE A FUNCTION TO "ADD" A Z COORDINATE TO EACH (X, Y) PAIR

def extend_xy_to_xyz(xy,slopes=[1,1],intercept=0):
  """
  Complete each (x, y) pair to the point (x, y, z) on the plane
      z = slopes[0] * x + slopes[1] * y + intercept

  Input: 
    xy        - a list or array of (x, y) pairs; it is reshaped to have two 
                columns, so a single pair [x, y] is also accepted
    slopes    - the slope coefficients, in any shape (they are reshaped 
                into a column vector)
    intercept - the constant term added to every z value
  Output: a float array B with three columns such that 
    (i ) the first two columns of B equal xy, 
    (ii) each row of B is a point on the plane that we have guessed
  """
  points          = np.array(xy).astype(float).reshape(-1,2)   # one (x, y) pair per row
  slope_column    = np.array(slopes).reshape(-1,1)             # slopes as a column vector
  heights         = points @ slope_column + intercept          # the matrix formula; one z value per row
  return np.hstack((points, heights))                          # glue the z column onto the (x, y) columns

This equation determines a plane.  For each point in our cloud, let's find the point on this plane that shares the same x- and y-coordinates.  How do we do this?  Well, pick a point $(x_i, y_i, z_i)$.  We already know what the x- and y-coordinates should be (namely $x_i$ and $y_i$), *and* we have a formula for z!



In [None]:
# TAKE THE (x, y) PART OF EVERY POINT IN THE CLOUD
xy_coords = A[:,[0,1]]

# COMPLETE EACH (x, y) PAIR TO A POINT ON THE PLANE z = 1*x + 1*y + 0
xyz_coords_guesscloud = extend_xy_to_xyz(xy_coords, intercept = 0, slopes=[1,1])

# SHOW THE INPUT PAIRS, THEN THE COMPLETED POINTS
for label, coords in ( ("x coordinates",   xy_coords),
                       ("xyz coordinates", xyz_coords_guesscloud) ):
  print(label)
  display(coords)

x coordinates


array([[ 5.,  5.],
       [ 4.,  1.],
       [-3., -2.],
       [-4.,  3.]])

xyz coordinates


array([[ 5.,  5., 10.],
       [ 4.,  1.,  5.],
       [-3., -2., -5.],
       [-4.,  3., -1.]])

### Let's plot.



In [None]:
# PLOT

# GET THE PLOTTING DATA AND LAYOUT FOR THE ORIGINAL POINT CLOUD (NOTHING IS DISPLAYED YET)
data, layout = plot_cloud_with_mean(A, show_plot=False, return_plotting_data=True, axis_limits = [-8,8])

# FOR EACH POINT IN THE CLOUD, RECALCULATE THE POINT ON THE GUESSED PLANE
# z = x - y THAT SHARES ITS X AND Y COORDINATES
xy_coords = A[:,[0,1]]
xyz_coords_guesscloud = extend_xy_to_xyz(xy_coords, intercept=0, slopes=[1,-1])

# CREATE A NEW PLANE (WE'LL DO THIS BY JOINING FOUR CORNER POINTS)

# WRITE A FUNCTION TO CALCULATE Z-COORDINATES
def axis_limits_to_z_coordinates(axis_limits, slopes = [1,-1], intercept=0):
  """
  Evaluate the plane z = slopes[0]*x + slopes[1]*y + intercept on the grid of
  (x, y) pairs formed from the values in axis_limits.

  :param axis_limits: the coordinate values used for BOTH the x axis and the 
    y axis; [-8, 8] gives the four corners of a square, and longer lists 
    give a finer grid
  :param slopes: the two slope coefficients (m_1, m_2)
  :param intercept: the constant term b
  :return: a square float array z_coords with one row and one column per 
    value in axis_limits; entry [i, j] is the height of the plane at 
    x = axis_limits[j], y = axis_limits[i].  Rows index y and columns index 
    x -- the "reversed" layout needed for the z argument of the plotly 
    surface traces built from this array.
  """
  # meshgrid's default indexing gives exactly the layout described above:
  # x_grid[i,j] = axis_limits[j] and y_grid[i,j] = axis_limits[i]
  x_grid, y_grid = np.meshgrid(axis_limits, axis_limits)
  # one (x, y) pair per row, so the whole grid is handled by a single call
  xy_pairs = np.column_stack((x_grid.ravel(), y_grid.ravel()))
  xyz = extend_xy_to_xyz(xy_pairs, slopes=slopes, intercept=intercept)
  # keep only the z column, and fold it back into the shape of the grid
  return xyz[:,2].reshape(x_grid.shape)

# CALCULATE THE Z-COORDINATES OF THE CORNERS OF THE GUESSED PLANE
# (WE PASS THE SAME SLOPES AND INTERCEPT THAT WERE USED FOR THE GUESS CLOUD,
# RATHER THAN RELYING ON THE FUNCTION'S DEFAULTS HAPPENING TO MATCH)
axis_limits = [-8,8] # the (x,y coordinates of each corner will be -8 or 8)
z_coords = axis_limits_to_z_coordinates(axis_limits, slopes=[1,-1], intercept=0)

# MAKE THE SURFACE TRACE
trace_surface_guess   = go.Surface(   
                            x=axis_limits, 
                            y=axis_limits,
                            z=z_coords,
                            opacity = 0.5,
                            name='player 2 guess')

# ADD THE TRACE TO THE LIST OF TRACES
data_player2 = data + [trace_surface_guess]

# ADD THE LINES JOINING EACH POINT TO ITS "GUESSED" POINT ON THE PLANE
vline_traces_player2 = residual_traces(A, xyz_coords_guesscloud)
data_player2 = data_player2 + vline_traces_player2

# CREATE FIGURE
fig = go.Figure(data = data_player2, layout=layout)

# CALCULATE THE VERTICAL OFFSET FROM EACH POINT TO ITS "GUESSED" POINT
differences = A[:,2] - xyz_coords_guesscloud[:,2]
differences_sq = np.round(differences**2,4)

# MODIFY TITLE: LIST ONE SQUARED LENGTH PER POINT, SO THAT THE TERMS SHOWN
# ACTUALLY ADD UP TO THE TOTAL (HOWEVER MANY POINTS THE CLOUD HAS)
squared_terms = '+'.join(f'{term}' for term in differences_sq)
fig.update_layout(title=f'SumOfSquaredLengths = {squared_terms} = {np.round(differences_sq.sum(),4)}')

# DISPLAY
fig.show()

This is better - but still not best!

# Step 5: Linear regression parachutes into the game, and wins!

In [None]:
# FORMAT THE DATA
xy_matrix = A[:,[0,1]] # the two independent variables: one (x, y) pair per row
z_matrix = A[:,[2]] # why the extra brackets?  because the package we use to run linear regression wants *column vectors*

# CREATE THE REGRESSOR AND FIT IT (fit RETURNS THE FITTED REGRESSOR ITSELF)
reg       =   sklearn.linear_model.LinearRegression().fit(xy_matrix, z_matrix)

# EXTRACT SLOPES AND INTERCEPT
slopes_linreg     =   reg.coef_
intercepts_linreg =   reg.intercept_

# REPORT THE FITTED COEFFICIENTS
print("slope coefficients", slopes_linreg, sep="\n")
print("intercept", intercepts_linreg, sep="\n")

# GET THE PROJECTED POINT CLOUD: FOR EACH ORIGINAL POINT, THE POINT ON THE
# BEST-FIT PLANE THAT SHARES ITS X AND Y COORDINATES
xy_coords = A[:,[0,1]]
xyz_coords_guesscloud = extend_xy_to_xyz( xy_coords, 
                                          slopes=slopes_linreg, 
                                          intercept=intercepts_linreg)

# MAKE A COPY FOR FUTURE USE
B_linreg  =   copy.deepcopy(xyz_coords_guesscloud)

# GET THE PLOTTING DATA AND LAYOUT FOR THE ORIGINAL POINT CLOUD
data, layout = plot_cloud_with_mean(A, axis_limits = [-8,8], return_plotting_data=True, show_plot=False)

# CREATE A NEW PLANE (WE'LL DO THIS BY JOINING FOUR CORNER POINTS)

# CALCULATE THE Z-COORDINATES OF THE CORNERS OF THE BEST-FIT PLANE
axis_limits = [-8,8] # the (x,y coordinates of each corner will be -8 or 8)
z_coords = axis_limits_to_z_coordinates(axis_limits,slopes=slopes_linreg, intercept=intercepts_linreg)

# MAKE THE SURFACE TRACE
trace_surface_guess   = go.Surface(   
                            x=axis_limits, 
                            y=axis_limits,
                            z=z_coords,
                            opacity = 0.5,
                            name='player 2 guess')

# ADD THE TRACE TO THE LIST OF TRACES
data_player2 = data + [trace_surface_guess]

# COMBINE THE PLOT WITH THE VISUAL LINES
vline_traces_player2 = residual_traces(A, xyz_coords_guesscloud)
data_player2 = data_player2 + vline_traces_player2

# CREATE FIGURE
fig = go.Figure(data = data_player2, layout=layout)

# CALCULATE THE VERTICAL OFFSET FROM EACH POINT TO ITS "GUESSED" POINT
differences = A[:,2] - xyz_coords_guesscloud[:,2]
differences_sq = np.round(differences**2,4)

# MODIFY TITLE: LIST ONE SQUARED LENGTH PER POINT, SO THAT THE TERMS SHOWN
# ACTUALLY ADD UP TO THE TOTAL (HOWEVER MANY POINTS THE CLOUD HAS)
squared_terms = '+'.join(f'{term}' for term in differences_sq)
fig.update_layout(title=f'SumOfSquaredLengths = {squared_terms} = {np.round(differences_sq.sum(),4)}')

# DISPLAY
fig.show()

slope coefficients
[[ 0.81707766 -0.49282297]]
intercept
[2.20390136]


### Notice: the best fit plane passes through the mean

We can see this in the example above: the red mean point lies on the fitted plane.  Interestingly, the best fit plane *always* passes through the mean (a fact which takes some real work to prove).

# Lessons from this example

* Mathematically, this is similar to the preceding examples
  * we use formulas similar to $y = mx + b$
  * we connect "original" points to points on a plane (instead of line)
    * in particular, we send each point in the original cloud to the point on the plane that shares the same x and y coordinates