In [None]:
import pandas
import matplotlib.pyplot as plt
import numpy as np

# Read the data

First, we need to read the data. To do this, we will use the python package pandas, which allows us to read spreadsheets as 'dataframes' and manipulate them. To learn more about pandas, go here: https://pandas.pydata.org/docs/getting_started/index.html

We will use the pandas function read_csv, which takes as a parameter the name of the file.

**Change**: In the code block below, replace 'your file' with the path to the all.csv file. You can use a relative path (i.e., the path from the current location) or the full path (i.e., the path that would work from any location). The relative path will just be the file name, since the file is in this directory.

Note that to use the function read_csv from the pandas package, I use the structure pandas.functionname(). Also notice that we use dataframe.head() to view the first few rows of the dataframe.

In [None]:
# `your file` is a placeholder and is not valid Python as written:
# replace it with the path to all.csv, written as a quoted string.
anolis_data = pandas.read_csv(your file)
# Display the first few rows of the dataframe.
anolis_data.head()

# Plot the data

Now, let's plot some of the data to help decide which variables to use. We'll use the library matplotlib for plotting. Specifically, we imported matplotlib.pyplot as plt, which lets us refer to its functions with a shorter name (e.g., plt.scatter).

**Change**: In the code below, replace 'Fibula' and 'Tibia' with different variables you want to consider for your x and y variables, respectively.

**Decide**: Choose an X and Y variable to use moving forward, keeping in mind that we want to be able to predict Y from X. You can choose to try and predict ecological attributes from a morphological trait, or to predict one morphological trait from another.

In [None]:
# Column names to compare: variable1 is drawn on the horizontal axis,
# variable2 on the vertical axis.
variable1 = 'Fibula'
variable2 = 'Tibia'

horizontal_values = anolis_data[variable1]
vertical_values = anolis_data[variable2]
plt.scatter(horizontal_values, vertical_values)
plt.xlabel(variable1)
plt.ylabel(variable2)

# Functions for making predictions, plotting predictions.

I've included the function below; please answer the questions about this code for your activity.

**Answer Question 1**

In [None]:
def make_predictions(x, y, w, b):
    """Predict column ``y`` from column ``x`` with the line ``w * x + b`` and plot it.

    The data are read from the notebook-level dataframe ``anolis_data``.
    The observations, the fitted line (green) and one dashed red residual
    segment per observation are drawn with ``plt``.

    Parameters
    ----------
    x : str
        Name of the column in ``anolis_data`` used as the feature.
    y : str
        Name of the column in ``anolis_data`` used as the response.
    w : number
        Slope of the line.
    b : number
        Intercept of the line.

    Returns
    -------
    tuple
        ``(pred, mse)``: ``pred`` is the array of predicted values and
        ``mse`` is the sum of squared errors divided by ``2 * len(pred)``
        (i.e. half the mean squared error).
    """
    # Convert once to plain arrays so everything below is positional and
    # does not depend on the dataframe's index labels.
    x_values = np.asarray(anolis_data[x])
    y_values = np.asarray(anolis_data[y])

    pred = x_values * np.asarray(w) + np.asarray(b)
    mse = np.sum((y_values - pred) ** 2) * (1 / (2 * len(pred)))

    plt.scatter(x=x_values, y=y_values)
    plt.plot(x_values, pred, color="green")
    # A single vectorized call draws a residual for *every* observation;
    # note that len(x) would be the length of the column name, not of the data.
    plt.vlines(x=x_values, ymin=y_values, ymax=pred,
               colors="red", linestyles="--", alpha=0.5)
    plt.ylabel(y)
    plt.xlabel(x)
    return (pred, mse)

# Change the values of w and b

**Add**: Use the functions above to make and plot different predictions for different values of w and b.

**Answer Question 2**

In [None]:
# Add code to use the function above: try several values of w and b.
# Starting point: slope w = 1 and intercept b = 0.
make_predictions(variable1, variable2, w=1, b=0)

# Function for Gradient Descent

Below, I have written a function that will perform gradient descent in python. The function takes several parameters.

* x: the name of the column in your data to use as the feature.
* y: the name of the column in your data to use as the response/label.
* iterations: the number of iterations for which to run gradient descent.
* b_init: the value to begin with for b (Default = 0)
* w_init: the value to begin with for w (Default = 0)
* learning_rate: the value of alpha to use (Default = 0.005)

**Answer Question 3**

In [None]:
def fit(x, y, iterations, b_init=0, w_init=0, learning_rate=0.005):
    """Fit the line ``w * x + b`` to two columns of ``anolis_data`` by gradient descent.

    The data are read from the notebook-level dataframe ``anolis_data``.
    After the last update a two-panel figure is drawn with ``plt``: the
    left panel shows the data, the final line (green) and dashed red
    residuals; the right panel shows the loss at every recorded step.

    Parameters
    ----------
    x : str
        Name of the column in ``anolis_data`` used as the feature.
    y : str
        Name of the column in ``anolis_data`` used as the response.
    iterations : int
        Number of gradient-descent updates to perform.
    b_init : number, optional
        Starting value of the intercept ``b``.
    w_init : number, optional
        Starting value of the slope ``w``.
    learning_rate : float, optional
        Step size applied to each gradient.

    Returns
    -------
    tuple
        ``(w, b, mse, w_states, b_states)``: the final slope and intercept,
        then three lists of length ``iterations + 1`` holding the loss, the
        slope and the intercept before each update and after the last one.
        The loss is the sum of squared errors divided by ``2 * n``
        (i.e. half the mean squared error).
    """
    # Convert the columns once, outside the loop; plain arrays also make
    # every later access positional rather than dependent on index labels.
    x_values = np.asarray(anolis_data[x])
    y_values = np.asarray(anolis_data[y])
    n = x_values.shape[0]

    def predict(slope, intercept):
        # Predicted response for every observation.
        return np.asarray(slope) * x_values + np.asarray(intercept)

    def half_mse(pred):
        # Sum of squared errors scaled by 1 / (2 * number of predictions).
        return np.sum((y_values - pred) ** 2) * (1 / (2 * len(pred)))

    b = b_init
    w = w_init
    mse = []
    b_states = [b]
    w_states = [w]

    for _ in range(iterations):
        pred = predict(w, b)
        mse.append(half_mse(pred))

        residual = pred - y_values
        # nansum skips missing values in the gradient sums, while the
        # divisor stays the total number of rows.
        b_gradient = np.nansum(residual) / n
        w_gradient = np.nansum(x_values * residual) / n

        b = b - (learning_rate * b_gradient)
        w = w - (learning_rate * w_gradient)
        b_states.append(b)
        w_states.append(w)

    # Record the loss of the final parameters so mse lines up with the states.
    pred = predict(w, b)
    mse.append(half_mse(pred))

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))  # Adjust figsize as needed

    axes[0].scatter(x_values, y_values)
    axes[0].plot(x_values, pred, color="green")
    # One vectorized call draws the residual segment of every observation.
    axes[0].vlines(x=x_values, ymin=y_values, ymax=pred,
                   colors="red", linestyles="--", alpha=0.5)
    axes[0].set_ylabel(y)
    axes[0].set_xlabel(x)

    axes[1].plot(range(len(mse)), mse, color="blue")
    axes[1].set_ylabel('MSE')
    axes[1].set_xlabel("Iteration")

    return (w, b, mse, w_states, b_states)

# Use gradient descent!

Set the parameters below, and run gradient descent using the function above!

**Add**: Add the settings for all the parameters.

**Answer Question 4 (everyone) and Question 5 (6990)**


In [None]:
# Settings for gradient descent: the feature and response column names,
# the number of updates, the starting parameters and the step size.
x = 'Tibia'
y = 'Fibula'
iterations = 200
b_init = 0
w_init = 0
learning_rate = 0.01

fit_w, fit_b, mse, w_states, b_states = fit(
    x=x,
    y=y,
    iterations=iterations,
    b_init=b_init,
    w_init=w_init,
    learning_rate=learning_rate,
)

# The last entry of mse is the loss of the final parameters.
final_mse = mse[-1]
print("Final w: %s\nFinal b: %s\nFinal MSE: %s" % (fit_w, fit_b, final_mse))
