In [10]:
# Step 1: Import libraries
import numpy as np
from sklearn.datasets import load_diabetes
import plotly.express as px

In [11]:
# Step 2: Load and preprocess dataset
from sklearn import preprocessing

X,y=load_diabetes(return_X_y=True)

y=y.reshape(len(y),1) #reshaping from (m,) to (m,1). Creating a new axis.

X=preprocessing.scale(X) #normalizing

X=np.c_[np.ones((X.shape[0],1)),X] #adding bias column

print(f"New shape of X: {X.shape}")

New shape of X: (442, 11)


In [12]:
# Step 3: Define Cost function
def compute_cost(W,X,y):
    m=X.shape[0] #number of samples
    y_pred = np.dot(X,W) 
    cost=(1/(2*m))*np.sum(((y_pred-y)**2)) #MSE
    return cost

In [13]:
# Step 4: Define Gradient Descent function
def update_parameters(W,X,learning_rate=0.01):
    y_pred = np.dot(X,W)
    diff=(y_pred-y)
    m=X.shape[0]
    dW=(1/m)*np.dot(X.T,diff)
    W_new = W-(learning_rate)*dW
    return W_new

In [14]:
# Here, we stop the descent when the cost doesn’t change significantly .i.e when the change in cost is less than the threshold.
def gradient_descent(W,X,y,threshold=0.0001,num_iters=1000,learning_rate=0.01):
    assert(X.shape[0]==y.shape[0])
    J_history=[]
    i=0
    change=1
    cost=float('inf')
    while i<num_iters and change>threshold:
        cost_prev=cost
        cost=compute_cost(W,X,y)
        change=cost_prev-cost
        W=update_parameters(W,X,learning_rate=learning_rate)
        J_history.append(cost)
        i+=1
    return J_history, W

In [15]:
# Step 5: Run
iters=1500
learning_rate=0.01

W=np.random.rand(X.shape[1],1) #initialize random weights 

print(f'Initial Cost: {compute_cost(W,X,y)}')

J_history, weights = gradient_descent(W,X,y,num_iters=iters,learning_rate=learning_rate) #perform gradient descent

print(f'Final cost: {J_history[-1]}') #print final cost
print(f'Final weights: {weights}') #print final weights

Initial Cost: 14306.71343982501
Final cost: 1438.424524521696
Final weights: [[152.13344122]
 [ -0.32201476]
 [-11.24787154]
 [ 25.13831232]
 [ 15.31612137]
 [ -6.03331381]
 [ -2.85046602]
 [ -8.85954475]
 [  5.19790009]
 [ 23.72122806]
 [  3.31958464]]


In [16]:
# Step 6: Plot descent
def plot_descent(J_history):
    fig=px.scatter(x=range(len(J_history)),y=J_history)
    fig.update_layout(title='Cost vs Iterations',
                      xaxis_title='Number of iterations-->',
                      yaxis_title='Cost -->')
    fig.show('png') #png for visibility in GitHub
  
plot_descent(J_history)

ValueError: 
The orca executable is required to export figures as static images,
but it could not be found on the system path.

Searched for executable 'orca' on the following path:
    C:\Users\Prashant\anaconda3
    C:\Users\Prashant\anaconda3\Library\mingw-w64\bin
    C:\Users\Prashant\anaconda3\Library\usr\bin
    C:\Users\Prashant\anaconda3\Library\bin
    C:\Users\Prashant\anaconda3\Scripts
    C:\Windows\system32
    C:\Windows
    C:\Windows\System32\Wbem
    C:\Windows\System32\WindowsPowerShell\v1.0\
    C:\Windows\System32\OpenSSH\
    C:\Program Files\NVIDIA Corporation\NVIDIA NvDLISR
    C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common
    C:\Android
    C:\WINDOWS\system32
    C:\WINDOWS
    C:\WINDOWS\System32\Wbem
    C:\WINDOWS\System32\WindowsPowerShell\v1.0\
    C:\WINDOWS\System32\OpenSSH\
    C:\Program Files\MySQL\MySQL Shell 8.0\bin\
    C:\Users\Prashant\AppData\Local\Microsoft\WindowsApps
    
    C:\Users\Prashant\anaconda3\lib\site-packages\numpy\.libs
    C:\Users\Prashant\anaconda3\lib\site-packages\scipy\.libs

If you haven't installed orca yet, you can do so using conda as follows:

    $ conda install -c plotly plotly-orca

Alternatively, see other installation methods in the orca project README at
https://github.com/plotly/orca

After installation is complete, no further configuration should be needed.

If you have installed orca, then for some reason plotly.py was unable to
locate it. In this case, set the `plotly.io.orca.config.executable`
property to the full path of your orca executable. For example:

    >>> plotly.io.orca.config.executable = '/path/to/orca'

After updating this executable property, try the export operation again.
If it is successful then you may want to save this configuration so that it
will be applied automatically in future sessions. You can do this as follows:

    >>> plotly.io.orca.config.save()

If you're still having trouble, feel free to ask for help on the forums at
https://community.plot.ly/c/api/python


In [31]:
# Let’s train the our model with scikit-learn’s LinearRegression() function.
from sklearn.linear_model import LinearRegression
linear_regressor=LinearRegression(fit_intercept=False)
linear_regressor.fit(X,y)

LinearRegression(fit_intercept=False)

In [33]:
Y_pred = linear_regressor.predict(X)

In [36]:
print(f'R2 score for train: {linear_regressor.score(X, y)}')

R2 score for train: 0.5177494254132934


In [37]:
y_pred_test = linear_regressor.predict(X)

In [38]:
from sklearn.metrics import mean_squared_error
print(f'mean squared error train : {mean_squared_error(y, y_pred)}')

mean squared error train : 2859.6903987680657


# End of EDA