In [1]:
import numpy as np
import pandas as pd

In [2]:
df1=pd.read_csv("Synthetic.csv")

In [3]:
df1.head()

Unnamed: 0.1,Unnamed: 0,X,Y
0,0,37.454012,126.746701
1,1,95.071431,293.927975
2,2,73.199394,225.4417
3,3,59.865848,183.586508
4,4,15.601864,39.020372


In [4]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  50 non-null     int64  
 1   X           50 non-null     float64
 2   Y           50 non-null     float64
dtypes: float64(2), int64(1)
memory usage: 1.3 KB


In [5]:
len(df1)

50

In [6]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()

# Scaling is done
scaled_data=scaler.fit_transform(df1)

df2=pd.DataFrame(scaled_data,columns=df1.columns)

In [7]:
# Converting into Data Frame
df2.drop(columns="Y")
#df2.drop(columns="0")
         
df2["Y"]=df1["Y"].copy()     
#df2["Index"]=df1["0"].copy()

In [8]:
df2.head()

Unnamed: 0.1,Unnamed: 0,X,Y
0,-1.697749,-0.249611,126.746701
1,-1.628453,1.765126,293.927975
2,-1.559158,1.000316,225.4417
3,-1.489862,0.534075,183.586508
4,-1.420566,-1.013726,39.020372


### Gradient Descent

#### 1. Normal (Vanilla)

In [9]:
def partial_intercept(theta_0,theta_1,df):
    n=len(df)
    diff_sum=0
    for i in range(n):
        x=df.loc[i,"X"]
        y=df.loc[i,"Y"]
        diff=theta_0+theta_1*x-y
        diff_sum=diff_sum+diff
        
    val=(2/n)*diff_sum
    return val

def partial_slope(theta_0,theta_1,df):
    n=len(df)
    diff_sum=0
    for i in range(n):
        x=df.loc[i,"X"]
        y=df.loc[i,"Y"]
        diff=(theta_0+theta_1*x-y)*x
        diff_sum=diff_sum+diff
        
    val=(2/n)*diff_sum
    return val

def slope(theta_0,theta_1,df,alpha=10e-5):
    theta_new=theta_1-alpha*partial_slope(theta_0,theta_1,df)
    return theta_new

def intercept(theta_0,theta_1,df,alpha=10e-5):
    theta_new=theta_0-alpha*partial_intercept(theta_0,theta_1,df)
    return theta_new

def loss(theta_0,theta_1,df):
    n=len(df)
    diff_sum=0
    for i in range(n):
        x=df.loc[i,"X"]
        y=df.loc[i,"Y"]
        diff=(theta_0+theta_1*x-y)**2
        diff_sum=diff_sum+diff
    val=diff_sum/n
    return val

##### Scaled

In [10]:
def GD(theta_0,theta_1):
    epoch=0
    tol=1
    while True:
        prev_theta_0=theta_0
        prev_theta_1=theta_1
        
        theta_0=intercept(prev_theta_0,prev_theta_1,df2)
        theta_1=slope(prev_theta_0,prev_theta_1,df2)

        epoch+=1
        
        if (epoch>100 and abs(loss(theta_0,theta_1,df2) - loss(prev_theta_0,prev_theta_1,df2)) < tol):
            break
    ls=[theta_0,theta_1,epoch]
    return ls

In [11]:
GD(0,0)

[np.float64(97.97716164047867), np.float64(59.277984239621276), 5955]

##### Unscaled

In [12]:
def GD(theta_0,theta_1):
    epoch=0
    tol=0.00001
    while True:
        prev_theta_0=theta_0
        prev_theta_1=theta_1
        
        theta_0=intercept(prev_theta_0,prev_theta_1,df1)
        theta_1=slope(prev_theta_0,prev_theta_1,df1)

        epoch+=1
        
        if (epoch>100 and abs(loss(theta_0,theta_1,df1) - loss(prev_theta_0,prev_theta_1,df1)) < tol):
            break
    ls=[theta_0,theta_1,epoch]
    return ls

In [13]:
GD(0,0)

[np.float64(7.424304147695157), np.float64(2.986279204481324), 45999]

#### 2. Stochastic Gradient Descent

In [14]:
def intercept(theta_0,theta_1,df,alpha=10e-5):
    x=df.iloc[0]
    y=df.iloc[1]
    
    diff=theta_0+theta_1*x-y
    val=2*diff
    
    theta_new=theta_0-alpha*val
 
    return theta_new

def slope(theta_0,theta_1,df,alpha=10e-5):
    x=df.iloc[0]
    y=df.iloc[1]
    
    diff=(theta_0+theta_1*x-y)*x
    val=2*diff
    
    theta_new=theta_1-alpha*val

    return theta_new

def loss(theta_0,theta_1,df):
    x=df.iloc[0]
    y=df.iloc[1]
    diff=(theta_0+theta_1*x-y)**2
    return diff

In [15]:
def SGD(theta_0,theta_1):
    
    epoch=1
    tol=0.1
        
    df3=df2.sample(frac=1).reset_index(drop=True)
    
    n=len(df3)
    losses=0
    for i in range(n):
        df4=df3.iloc[i,[1,2]]

        losses+=loss(theta_0,theta_1,df4)
            
        prev_theta_0=theta_0
        prev_theta_1=theta_1
            
        theta_0=intercept(prev_theta_0,prev_theta_1,df4)
        theta_1=slope(prev_theta_0,prev_theta_1,df4)

    MSE_old=losses/n

    while True:
        df3=df2.sample(frac=1).reset_index(drop=True)
        
        losses=0
        for i in range(n):
            df4=df3.iloc[i,[1,2]]

            losses+=loss(theta_0,theta_1,df4)
            
            prev_theta_0=theta_0
            prev_theta_1=theta_1
            
            theta_0=intercept(prev_theta_0,prev_theta_1,df4)
            theta_1=slope(prev_theta_0,prev_theta_1,df4)

        MSE_new=losses/n

        epoch+=1
        
        if (epoch>10 and abs(MSE_new - MSE_old) < tol):
            break

        MSE_old=MSE_new
        
    ls=[theta_0,theta_1,epoch]
    return ls

In [16]:
SGD(0,0)

[np.float64(138.8586200936107), np.float64(84.01249975705808), 431]

#### 3. Batch Gradient Descent

In [17]:
def partial_intercept(theta_0,theta_1,df):
    n=len(df)
    diff_sum=0
    for i in range(n):
        x=df.iloc[i,0]
        y=df.iloc[i,1]
        diff=theta_0+theta_1*x-y
        diff_sum=diff_sum+diff
        
    val=(2/n)*diff_sum
    return val

def partial_slope(theta_0,theta_1,df):
    n=len(df)
    diff_sum=0
    for i in range(n):
        x=df.iloc[i,0]
        y=df.iloc[i,1]
        diff=(theta_0+theta_1*x-y)*x
        diff_sum=diff_sum+diff
        
    val=(2/n)*diff_sum
    return val

def slope(theta_0,theta_1,df,alpha=10e-5):
    theta_new=theta_1-alpha*partial_slope(theta_0,theta_1,df)
    return theta_new

def intercept(theta_0,theta_1,df,alpha=10e-5):
    theta_new=theta_0-alpha*partial_intercept(theta_0,theta_1,df)
    return theta_new

def loss(theta_0,theta_1,df):
    n=len(df)
    diff_sum=0
    for i in range(n):
        x=df.iloc[i,0]
        y=df.iloc[i,1]
        diff=(theta_0+theta_1*x-y)**2
        diff_sum=diff_sum+diff
    val=diff_sum/n
    return val

In [18]:
def BGD(theta_0,theta_1,batch_size=1):
    
    epoch=1
    tol=0.1
        
    df3=df2.sample(frac=1).reset_index(drop=True)
    
    n=len(df3)
    losses=0
    x=1
    for i in range(0,n,batch_size):
        if batch_size*x<=n:
            df_new=df3.iloc[i:batch_size*x,[1,2]]
            x+=1
        else:
            df_new=df3.iloc[i:n,[1,2]]

        losses+=loss(theta_0,theta_1,df_new)
            
        prev_theta_0=theta_0
        prev_theta_1=theta_1
            
        theta_0=intercept(prev_theta_0,prev_theta_1,df_new)
        theta_1=slope(prev_theta_0,prev_theta_1,df_new)

    MSE_old=losses/n

    while True:
        df3=df2.sample(frac=1).reset_index(drop=True)
        
        losses=0
        x=1
        for i in range(0,n,batch_size):
            if batch_size*x<=n:
                df_new=df3.iloc[i:batch_size*x,[1,2]]
                x+=1
            else:
                df_new=df3.iloc[i:n,[1,2]]

            losses+=loss(theta_0,theta_1,df_new)
            
            prev_theta_0=theta_0
            prev_theta_1=theta_1
            
            theta_0=intercept(prev_theta_0,prev_theta_1,df_new)
            theta_1=slope(prev_theta_0,prev_theta_1,df_new)

        MSE_new=losses/n

        epoch+=1
        
        if (epoch>10 and abs(MSE_new - MSE_old) < tol):
            break

        MSE_old=MSE_new
        
    ls=[theta_0,theta_1,epoch]
    return ls

In [19]:
BGD(0,0,11)

[np.float64(79.31074998662443), np.float64(47.96143786070895), 829]