## Linear and Logistic Regression

#### Matrix Algebra
#### Gradient Descent
#### sklearn 

In [145]:
#Import necessary libraries
import pandas as pd 
import numpy as np
import random
from sklearn import datasets
from sklearn.preprocessing import StandardScaler #MinMaxScaler
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_squared_error
import random # may be used for initializing weights

df=pd.read_csv('manhattan.csv')
df.head()

Unnamed: 0,rental_id,rent,bedrooms,bathrooms,size_sqft,min_to_subway,floor,building_age_yrs,no_fee,has_roofdeck,has_washer_dryer,has_doorman,has_elevator,has_dishwasher,has_patio,has_gym,neighborhood,borough
0,1545,2550,0.0,1,480,9,2.0,17,1,1,0,0,1,1,0,1,Upper East Side,Manhattan
1,2472,11500,2.0,2,2000,4,1.0,96,0,0,0,0,0,0,0,0,Greenwich Village,Manhattan
2,2919,4500,1.0,1,916,2,51.0,29,0,1,0,1,1,1,0,0,Midtown,Manhattan
3,2790,4795,1.0,1,975,3,8.0,31,0,0,0,1,1,1,0,1,Greenwich Village,Manhattan
4,3946,17500,2.0,2,4800,3,4.0,136,0,0,0,1,1,1,0,1,Soho,Manhattan


In [146]:
#Feature scaling
scaler=StandardScaler()
df_scaled=df.copy()
df_scaled[['rent','size_sqft','min_to_subway','building_age_yrs']]=scaler.fit_transform(df[['rent','size_sqft','min_to_subway','building_age_yrs']])
df_scaled.head()

Unnamed: 0,rental_id,rent,bedrooms,bathrooms,size_sqft,min_to_subway,floor,building_age_yrs,no_fee,has_roofdeck,has_washer_dryer,has_doorman,has_elevator,has_dishwasher,has_patio,has_gym,neighborhood,borough
0,1545,-0.818669,0.0,1,-0.962011,0.730862,2.0,-0.888763,1,1,0,0,1,1,0,1,Upper East Side,Manhattan
1,2472,2.01148,2.0,2,2.218694,-0.176116,1.0,1.117593,0,0,0,0,0,0,0,0,Greenwich Village,Manhattan
2,2919,-0.202044,1.0,1,-0.049651,-0.538908,51.0,-0.584,0,1,0,1,1,1,0,0,Midtown,Manhattan
3,2790,-0.10876,1.0,1,0.073811,-0.357512,8.0,-0.533206,0,0,0,1,1,1,0,1,Greenwich Village,Manhattan
4,3946,3.908786,2.0,2,8.077886,-0.357512,4.0,2.13347,0,0,0,1,1,1,0,1,Soho,Manhattan


In [147]:
#Linear regression with matrix algebra
ones=np.ones(df_scaled.shape[0])
df_scaled_mat=np.asarray(df_scaled)
X=np.column_stack((ones,df_scaled_mat[:,(4,5,7)]))
Y=df_scaled_mat[:,1]
XtX=np.matmul(np.transpose(X),X)
XtX=XtX.astype(float)
XtX_inv=np.linalg.inv(XtX)
XtY=np.matmul(np.transpose(X),Y)
XtY=XtY.astype(float)
beta_hat=np.matmul(XtX_inv, XtY)
#np.set_printoptions(precision=4)
np.set_printoptions(formatter={'float_kind':lambda x: "{0:0.3f}".format(x)})
beta_hat

array([-0.000, 0.861, -0.026, -0.146])

In [148]:
#Linear regression with sklearn
mlr=LinearRegression()
X_train,X_test, Y_train, Y_test= train_test_split(df_scaled_mat[:,(4,5,7)],Y,test_size=0.2,random_state=1)
mlr.fit(X_train,Y_train)
print(mlr.coef_)
print(mlr.intercept_)
Y_pred=mlr.predict(X_test)
print('R squared :' + str(mlr.score(X_test,Y_test))) #R squared =1- (RSS/TSS)
print('MSE : ' +str(mean_squared_error(Y_test,Y_pred)))
my_apartment=[[0.5,0.5,0.5]]
y_predicted=mlr.predict(my_apartment)
y_predicted

[0.871 -0.032 -0.148]
0.00397448133310336
R squared :0.7269271423442212
MSE : 0.23551844173659248


array([0.350])

In [149]:
#Linear regression with gradient descent
#Initial parameter and hyperparameters
lr=0.001; batch=32 #SGD when batch is 1
#beta=[0 for i in range(X.shape[1])]
beta=np.zeros(X.shape[1])

#train_test_split
X_train,X_test, Y_train, Y_test= train_test_split(X,Y,test_size=0.2,random_state=1)

num_batch=int((len(X_train))/batch)

for i in range(num_batch):
    if i!=num_batch-1: 
        X_batch=X_train[i*batch:(i+1)*batch,:]
        Y_batch=Y_train[i*batch:(i+1)*batch]
        beta=beta-lr*2*np.matmul(X_batch.T,np.matmul(X_batch,beta)-Y_batch) 
    else:
        X_batch=X_train[i*batch:,:]
        Y_batch=Y_train[i*batch:]
        beta=beta-lr*2*np.matmul(X_batch.T,np.matmul(X_batch,beta)-Y_batch) 

MSE=np.mean((np.matmul(X_test,beta)-Y_test)**2)
print(beta)
print('MSE : '+ str(MSE))

[0.018705191864842523 0.8982575983474635 -0.06449557701756103
 -0.1431149603744028]
MSE : 0.2423873115644294


In [150]:
#SGD Regressor in sklearn
sgdreg=SGDRegressor(loss='squared_loss',alpha=0.001,max_iter=3500,learning_rate='constant')
X_train,X_test, Y_train, Y_test= train_test_split(df_scaled_mat[:,(4,5,7)],Y,test_size=0.2,random_state=1)
sgdreg.fit(X_train,Y_train)
print(sgdreg.coef_)
print(sgdreg.intercept_)
Y_pred=sgdreg.predict(X_test)
print('R squared :' + str(sgdreg.score(X_test,Y_test))) #R squared =1- (RSS/TSS)
print('MSE : ' +str(mean_squared_error(Y_test,Y_pred)))
my_apartment=[[0.5,0.5,0.5]]
y_predicted=sgdreg.predict(my_apartment)
y_predicted

[0.863 0.024 -0.110]
[0.025]
R squared :0.7263380612862852
MSE : 0.23602650926850555


array([0.413])

#### Logistic Regression

In [179]:
from sklearn.linear_model import LogisticRegression
data=datasets.load_breast_cancer()
df=pd.DataFrame(data.data,columns=data.feature_names)
df['target']=data.target
df['target'].value_counts() #malignant 357 not malignant 212
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [241]:
#Logistic regression do not have a closed-form solution
#So we need to estimate the parameters numerically 
#Gradient descent
ones=np.ones(df.shape[0])
df_mat=np.asarray(df)
X=np.column_stack((ones,df_mat[:,:30]))
Y=df_mat[:,30]

#Initial parameter and hyperparameters
lr=0.001; batch=32; num_iter=1000#SGD when batch is 1
beta=np.zeros(X.shape[1])

#train_test_split
X_train,X_test, Y_train, Y_test= train_test_split(X,Y,test_size=0.2,random_state=1)
num_batch=int((len(X_train))/batch)

#Define functions
def sigmoid(z):
    return 1/(1+np.exp(-z))

def logreg_fn(z):
    if z>=1/2:
        return 1
    else:
        return 0
logreg_fn=np.vectorize(logreg_fn)

#Start training

for _ in range(num_iter):
    for i in range(num_batch):
        if i!=num_batch-1: 
            X_batch=X_train[i*batch:(i+1)*batch,:]
            Y_batch=Y_train[i*batch:(i+1)*batch]
            #f(x)=sigmoid(X'b)
            fx=sigmoid(np.matmul(X_batch,beta))
            derivative_beta=-np.matmul(X_batch.T,Y_batch-fx)/(X_batch.shape[0])
            
            beta=beta-lr*derivative_beta
        else:
            X_batch=X_train[i*batch:(i+1)*batch,:]
            Y_batch=Y_train[i*batch:(i+1)*batch]
            fx=sigmoid(np.matmul(X_batch,beta))
            derivative_beta=-np.matmul(X_batch.T,Y_batch-fx)/(X_batch.shape[0])
            
            beta=beta-lr*derivative_beta

MSE=np.mean((logreg_fn(sigmoid(np.matmul(X_test,beta)))-Y_test)**2)
print(beta)
print('MSE : '+ str(MSE))



[0.110 0.803 -0.040 3.229 0.394 -0.001 -0.045 -0.074 -0.028 -0.003 0.002
 0.025 -0.040 -0.142 -0.916 -0.001 -0.015 -0.022 -0.005 -0.003 -0.001
 0.867 -1.220 1.952 -0.855 -0.006 -0.161 -0.215 -0.053 -0.023 -0.009]
MSE : 0.07894736842105263


In [234]:
#Logistic regression with sklearn
logreg=LogisticRegression(random_state=0,max_iter=3500)
logreg.fit(X_train,Y_train)
logreg.score(X_test,Y_test)
print(logreg.coef_)
print(logreg.intercept_)
Y_pred=logreg.predict(X_test)
print('R squared :' + str(logreg.score(X_test,Y_test))) #R squared =1- (RSS/TSS)
print('MSE : ' +str(mean_squared_error(Y_test,Y_pred)))

[[0.005 0.784 0.132 -0.442 0.040 -0.121 -0.165 -0.424 -0.228 -0.208
  -0.016 -0.024 0.317 0.394 -0.078 -0.028 0.036 -0.059 -0.039 -0.045
  0.009 0.087 -0.315 -0.136 -0.015 -0.246 -0.444 -0.978 -0.454 -0.514
  -0.038]]
[37.875]
R squared :0.9473684210526315
MSE : 0.05263157894736842
