In [31]:
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn import metrics

# Gaussian noise
xmin, xmax = -5, 5
n_samples = 100
np.random.seed(0)
X = np.random.normal(size=n_samples)
y = (X > 0).astype(np.float)
X[X > 0] *= 4
X += .3 * np.random.normal(size=n_samples)

X = X[:, np.newaxis]
X = X.reshape(100,1)
y = y.reshape(100,1)

XY = np.concatenate((X, y), axis=1)
train, test = train_test_split(XY, test_size = 0.3)

X_train = (train[:,0]).reshape(70,1)
Y_train = (train[:,1]).reshape(70,1)
X_test = (test[:,0]).reshape(30,1)
Y_test = (test[:,1]).reshape(30,1)
# run the classifier
clf = linear_model.LogisticRegression(C=1e5)

clf.fit(X_train, np.ravel(Y_train,order='C'))
predict = (clf.predict(X_test)).reshape(30,1)

print("Coefficients for Logistic Regression:", clf.coef_)
print("Intercept:", clf.intercept_)

Y_predict = pd.DataFrame(predict,columns=["low rate"])
Y_test_df = pd.DataFrame(Y_test,columns=["low rate"])

cnf_matrix_logis = metrics.confusion_matrix(Y_test_df, Y_predict)
print("Confusion matrix for Logistic Regression: \n",cnf_matrix_logis)

TP = predict * Y_test
TN = 1 - np.maximum(predict , Y_test)
ERR = 1 - ((sum(TP)+sum(TN))/len(Y_predict))

print("ERR for Logistic Regression: ",ERR)

Coefficients for Logistic Regression: [[7.71]]
Intercept: [-2.31]
Confusion matrix for Logistic Regression: 
 [[13  0]
 [ 1 16]]
ERR for Logistic Regression:  [0.03]


In [32]:
from sklearn import linear_model

# Create linear regression object
regr = linear_model.LinearRegression()

# Train the model using the training set
regr.fit(X_train, Y_train)

# Print the model coefficients
np.set_printoptions(precision=2)
print("Coefficients for Linear Regression:", regr.coef_)
print("Intercept:", regr.intercept_)

# Compute the mean squared error of the model over the test set
Y_pred = regr.predict(X_test)
#NMAE = np.mean(np.abs(Y_pred - Y_test))/Y_test.mean()
#print("Normalized Mean Absolute Error (NMAE): %.2f" %NMAE)

pred = np.zeros(len(Y_pred),dtype=int)
for i in range(len(Y_pred)):
    if (Y_pred[i]<0.5):
        pred[i] = 1
    else:
        pred[i] = 0
        
y_test = np.zeros(len(Y_test),dtype=int)
for i in range(len(Y_test)):
    if (Y_test[i]<0.5):
        y_test[i] = 1
    else:
        y_test[i] = 0
        

Y_pred = pd.DataFrame(pred,columns=["low rate"])
y_test_df = pd.DataFrame(y_test,columns=["low rate"])        
cnf_matrix_linear = metrics.confusion_matrix(y_test_df, Y_pred)
print("Confusion matrix for Logistic Regression: \n",cnf_matrix_linear)
# Compute confusion matrix
TP = pred * y_test
TN = 1 - np.maximum(pred , y_test)
ERR = 1 - ((sum(TP)+sum(TN))/len(pred))

print("Error of classifier based on the linear regression",ERR)

Coefficients for Linear Regression: [[0.12]]
Intercept: [0.39]
Confusion matrix for Logistic Regression: 
 [[15  2]
 [ 0 13]]
Error of classifier based on the linear regression 0.06666666666666665
