In [1]:
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np
from  sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import roc_auc_score
from sklearn.metrics import zero_one_loss
from sklearn.datasets import load_breast_cancer,load_iris
from sklearn.preprocessing import StandardScaler,PolynomialFeatures

In [2]:
df = load_breast_cancer()

In [3]:
X = df.data
X.shape

(569, 30)

In [4]:
y = df.target
y.shape

(569,)

In [5]:
df.target_names

array(['malignant', 'benign'], dtype='<U9')

In [6]:
df.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=.2, random_state=4)

In [8]:
print("The shape of X_train", X_train.shape)
print("\nThe shape of X_train", X_test.shape)
print("\nThe shape of X_train", y_train.shape)
print("\nThe shape of X_train", y_test.shape)

The shape of X_train (455, 30)

The shape of X_train (114, 30)

The shape of X_train (455,)

The shape of X_train (114,)


In [9]:
'''
    That most of hyper parameters that Logistic regression take
    
    linear_model.LogisticRegression(penalty='l2’,dual=False,tol=0.0001,C=1.0,fit_intercept=True,intercept_scaling=1,
    class_weight=None,random_state=None,solver='warn’,max_iter=100,
    multi_class='warn’, verbose=0,warm_start=False, n_jobs=None)
'''
LogReg = LogisticRegression(penalty='l2', C=1.0, solver='sag', max_iter=100, random_state=4)

In [10]:
LogReg.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=4, solver='sag', tol=0.0001, verbose=0,
                   warm_start=False)

In [11]:
pred = LogReg.predict(X_test)

In [12]:
print("First 10 of The predicit vis The actuall\n\n" + str(pred[:10])+ "\n\n"+str(y_test[:10]))

First 10 of The predicit vis The actuall

[0 1 0 0 1 1 1 1 1 1]

[1 1 0 0 1 1 1 1 0 1]


In [13]:
LogReg.score(X_train,y_train)

0.9252747252747253

In [14]:
LogReg.score(X_test, y_test)

0.8859649122807017

In [15]:
LogReg.classes_

array([0, 1])

In [16]:
c_matrix = confusion_matrix(pred, y_test)
print("Our confusion_matrix Table \n\n", c_matrix)

Our confusion_matrix Table 

 [[27  6]
 [ 7 74]]


In [17]:
print("accuracy_score\n\n", accuracy_score(pred, y_test))

accuracy_score

 0.8859649122807017


In [18]:
print("recall_score\n\n", recall_score(pred, y_test))

recall_score

 0.9135802469135802


In [19]:
print("precision_score\n\n", precision_score(pred, y_test))

precision_score

 0.925


In [20]:
print("f1_score\n\n", f1_score(pred, y_test))

f1_score

 0.9192546583850932


In [21]:
df = load_iris()

In [22]:
df.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [23]:
df.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [24]:
X = df.data
X[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [25]:
y = df.target
y[:5]

array([0, 0, 0, 0, 0])

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=4, test_size=.2)

In [27]:
print("The shape of X_train", X_train.shape)
print("\nThe shape of X_train", X_test.shape)
print("\nThe shape of X_train", y_train.shape)
print("\nThe shape of X_train", y_test.shape)

The shape of X_train (120, 4)

The shape of X_train (30, 4)

The shape of X_train (120,)

The shape of X_train (30,)


In [28]:
LogReg2 = LogisticRegression(C=1, penalty='l2', random_state=4, solver='sag', max_iter=100,)

In [29]:
LogReg2.fit(X_train, y_train)



LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=4, solver='sag', tol=0.0001, verbose=0,
                   warm_start=False)

In [30]:
y_pred = LogReg2.predict(X_test)

In [31]:
print("First 10 of The predicit vis The actuall\n\n" + str(y_pred[:10])+ "\n\n"+str(y_test[:10]))

First 10 of The predicit vis The actuall

[2 0 2 2 2 1 2 0 0 1]

[2 0 2 2 2 1 1 0 0 2]


In [32]:
LogReg2.score(X_train,y_train)

0.9666666666666667

In [33]:
LogReg2.score(X_test,y_test)

0.9333333333333333

In [34]:
LogReg2.classes_

array([0, 1, 2])

In [35]:
c_matrix = confusion_matrix(y_pred, y_test)
print("Our confusion_matrix Table \n\n", c_matrix)

Our confusion_matrix Table 

 [[16  0  0]
 [ 0  4  1]
 [ 0  1  8]]


In [36]:
print("accuracy_score\n\n", accuracy_score(y_pred, y_test))

accuracy_score

 0.9333333333333333


In [37]:
print("precision_score\n\n", precision_score(y_pred, y_test, average='micro'))

precision_score

 0.9333333333333333


In [38]:
print("recall_score\n\n", recall_score(y_pred, y_test, average='micro'))

recall_score

 0.9333333333333333


In [39]:
print("f1_score\n\n", f1_score(y_pred, y_test, average='micro'))

f1_score

 0.9333333333333333


In [40]:
df = pd.read_csv('heart.csv')

In [41]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [42]:
X = np.array(df.iloc[:, :-1])

In [43]:
y = np.array(df.iloc[:,-1])

In [44]:
st_sc = StandardScaler()

In [45]:
X = st_sc.fit_transform(X)

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=4, test_size=.2)

In [47]:
LogReg3 = LogisticRegression(random_state=4, penalty='l2', solver='liblinear', max_iter=1000)

In [48]:
LogReg3.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=4, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [49]:
pred = LogReg3.predict(X_test)

In [50]:
print("First 10 of The predicit vis The actuall\n\n" + str(pred[:10])+ "\n\n"+str(y_test[:10]))

First 10 of The predicit vis The actuall

[1 0 1 1 1 0 0 1 1 1]

[1 0 1 1 0 0 0 1 1 1]


In [51]:
LogReg3.classes_

array([0, 1])

In [52]:
LogReg3.n_iter_

array([5], dtype=int32)

In [53]:
LogReg3.score(X_train,y_train)

0.8553719008264463

In [54]:
LogReg3.score(X_test,y_test)

0.8688524590163934

In [55]:
c_matrix = confusion_matrix(pred, y_test)
print("Our confusion_matrix Table \n\n", c_matrix)

Our confusion_matrix Table 

 [[22  5]
 [ 3 31]]


In [56]:
print("accuracy_score\n\n", accuracy_score(pred, y_test))

accuracy_score

 0.8688524590163934


In [57]:
print("precision_score\n\n", precision_score(pred, y_test))

precision_score

 0.8611111111111112


In [58]:
print("recall_score\n\n", recall_score(pred, y_test))

recall_score

 0.9117647058823529


In [59]:
print("f1_score\n\n", f1_score(pred, y_test))

f1_score

 0.8857142857142858


In [60]:
from sklearn.metrics import mean_absolute_error

In [61]:
mean_absolute_error(pred,y_test)

0.13114754098360656

In [62]:
df = load_breast_cancer()
X = df.data
y = df.target
X[:5, :3]

array([[ 17.99,  10.38, 122.8 ],
       [ 20.57,  17.77, 132.9 ],
       [ 19.69,  21.25, 130.  ],
       [ 11.42,  20.38,  77.58],
       [ 20.29,  14.34, 135.1 ]])

In [63]:
df.target_names

array(['malignant', 'benign'], dtype='<U9')

In [64]:
df.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [65]:
st_sc = StandardScaler()

In [66]:
X = st_sc.fit_transform(X)

In [67]:
X.shape

(569, 30)

In [68]:
plf = PolynomialFeatures(degree=2, include_bias=True)

In [69]:
X = plf.fit_transform(X)

In [70]:
X.shape

(569, 496)

In [71]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=4, test_size=.2)

In [72]:
LogReg4 = LogisticRegression(penalty='l2', solver='sag', max_iter=1000, C=.3)

In [73]:
LogReg4.fit(X_train, y_train)



LogisticRegression(C=0.3, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='sag', tol=0.0001, verbose=0,
                   warm_start=False)

In [74]:
pred = LogReg4.predict(X_test)

In [75]:
print("First 10 of The predicit vis The actuall\n\n" + str(pred[:10])+ "\n\n"+str(y_test[:10]))

First 10 of The predicit vis The actuall

[1 1 0 0 1 0 1 1 0 1]

[1 1 0 0 1 1 1 1 0 1]


In [76]:
c_matrix = confusion_matrix(pred, y_test)
print("Our confusion_matrix Table \n\n", c_matrix)

Our confusion_matrix Table 

 [[34  4]
 [ 0 76]]


In [77]:
print("accuracy_score\n\n", accuracy_score(pred, y_test))

accuracy_score

 0.9649122807017544


In [79]:
print("accuracy_score\n\n", precision_recall_fscore_support(pred, y_test))

accuracy_score

 (array([1.  , 0.95]), array([0.89473684, 1.        ]), array([0.94444444, 0.97435897]), array([38, 76]))


In [80]:
BreastData = load_breast_cancer()
#X Data
X = BreastData.data
#y Data
y = BreastData.target

In [81]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=44, shuffle =True)

In [82]:
from sklearn.linear_model import SGDClassifier

In [83]:
SGDClassifierModel = SGDClassifier(penalty='l2',loss='squared_loss',learning_rate='optimal',random_state=33)
SGDClassifierModel.fit(X_train, y_train)

SGDClassifier(alpha=0.0001, average=False, class_weight=None,
              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
              l1_ratio=0.15, learning_rate='optimal', loss='squared_loss',
              max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',
              power_t=0.5, random_state=33, shuffle=True, tol=0.001,
              validation_fraction=0.1, verbose=0, warm_start=False)

In [84]:
#Calculating Details
print('SGDClassifierModel Train Score is : ' , SGDClassifierModel.score(X_train, y_train))
print('SGDClassifierModel Test Score is : ' , SGDClassifierModel.score(X_test, y_test))
print('SGDClassifierModel loss function is : ' , SGDClassifierModel.loss_function_)
print('SGDClassifierModel No. of iteratios is : ' , SGDClassifierModel.n_iter_)
print('----------------------------------------------------')

SGDClassifierModel Train Score is :  0.6220472440944882
SGDClassifierModel Test Score is :  0.6382978723404256
SGDClassifierModel loss function is :  <sklearn.linear_model.sgd_fast.SquaredLoss object at 0x7f72340497f8>
SGDClassifierModel No. of iteratios is :  77
----------------------------------------------------


In [86]:
#Calculating Prediction
y_pred = SGDClassifierModel.predict(X_test)
print('Predicted Value for SGDClassifierModel is : ' , y_pred[:10])

Predicted Value for SGDClassifierModel is :  [1 1 1 1 1 1 1 1 1 1]


In [87]:
CM = confusion_matrix(y_test, y_pred)
print('Confusion Matrix is : \n', CM)

Confusion Matrix is : 
 [[  0  68]
 [  0 120]]


In [88]:
AccScore = accuracy_score(y_test, y_pred, normalize=False)
print('Accuracy Score is : ', AccScore)

Accuracy Score is :  120


In [89]:
F1Score = f1_score(y_test, y_pred, average='micro') #it can be : binary,macro,weighted,samples
print('F1 Score is : ', F1Score)

F1 Score is :  0.6382978723404256


In [91]:
PrecisionRecallScore = precision_recall_fscore_support(y_test, y_pred, average='micro')
print('Precision Recall Score is : ', PrecisionRecallScore)

Precision Recall Score is :  (0.6382978723404256, 0.6382978723404256, 0.6382978723404256, None)
