# Imports

In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [2]:
import warnings
warnings.simplefilter(action="ignore")

In [3]:
iris = datasets.load_iris()
# X = iris.data[:, :2]
# y = iris.target

In [4]:
x=iris.data
x=pd.DataFrame(data=x,columns=iris.feature_names)
x.sample(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
115,6.4,3.2,5.3,2.3
122,7.7,2.8,6.7,2.0
68,6.2,2.2,4.5,1.5
74,6.4,2.9,4.3,1.3
14,5.8,4.0,1.2,0.2


In [5]:
y=iris.target
y=pd.DataFrame(data=y,columns=['label'])
y.sample(5)

Unnamed: 0,label
145,2
57,1
66,1
81,1
147,2


# Pre Processing

In [6]:
x_scaler=StandardScaler().fit_transform(x)
x_scaler=pd.DataFrame(data=x_scaler)
x_scaler.sample(5)

Unnamed: 0,0,1,2,3
71,0.310998,-0.592373,0.137547,0.13251
122,2.249683,-0.592373,1.672157,1.053935
59,-0.779513,-0.82257,0.080709,0.264142
145,1.038005,-0.131979,0.819596,1.448832
75,0.916837,-0.131979,0.364896,0.264142


In [7]:
x_train,x_test,y_train,y_test=train_test_split(x_scaler,y,test_size=0.3,random_state=42)

In [8]:
classifier=LogisticRegression(solver="liblinear")
model=classifier.fit(x_train,y_train)

# Prediction

In [9]:
pred_y=model.predict(x_test)

# confusion_matrix, accuracy_score, roc_auc_score

In [10]:
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
cm = confusion_matrix(y_test, pred_y)
cm

array([[19,  0,  0],
       [ 0,  8,  5],
       [ 0,  0, 13]], dtype=int64)

In [11]:
accuracy_score(y_test, pred_y)

0.8888888888888888

In [12]:
roc_auc_score(y,model.predict_proba(x),multi_class='ovr')

0.7920000000000001

# Trying diff solver and diff seed value

# horizontal representation

In [13]:
solverSet=['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']
seedValue=[42,50,60,70]

In [14]:
task=dict()

In [15]:
count=0
for i in seedValue:
  x_train,x_test,y_train,y_test=train_test_split(x_scaler,y,test_size=0.3,random_state=i)
  for j in solverSet:
    classifier=LogisticRegression(solver=j)
    model=classifier.fit(x_train,y_train)
    pred_y=model.predict(x_test)
    score=accuracy_score(y_test, pred_y)
    task[count]={
        'seedValue':i,
        'solver':j,
        'score':score
    }
    count+=1

In [16]:
task

{0: {'seedValue': 42, 'solver': 'lbfgs', 'score': 1.0},
 1: {'seedValue': 42, 'solver': 'liblinear', 'score': 0.8888888888888888},
 2: {'seedValue': 42, 'solver': 'newton-cg', 'score': 1.0},
 3: {'seedValue': 42,
  'solver': 'newton-cholesky',
  'score': 0.9111111111111111},
 4: {'seedValue': 42, 'solver': 'sag', 'score': 1.0},
 5: {'seedValue': 42, 'solver': 'saga', 'score': 1.0},
 6: {'seedValue': 50, 'solver': 'lbfgs', 'score': 0.9777777777777777},
 7: {'seedValue': 50, 'solver': 'liblinear', 'score': 0.8666666666666667},
 8: {'seedValue': 50, 'solver': 'newton-cg', 'score': 0.9777777777777777},
 9: {'seedValue': 50,
  'solver': 'newton-cholesky',
  'score': 0.9111111111111111},
 10: {'seedValue': 50, 'solver': 'sag', 'score': 0.9777777777777777},
 11: {'seedValue': 50, 'solver': 'saga', 'score': 0.9777777777777777},
 12: {'seedValue': 60, 'solver': 'lbfgs', 'score': 0.9111111111111111},
 13: {'seedValue': 60, 'solver': 'liblinear', 'score': 0.8888888888888888},
 14: {'seedValue': 6

In [17]:
task_df=pd.DataFrame(data=task)

In [18]:
task_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
seedValue,42,42,42,42,42,42,50,50,50,50,...,60,60,60,60,70,70,70,70,70,70
solver,lbfgs,liblinear,newton-cg,newton-cholesky,sag,saga,lbfgs,liblinear,newton-cg,newton-cholesky,...,newton-cg,newton-cholesky,sag,saga,lbfgs,liblinear,newton-cg,newton-cholesky,sag,saga
score,1.0,0.888889,1.0,0.911111,1.0,1.0,0.977778,0.866667,0.977778,0.911111,...,0.911111,0.888889,0.911111,0.911111,1.0,0.933333,1.0,0.955556,1.0,1.0


# vertical representation

In [19]:
sd=[]
sold=[]
scd=[]

In [20]:
count=0
for i in seedValue:
  x_train,x_test,y_train,y_test=train_test_split(x_scaler,y,test_size=0.3,random_state=i)
  for j in solverSet:
    classifier=LogisticRegression(solver=j)
    model=classifier.fit(x_train,y_train)
    pred_y=model.predict(x_test)
    # score=accuracy_score(y_test, pred_y)
    sd.append(i)
    sold.append(j)
    scd.append(accuracy_score(y_test, pred_y))

In [21]:
task2={
    'seed Value':sd,
    'solver Value':sold,
    'accuracy score':scd
}

In [22]:
task2_df=pd.DataFrame(data=task2)

In [23]:
task2_df

Unnamed: 0,seed Value,solver Value,accuracy score
0,42,lbfgs,1.0
1,42,liblinear,0.888889
2,42,newton-cg,1.0
3,42,newton-cholesky,0.911111
4,42,sag,1.0
5,42,saga,1.0
6,50,lbfgs,0.977778
7,50,liblinear,0.866667
8,50,newton-cg,0.977778
9,50,newton-cholesky,0.911111


# classification report

In [24]:
from sklearn.metrics import classification_report

In [25]:
print(classification_report(y_test, pred_y))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        16
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

