# Logistic Regression

In [1]:
import pandas as pd
import numpy as np

In [2]:
iris = pd.read_csv(r"iris_cleaned.csv")
iris = iris.drop(columns=['Unnamed: 0'])

In [3]:
X = iris[iris.columns[:4]]
y = iris[['class']]

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
df_train, df_test = train_test_split(iris, test_size=0.3, random_state=100)
y_train = y_train
y_test = y_test

In [5]:
from sklearn import preprocessing as ppr
lab_en = ppr.LabelEncoder()
y_train['class'] = lab_en.fit_transform(y_train['class'])        # updating the class as encoded targets
y_test['class'] = lab_en.fit_transform(y_test['class'])

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

model = LogisticRegression()

In [7]:
from sklearn.pipeline import Pipeline
# Define the pipeline
pipe = Pipeline([
    ('std', StandardScaler()),  # Standardization
    ('logistic regression', model)  # ANN model as the final classifier
], verbose=True)

In [8]:
pipe.fit(X_train,y_train)

[Pipeline] ............... (step 1 of 2) Processing std, total=   0.0s
[Pipeline]  (step 2 of 2) Processing logistic regression, total=   0.0s


  y = column_or_1d(y, warn=True)


In [9]:
y_pred = pipe.predict(X_test)

In [10]:
from sklearn.metrics import classification_report, confusion_matrix
print("Confusion Matrix\n\n",confusion_matrix(y_test,y_pred))
print("\nClassification Report for Logistic Regression\n\n",classification_report(y_test, y_pred))

Confusion Matrix

 [[12  0  0]
 [ 0  7  1]
 [ 0  1  9]]

Classification Report for Logistic Regression

               precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.88      0.88      0.88         8
           2       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.92      0.92      0.92        30
weighted avg       0.93      0.93      0.93        30



# CatBoost

In [11]:
from catboost import CatBoostClassifier

In [12]:
X = iris[iris.columns[:4]]
y = iris[['class']]
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
df_train, df_test = train_test_split(iris, test_size=0.3, random_state=100)
y_train = y_train
y_test = y_test

from sklearn import preprocessing as ppr
lab_en = ppr.LabelEncoder()
y_train['class'] = lab_en.fit_transform(y_train['class'])        # updating the class as encoded targets
y_test['class'] = lab_en.fit_transform(y_test['class'])

In [13]:
# Define the pipeline
pipe_catboost = Pipeline([
    ('std', StandardScaler()),  # Standardization
    ('categorical boosting', CatBoostClassifier(verbose=0))  # ANN model as the final classifier
], verbose=True)

In [14]:
pipe_catboost.fit(X_train,y_train)

[Pipeline] ............... (step 1 of 2) Processing std, total=   0.0s
[Pipeline]  (step 2 of 2) Processing categorical boosting, total=   1.7s


In [15]:
y_pred_catboost = pipe_catboost.predict(X_test)

In [16]:
print("Confusion Matrix\n\n",confusion_matrix(y_test,y_pred_catboost))
print("\nClassification Report for CatBoost\n\n",classification_report(y_test, y_pred_catboost))

Confusion Matrix

 [[12  0  0]
 [ 0  7  1]
 [ 0  1  9]]

Classification Report for CatBoost

               precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.88      0.88      0.88         8
           2       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.92      0.92      0.92        30
weighted avg       0.93      0.93      0.93        30



# Ensemble of Ensembles

In [17]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier


In [18]:
# Create base learners (first layer of ensemble)
base_learners = [
    ('catboost', Pipeline([('scaler', StandardScaler()), ('catboost', CatBoostClassifier(verbose=0))])),
    ('random_forest', Pipeline([('scaler', StandardScaler()), ('rf', RandomForestClassifier())])),
    ('gradient_boost', Pipeline([('scaler', StandardScaler()), ('gb', GradientBoostingClassifier())]))
]

In [19]:
# Create a stacking classifier (ensemble of ensembles) with Logistic Regression as the meta-learner
stacking_ensemble = StackingClassifier(estimators=base_learners, final_estimator=LogisticRegression())


In [20]:
# Train the model
stacking_ensemble.fit(X_train, y_train)

# Predict on the test set
y_pred_ensemble = stacking_ensemble.predict(X_test)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


In [21]:
print("Confusion Matrix\n\n",confusion_matrix(y_test,y_pred_catboost))
print("\nClassification Report for Ensemble of Ensembles\n\n",classification_report(y_test, y_pred_ensemble))

Confusion Matrix

 [[12  0  0]
 [ 0  7  1]
 [ 0  1  9]]

Classification Report for Ensemble of Ensembles

               precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.88      0.88      0.88         8
           2       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.92      0.92      0.92        30
weighted avg       0.93      0.93      0.93        30



# Artificial Neural Network

In [None]:
X = iris[iris.columns[:4]]
y = iris[['class']]
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [24]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler

encoder = OneHotEncoder(sparse_output=False)
y_train = encoder.fit_transform(y_train)
y_test = encoder.fit_transform(y_test)

In [25]:
from keras.models import Sequential
from keras.layers import Dense

def create_ann_model():
    model = Sequential()
    model.add(Dense(3, input_dim = 4, activation='relu'))
    model.add(Dense(5,activation='relu'))
    model.add(Dense(3,activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [26]:
from scikeras.wrappers import KerasClassifier
# Wrapping the Keras model for compatibility with scikit-learn
ann_model = KerasClassifier(build_fn=create_ann_model, epochs=300, batch_size=8, verbose=0)


In [27]:
from sklearn.pipeline import Pipeline
# Define the pipeline
pipe = Pipeline([
    ('ann', ann_model)  # ANN model as the final classifier
], verbose=True)

In [28]:
pipe.fit(X_train,y_train)

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[Pipeline] ............... (step 1 of 1) Processing ann, total=  16.4s


In [None]:
# Make predictions on the test set using the pipeline
y_test_pred = pipe.predict(X_test)  # Get probabilities
y_pred_ann = np.argmax(y_test_pred, axis=1)  # Convert probabilities to class labels


In [32]:
y_test= np.argmax(y_test, axis=1)

In [36]:
print("Confusion Matrix\n\n",confusion_matrix(y_test,y_pred_ann))
print("\nClassification Report for Ensemble of Ensembles\n\n",classification_report(y_test, y_pred_ann))

Confusion Matrix

 [[12  0  0]
 [ 0  7  1]
 [ 0  0 10]]

Classification Report for Ensemble of Ensembles

               precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      0.88      0.93         8
           2       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.96        30
weighted avg       0.97      0.97      0.97        30

