In [1]:
import pandas as pd
import numpy as np

In [2]:
# Column labels are supplied by hand through `names`.
# NOTE(review): the classic iris CSV lists length before width for both sepal
# and petal -- confirm that 'sw'/'sl' and 'pw'/'pl' are not swapped for this file.
column_names = ['sw', 'sl', 'pw', 'pl', 'target']
dataset = pd.read_csv("./iris.csv", names=column_names)

In [3]:
# Print a summary of the frame: index range, per-column non-null counts and dtypes.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   sw      150 non-null    float64
 1   sl      150 non-null    float64
 2   pw      150 non-null    float64
 3   pl      150 non-null    float64
 4   target  150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [4]:
# Only the first 100 rows are used.  Every column except the last one is a
# feature; the last column holds the class label.
first_rows = dataset.iloc[:100]
features = first_rows.iloc[:, :-1].values
labels = first_rows.iloc[:, -1].values

In [5]:
from sklearn.feature_selection import SelectKBest, f_classif
# Keep the two features that score highest under `f_classif` against the labels.
# `k` is passed by keyword: current scikit-learn releases accept it only that
# way, so the positional form `SelectKBest(f_classif, 2)` fails there.
# NOTE(review): the selector is fitted on every row before the train/test
# split, so held-out rows influence which features are kept; consider fitting
# it on the training split only.
features = SelectKBest(f_classif, k=2).fit_transform(features, labels)



In [6]:
from sklearn.preprocessing import LabelEncoder
# Replace the class names in `labels` with integer codes.
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Split the rows into a training part and a held-out test part.
# random_state is fixed so that the split -- and therefore every score printed
# below -- is identical after a kernel restart instead of changing on each run.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, random_state=42
)

In [9]:
# X_train

In [10]:
#Metrics
from sklearn.metrics import classification_report, confusion_matrix

class metrics_:
    """Print scikit-learn evaluation reports for one set of predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels of the evaluated samples.
    y_pred : array-like
        Labels predicted by a model, in the same order as ``y_test``.
    """

    def __init__(self, y_test, y_pred):
        self.y_test = y_test
        self.y_pred = y_pred

    def cr(self):
        """Print the per-class precision / recall / F1 report."""
        # scikit-learn's signature is (y_true, y_pred).  Passing the arguments
        # the other way round swaps precision with recall and reports the
        # predicted class counts as "support".
        print(classification_report(self.y_test, self.y_pred))

    def cm(self):
        """Print the confusion matrix (rows: true labels, columns: predictions)."""
        # Same (y_true, y_pred) order; reversed arguments transpose the matrix.
        print(confusion_matrix(self.y_test, self.y_pred))

# Algorithms

In [11]:
# Logistic Regression 
from sklearn.linear_model import LogisticRegression
# Build the classifier with default settings, then train it on the training split.
lr = LogisticRegression()
lr.fit(X_train, y_train);  # trailing ';' keeps the cell output empty, as before

In [12]:
# Predict labels for the held-out rows with the fitted logistic regression.
y_predlr = lr.predict(X_test)

In [13]:
# LogisticRegression.predict() already returns class labels (the encoded 0/1
# values), not probabilities, so thresholding at 0.5 was a no-op that only
# obscured what `y_predlr` holds.  The name is kept for the evaluation cell
# that follows.
y_predlr_ = y_predlr

In [14]:
# Evaluate the logistic-regression predictions against the held-out labels:
# print the classification report, then the confusion matrix.
reports = metrics_(y_test, y_predlr_)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        12

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[13  0]
 [ 0 12]]


In [15]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree with default settings, trained on the training split.
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train);  # trailing ';' keeps the cell output empty, as before

In [16]:
# Predict labels for the held-out rows with the fitted decision tree.
ypredictdr = dt.predict(X_test)

In [17]:
# Evaluate the decision-tree predictions against the held-out labels:
# print the classification report, then the confusion matrix.
reports = metrics_(y_test, ypredictdr)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        12

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[13  0]
 [ 0 12]]


In [18]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with default settings: train, predict on the held-out rows,
# then print both evaluation reports.
rf = RandomForestClassifier()
rf.fit(X_train, y_train)

ypredrf = rf.predict(X_test)

reports = metrics_(y_test, ypredrf)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        12

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[13  0]
 [ 0 12]]


In [19]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours with default settings: train, predict on the held-out
# rows, then print both evaluation reports.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

ypredknn = knn.predict(X_test)

reports = metrics_(y_test, ypredknn)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        12

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[13  0]
 [ 0 12]]


In [20]:
from sklearn.svm import SVC
# Support-vector classifier with default settings: train, predict on the
# held-out rows, then print both evaluation reports.
svm = SVC()
svm.fit(X_train, y_train)

ypredsvm = svm.predict(X_test)

reports = metrics_(y_test, ypredsvm)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        12

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[13  0]
 [ 0 12]]


In [21]:
from sklearn.ensemble import VotingClassifier, BaggingClassifier

# (name, estimator) pairs handed to the voting ensemble below.
# SVC is created with probability=True so that it can provide the class
# probabilities that soft voting combines.
algos = [
    ('lg', LogisticRegression()),
    ('smv', SVC(probability=True)),
    ('knn', KNeighborsClassifier()),
    ('rf', RandomForestClassifier()),
]

In [22]:
# Combine the estimators listed in `algos` into one soft-voting ensemble.
vc = VotingClassifier(estimators=algos, voting='soft')

In [23]:
# Train the voting ensemble on the training split; as the cell's last
# expression, the fitted estimator's repr is shown as the output.
vc.fit(X_train, y_train)

VotingClassifier(estimators=[('lg', LogisticRegression()),
                             ('smv', SVC(probability=True)),
                             ('knn', KNeighborsClassifier()),
                             ('rf', RandomForestClassifier())],
                 voting='soft')

In [24]:
# Predict labels for the held-out rows with the fitted voting ensemble.
vcpred = vc.predict(X_test)

In [25]:
# Evaluate the voting-ensemble predictions against the held-out labels:
# print the classification report, then the confusion matrix.
reports = metrics_(y_test, vcpred)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        12

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[13  0]
 [ 0 12]]


In [26]:
from xgboost import XGBClassifier
from sklearn.ensemble import BaggingClassifier

In [28]:
# Bagging ensemble whose members are random forests.
# The base model is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, and the
# first positional slot works with both spellings.
bc = BaggingClassifier(RandomForestClassifier()).fit(X_train, y_train)
ypredbg = bc.predict(X_test)
reports = metrics_(y_test, ypredbg)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        12

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[13  0]
 [ 0 12]]


In [29]:
# Gradient-boosted trees (XGBoost) with default settings.
xgb = XGBClassifier().fit(X_train, y_train)
ypredxgb = xgb.predict(X_test)
# Score XGBoost's own predictions (`ypredxgb`), not the bagging predictions
# (`ypredbg`) left over from the previous cell.
reports = metrics_(y_test, ypredxgb)
reports.cr()
reports.cm()



              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        12

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[13  0]
 [ 0 12]]


In [30]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

In [31]:
# boston_house dataset price column as target or last column as target
# https://www.kaggle.com/datasets/fedesoriano/the-boston-houseprice-data