Data Treatment

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Read the Iris CSV into the DataFrame that the following cells work on.
iris_csv_path = '/src/dataset/Iris.csv'
df = pd.read_csv(iris_csv_path)

In [3]:
# Tally the missing values in each column and print the per-column totals.
missing_per_column = df.isna().sum()
print(missing_per_column)

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64


In [4]:
# Count the rows that pandas flags as duplicates and print the total.
duplicate_row_count = df.duplicated().sum()
print(duplicate_row_count)

0


In [5]:
# Remove the 'Id' column so only the four measurements and 'Species' remain.
# errors='ignore' keeps this cell safe to re-run: `df` is reassigned here, so
# on a second execution 'Id' is already gone and a plain drop would raise
# KeyError.
df = df.drop(columns=['Id'], errors='ignore')

In [6]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted an extra "None" line.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   SepalLengthCm  150 non-null    float64
 1   SepalWidthCm   150 non-null    float64
 2   PetalLengthCm  150 non-null    float64
 3   PetalWidthCm   150 non-null    float64
 4   Species        150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
None


In [7]:
# Feature matrix: every column of df except the 'Species' label.
X = df.drop('Species', axis='columns')

In [8]:
# Target vector: the 'Species' column on its own.
Y = df.loc[:, 'Species']

In [9]:
# Encode the species names as integer class labels.
# The encoder is fitted on df['Species'] rather than on Y so that re-running
# this cell never fits on already-encoded integers; that would overwrite the
# species names held in le.classes_ and make le.inverse_transform() useless
# for mapping predictions back to names.
le = LabelEncoder()
Y = le.fit_transform(df['Species'])
print(Y)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [10]:
# Split features and labels into train/test parts with test_size=0.2;
# random_state is fixed so the same split is produced on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

LightGBM Training

In [11]:
from lightgbm import LGBMClassifier

In [12]:
# LightGBM classifier configured for a multiclass objective with a fixed seed.
lightgbm_params = {
    'objective': 'multiclass',
    'random_state': 42,
    'verbose': -1,
}
LightGBMModel = LGBMClassifier(**lightgbm_params)

In [13]:
# Train the LightGBM model on the training split.
LightGBMModel.fit(X=X_train, y=Y_train)

In [14]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [15]:
# Predict class labels for the held-out test features.
LightGBMPredictions = LightGBMModel.predict(X=X_test)

In [16]:
# Score the LightGBM predictions against the test labels.
# NOTE: accuracy / report / confusion are reassigned by every model's
# evaluation cell, so they only ever hold the most recently evaluated model.
accuracy = accuracy_score(Y_test, LightGBMPredictions)
report = classification_report(Y_test, LightGBMPredictions)
confusion = confusion_matrix(Y_test, LightGBMPredictions)

# The last heading used to read "Relatory", which is not an English word; it
# labels the text produced by classification_report.
print("Accuracy:\n", accuracy)
print("Confusion Matrix:\n", confusion)
print("Classification Report:\n", report)

Accuracy:
 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Relatory:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



XGBoost Training

In [17]:
from xgboost import XGBClassifier

In [18]:
# XGBoost classifier: 500 estimators, softmax objective over 3 classes,
# fixed seed.
xgboost_params = {
    'n_estimators': 500,
    'objective': 'multi:softmax',
    'num_class': 3,
    'random_state': 42,
}
XGBoostModel = XGBClassifier(**xgboost_params)

In [19]:
# Train the XGBoost model on the training split.
XGBoostModel.fit(X=X_train, y=Y_train)

In [20]:
# Predict class labels for the held-out test features.
XGBoostPredictions = XGBoostModel.predict(X=X_test)

In [21]:
# Score the XGBoost predictions against the test labels.
# NOTE: accuracy / report / confusion are reassigned by every model's
# evaluation cell, so they only ever hold the most recently evaluated model.
accuracy = accuracy_score(Y_test, XGBoostPredictions)
report = classification_report(Y_test, XGBoostPredictions)
confusion = confusion_matrix(Y_test, XGBoostPredictions)

# The last heading used to read "Relatory", which is not an English word; it
# labels the text produced by classification_report.
print("Accuracy:\n", accuracy)
print("Confusion Matrix:\n", confusion)
print("Classification Report:\n", report)

Accuracy:
 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Relatory:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



Decision Tree Training

In [22]:
from sklearn.tree import DecisionTreeClassifier

In [23]:
# Decision tree classifier; the seed is the only argument passed explicitly.
tree_seed = 42
DecisionTreeModel = DecisionTreeClassifier(random_state=tree_seed)

In [24]:
# Train the decision tree on the training split.
DecisionTreeModel.fit(X=X_train, y=Y_train)

In [25]:
# Predict class labels for the held-out test features.
DecisionTreePredictions = DecisionTreeModel.predict(X=X_test)

In [26]:
# Score the decision tree predictions against the test labels.
# NOTE: accuracy / report / confusion are reassigned by every model's
# evaluation cell, so they only ever hold the most recently evaluated model.
accuracy = accuracy_score(Y_test, DecisionTreePredictions)
report = classification_report(Y_test, DecisionTreePredictions)
confusion = confusion_matrix(Y_test, DecisionTreePredictions)

# The last heading used to read "Relatory", which is not an English word; it
# labels the text produced by classification_report.
print("Accuracy:\n", accuracy)
print("Confusion Matrix:\n", confusion)
print("Classification Report:\n", report)

Accuracy:
 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Relatory:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



kNN Training

In [27]:
from sklearn.neighbors import KNeighborsClassifier

In [28]:
# k-nearest-neighbours classifier configured with n_neighbors=7.
neighbor_count = 7
kNNModel = KNeighborsClassifier(n_neighbors=neighbor_count)

In [29]:
# Train the kNN model on the training split.
kNNModel.fit(X=X_train, y=Y_train)

In [30]:
# Predict class labels for the held-out test features.
kNNPredictions = kNNModel.predict(X=X_test)

In [31]:
# Score the kNN predictions against the test labels.
# NOTE: accuracy / report / confusion are reassigned by every model's
# evaluation cell, so they only ever hold the most recently evaluated model.
accuracy = accuracy_score(Y_test, kNNPredictions)
report = classification_report(Y_test, kNNPredictions)
confusion = confusion_matrix(Y_test, kNNPredictions)

# The last heading used to read "Relatory", which is not an English word; it
# labels the text produced by classification_report.
print("Accuracy:\n", accuracy)
print("Confusion Matrix:\n", confusion)
print("Classification Report:\n", report)

Accuracy:
 0.9666666666666667
Confusion Matrix:
 [[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]
Relatory:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.89      0.94         9
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30



Saving The Models

In [32]:
import joblib

In [33]:
# Write the XGBoost model to a .json file in the models directory.
xgboost_model_path = '/src/models/XGBoostModel.json'
XGBoostModel.save_model(xgboost_model_path)

In [None]:
# Serialize the LightGBM model with joblib.
lightgbm_model_path = '/src/models/LightGBMModel.pkl'
joblib.dump(LightGBMModel, lightgbm_model_path)

In [None]:
# Serialize the decision tree with joblib, using compression level 3.
decision_tree_model_path = '/src/models/DecisionTreeModel.joblib'
joblib.dump(DecisionTreeModel, decision_tree_model_path, compress=3)

In [None]:
# Serialize the kNN model with joblib.
# NOTE(review): the LabelEncoder `le` is not saved in any visible cell, so
# whoever loads these models has no stored mapping from the integer
# predictions back to species names - confirm whether it should be persisted.
knn_model_path = '/src/models/kNNModel.pkl'
joblib.dump(kNNModel, knn_model_path)