In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import pandas as pd

In [None]:
# Load the dataset from a CSV file that sits next to the notebook.
data = pd.read_csv("./last_data.csv")

In [None]:
# Preview the first rows to see which columns came out of the CSV.
data.head()

Unnamed: 0.1,Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,0,0,5.1,3.5,1.4,0.2,Iris-setosa
1,1,1,4.7,3.2,1.6,0.2,Iris-setosa
2,2,2,4.9,3.1,1.5,0.1,Iris-setosa
3,3,3,4.4,2.9,1.4,0.2,Iris-setosa
4,4,4,5.0,3.4,1.5,0.2,Iris-setosa


In [None]:
# Remove the "Unnamed: 0" column, which only repeats the row number in the
# preview of the raw frame. Rebinding `data` instead of mutating it in place,
# together with errors="ignore", keeps this cell idempotent: re-running it
# after the column is already gone no longer raises KeyError.
data = data.drop(columns="Unnamed: 0", errors="ignore")
data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,0,5.1,3.5,1.4,0.2,Iris-setosa
1,1,4.7,3.2,1.6,0.2,Iris-setosa
2,2,4.9,3.1,1.5,0.1,Iris-setosa
3,3,4.4,2.9,1.4,0.2,Iris-setosa
4,4,5.0,3.4,1.5,0.2,Iris-setosa


## Label Encoding

In [None]:
# Encoder used below to turn the string species names into integer class ids.
le = LabelEncoder()

In [None]:
# Learn the species -> integer mapping and encode the label column in one pass.
# The fitted encoder keeps the original names in `le.classes_`.
target = le.fit_transform(data.loc[:, "Species"])

In [None]:
# Swap the string labels for their integer codes; the column keeps its
# position as the last column of the frame.
data = data.assign(Species=target)
data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,0,5.1,3.5,1.4,0.2,0
1,1,4.7,3.2,1.6,0.2,0
2,2,4.9,3.1,1.5,0.1,0
3,3,4.4,2.9,1.4,0.2,0
4,4,5.0,3.4,1.5,0.2,0


## Checks before model creation

In [None]:
# Count missing values per column; every count should be zero before training.
data.isna().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [None]:
# Inspect column dtypes to confirm that every column is numeric.
data.dtypes

Id                 int64
SepalLengthCm    float64
SepalWidthCm     float64
PetalLengthCm    float64
PetalWidthCm     float64
Species            int32
dtype: object

In [None]:
# The Id column is no longer needed: it is a row identifier, not a feature.
# Rebinding `data` with errors="ignore" (instead of an in-place drop) makes the
# cell safe to re-run; the in-place version raised KeyError on a second run.
data = data.drop(columns="Id", errors="ignore")

In [None]:
# Everything looks good: only the four measurements and the encoded label remain.
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.7,3.2,1.6,0.2,0
2,4.9,3.1,1.5,0.1,0
3,4.4,2.9,1.4,0.2,0
4,5.0,3.4,1.5,0.2,0


## Setting up training and test sets

In [None]:
# Hold out 20% of the rows for evaluation.
# - Every column except the last is a feature; the last column is the label.
# - random_state pins the shuffle so the split, and therefore every metric
#   computed from it, is identical after Restart & Run All.
# - stratify keeps the class proportions equal in the train and test subsets,
#   so no species is under-represented in the small test set.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Show (rows, columns) for both feature matrices. display() is used for the
# first one because a cell only echoes its last expression automatically.
display(X_train.shape)
X_test.shape

(123, 4)

(31, 4)

In [None]:
# Number of training rows per encoded class.
y_train.value_counts()

2    47
1    39
0    37
Name: Species, dtype: int64

In [None]:
# Number of test rows per encoded class.
y_test.value_counts()

1    14
0    10
2     7
Name: Species, dtype: int64

## Creating the Model

In [None]:
import xgboost as xgb
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Gradient-boosted tree classifier configured for the three encoded species.
# NOTE(review): the repr printed after fitting reports
# objective='multi:softprob', so the scikit-learn wrapper appears to choose the
# multi-class objective itself -- confirm whether these two arguments are
# still needed with the installed xgboost version.
xgb_cls = xgb.XGBClassifier(
    objective="multi:softmax",
    num_class=3,
)

In [None]:
# Train the classifier on the training split only.
xgb_cls.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints=None,
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=0, num_class=3, num_parallel_tree=1,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=1,
              tree_method=None, validate_parameters=False, verbosity=None)

In [None]:
# Predict a class id for each held-out row.
preds = xgb_cls.predict(X_test)

In [None]:
# Fraction of held-out rows whose predicted class matches the true class.
accuracy_score(y_test, preds)

0.9032258064516129

In [None]:
# Per-class breakdown of the predictions: with (y_true, y_pred) argument order,
# rows are the true classes and columns are the predicted classes.
confusion_matrix(y_test, preds)

array([[10,  0,  0],
       [ 0, 11,  3],
       [ 0,  0,  7]], dtype=int64)