In [1]:
from cuml.model_selection import train_test_split
from cuml.preprocessing import StandardScaler
from sklearn.neighbors import LocalOutlierFactor
from xgboost import XGBClassifier
from ImageRecognizerML.CustomModel.CustomVoting import CustomVoting
from sklearn.metrics import classification_report, accuracy_score
import cupy as cp
import cudf as cf
import pickle as pkl

# Getting Dataset

In [2]:
# Load both CSV files with cudf, in order: the main table first, then the
# companion features table. Later cells treat the last column of each table
# as the class label and every other column as a model input.
df, df_features = map(cf.read_csv, ('cifar10.csv', 'cifar10_features.csv'))

# Scaling Data

In [3]:
# Standardize every column except the last one of each table; the last column
# is left untouched (later cells read it as the class label).
columns = df.columns[:-1]
columns_features = df_features.columns[:-1]

# Each table gets its own scaler. Calling fit_transform a second time on a
# single StandardScaler refits it, which discards the statistics learned from
# `df` and makes that scaling impossible to re-apply to new data or to persist.
scaler_df = StandardScaler()
df[columns] = scaler_df.fit_transform(df[columns])

scaler_features = StandardScaler()
df_features[columns_features] = scaler_features.fit_transform(df_features[columns_features])

# `scaler` stays bound to the scaler fitted on `df_features`, which is the state
# a single shared instance ends up in, so code that refers to it keeps working.
scaler = scaler_features

# NOTE(review): both scalers are fitted on all rows, before the train/test
# split performed later in the notebook, so held-out rows contribute to the
# scaling statistics -- confirm this is acceptable for the reported metrics.

# Removing Outliers

In [4]:
# Split each table into a float64 input matrix (all columns but the last) and
# an int8 label vector (the last column).
X = df.iloc[:, :-1].values.astype(cp.float64)
y = df.iloc[:, -1].values.astype(cp.int8)
X_features = df_features.iloc[:, :-1].values.astype(cp.float64)
y_features = df_features.iloc[:, -1].values.astype(cp.int8)

# Outliers are detected on X only, and the same row positions are then removed
# from the df_features arrays. That is only meaningful when both tables hold
# the same samples in the same order, so fail loudly instead of silently
# dropping unrelated rows.
if X.shape[0] != X_features.shape[0]:
    raise ValueError(
        f"Row count mismatch: df has {X.shape[0]} rows but df_features has "
        f"{X_features.shape[0]}; outlier positions from one cannot be applied to the other."
    )
if not bool(cp.array_equal(y, y_features)):
    raise ValueError(
        "Label mismatch: the label columns of df and df_features differ, so the "
        "two tables are not row-aligned."
    )

# LocalOutlierFactor comes from scikit-learn, so it is given a host copy of X
# (.get()); its predictions are moved back into a cupy array afterwards.
lof = LocalOutlierFactor(n_neighbors=5, contamination=0.02, n_jobs=-1)
lof_outliers = cp.array(lof.fit_predict(X.get()))

# fit_predict labels outliers as -1 and inliers as 1.
outliers_idx = cp.where(lof_outliers == -1)

# Drop the flagged rows from all four arrays so they stay aligned.
X = cp.delete(X, outliers_idx, axis=0)
y = cp.delete(y, outliers_idx, axis=0)
X_features = cp.delete(X_features, outliers_idx, axis=0)
y_features = cp.delete(y_features, outliers_idx, axis=0)

# Training Model

In [5]:
# Both tables are split with identical options.
# NOTE(review): the scores below compare y_test against predictions built from
# X_test and X_test_features together, which presumably relies on the two
# splits selecting the same rows -- confirm.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)
(X_train_features, X_test_features,
 y_train_features, y_test_features) = train_test_split(X_features, y_features, **split_options)

# Two XGBoost classifiers, one per table, combined by CustomVoting with the
# given weights.
base_models = [
    XGBClassifier(n_estimators=900, learning_rate=0.3, max_depth=6, device='cuda'),
    XGBClassifier(n_estimators=2000, learning_rate=0.2, max_depth=5, device='cuda'),
]
model = CustomVoting(models=base_models, weights=[0.7, 0.3])

# One training payload per base model, listed in the same order as base_models.
training_sets = [
    {'X_train': X_train, 'y_train': y_train},
    {'X_train': X_train_features, 'y_train': y_train_features},
]
model.fit(training_sets)

y_pred = model.predict([X_test, X_test_features])

# The scikit-learn metrics are given a host copy of the labels.
y_true = y_test.get()
print(accuracy_score(y_true, y_pred))
print(classification_report(y_true, y_pred))

Training Models Started
Training Model 1 Done
Training Model 2 Done
Training Models Finished
Predicting Models Started
Predicting Models Finished
0.6005952380952381
              precision    recall  f1-score   support

           0       0.64      0.63      0.64      1250
           1       0.69      0.69      0.69      1122
           2       0.51      0.48      0.49      1168
           3       0.45      0.44      0.45      1173
           4       0.52      0.49      0.50      1160
           5       0.52      0.50      0.51      1170
           6       0.62      0.71      0.66      1219
           7       0.66      0.65      0.65      1155
           8       0.72      0.76      0.74      1198
           9       0.64      0.67      0.65      1145

    accuracy                           0.60     11760
   macro avg       0.60      0.60      0.60     11760
weighted avg       0.60      0.60      0.60     11760

