In [None]:
####################################################################################################################
# Competition   : Dry Beans Classification
# Team Members  : 1- Abdullah Abdelhakeem
#                 2- Mohamed Sebaie                   
#                 3- Mohamed Moustafa
#                 4- Ossama Ahmed
#                 5- Mahmoud Osama
#                 6- Hazem
#
# Problem :       Supervised Classification
# version :       0.0.1
#
###################################################################################################################

# Dataset Information



Given a set of features extracted from the shape of beans in images, the task is to predict the class of each bean from those shape features.
There are 7 bean types in this dataset.

**Data fields**
- ID - an ID for this instance
- Area - (A), The area of a bean zone and the number of pixels within its boundaries.
- Perimeter - (P), Bean circumference is defined as the length of its border.
- MajorAxisLength - (L), The distance between the ends of the longest line that can be drawn from a bean.
- MinorAxisLength - (l), The longest line that can be drawn from the bean while standing perpendicular to the main axis.
- AspectRatio - (K), Defines the relationship between L and l.
- Eccentricity - (Ec), Eccentricity of the ellipse having the same moments as the region.
- ConvexArea - (C), Number of pixels in the smallest convex polygon that can contain the area of a bean seed.
- EquivDiameter - (Ed), The diameter of a circle having the same area as a bean seed area.
- Extent - (Ex), The ratio of the pixels in the bounding box to the bean area.
- Solidity - (S), Also known as convexity. The ratio of the pixels in the convex shell to those found in beans.
- Roundness - (R), Calculated with the following formula: $\frac{4\pi A}{P^2}$
- Compactness - (CO), Measures the roundness of an object: Ed/L
- ShapeFactor1 - (SF1)
- ShapeFactor2 - (SF2)
- ShapeFactor3 - (SF3)
- ShapeFactor4 - (SF4)
- y - the class of the bean. It can be any of BARBUNYA, SIRA, HOROZ, DERMASON, CALI, BOMBAY, and SEKER.


<img src="https://www.thespruceeats.com/thmb/eeIti36pfkoNBaipXrTHLjIv5YA=/1888x1416/smart/filters:no_upscale()/DriedBeans-56f6c2c43df78c78418c3b46.jpg" alt="Dried beans" style='width: 800px;height:400px'>

# Import Required Libraries

In [132]:
# !pip install catboost

In [133]:
# pip install mlxtend

In [134]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import statsmodels.api as sm
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import ConfusionMatrixDisplay,confusion_matrix
from mlxtend.plotting import plot_confusion_matrix
#####################################################################
# for visualizations
from pandas import plotting
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
plt.style.use('fivethirtyeight')
sns.set(rc={'figure.figsize': [7, 7]}, font_scale=1.2)
###########################################################
%config Completer.use_jedi=False
pd.set_option("display.max_columns", None)

# ignore the warnings
import warnings
warnings.filterwarnings("ignore")
np.set_printoptions(suppress=True)

# for path
import os
# Print the path of every file found under the Kaggle input directory
# (os.walk yields nothing when the directory does not exist).
kaggle_inputs = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in kaggle_inputs:
    print(input_path)

# Read Train and Test Data

In [135]:
# dataset_path = '/kaggle/input/dry-beans-classification-iti-ai-pro-intake01/'
# dfTrainO = pd.read_csv(os.path.join(dataset_path, 'train.csv'))
# dfTestO = pd.read_csv(os.path.join(dataset_path, 'test.csv'))
# print("The shape of the dataset is {}.\n\n".format(dfTrainO.shape))
# print("The shape of the dataset is {}.\n\n".format(dfTestO.shape))
# dfTrainO.head()

In [136]:
def _banner(title):
    """Print ``title`` framed above and below by a rule of 90 dashes."""
    rule = '---'*30
    print(rule)
    print(title)
    print(rule)


# Read the training file and preview its first rows.
dfTrainO = pd.read_csv("train.csv")
_banner('Train Head')
display(dfTrainO.head())

# Read the test file and preview its first rows.
dfTestO = pd.read_csv("test.csv")
_banner('Test Head')
display(dfTestO.head())

------------------------------------------------------------------------------------------
Train Head
------------------------------------------------------------------------------------------


Unnamed: 0,ID,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,y
0,0,58238,971.303,397.202654,186.94551,2.124697,0.882317,58977,272.306674,0.604756,0.98747,0.775723,0.685561,0.00682,0.000929,0.469994,0.998595,HOROZ
1,1,44515,757.601,265.59047,213.967453,1.241266,0.59242,44780,238.07196,0.771745,0.994082,0.97462,0.896387,0.005966,0.002376,0.80351,0.99737,SEKER
2,2,31662,653.783,239.902428,168.421505,1.424417,0.712136,32034,200.781748,0.801407,0.988387,0.930853,0.836931,0.007577,0.002293,0.700453,0.997737,DERMASON
3,3,38778,734.02,268.446281,184.061923,1.458456,0.727925,39208,222.201897,0.766137,0.989033,0.904439,0.827733,0.006923,0.002005,0.685142,0.99925,DERMASON
4,4,42530,775.403,281.212824,193.236878,1.455275,0.726511,43028,232.703412,0.729816,0.988426,0.888895,0.827499,0.006612,0.001912,0.684755,0.996507,SIRA


------------------------------------------------------------------------------------------
Test Head
------------------------------------------------------------------------------------------


Unnamed: 0,ID,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4
0,10834,57659,955.434,387.757607,196.625782,1.972059,0.861896,60188,270.949661,0.62079,0.957982,0.793735,0.69876,0.006725,0.000989,0.488266,0.962889
1,10835,28772,630.362,229.990785,159.609367,1.44096,0.719993,29127,191.399185,0.767458,0.987812,0.909913,0.832204,0.007994,0.002365,0.692563,0.997956
2,10836,54677,911.022,308.853903,226.398571,1.364204,0.680198,55858,263.850182,0.753013,0.978857,0.82786,0.854288,0.005649,0.001856,0.729808,0.995607
3,10837,24827,578.304,214.192699,147.788172,1.449322,0.723831,25121,177.794033,0.716508,0.988297,0.932871,0.830066,0.008627,0.002526,0.689009,0.998596
4,10838,141850,1432.847,524.923471,346.91092,1.513136,0.750492,143680,424.981211,0.804617,0.987263,0.86824,0.809606,0.003701,0.000981,0.655462,0.991803


In [137]:
# Display the test frame (the rendered output elides the middle rows).
dfTestO

Unnamed: 0,ID,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4
0,10834,57659,955.434,387.757607,196.625782,1.972059,0.861896,60188,270.949661,0.620790,0.957982,0.793735,0.698760,0.006725,0.000989,0.488266,0.962889
1,10835,28772,630.362,229.990785,159.609367,1.440960,0.719993,29127,191.399185,0.767458,0.987812,0.909913,0.832204,0.007994,0.002365,0.692563,0.997956
2,10836,54677,911.022,308.853903,226.398571,1.364204,0.680198,55858,263.850182,0.753013,0.978857,0.827860,0.854288,0.005649,0.001856,0.729808,0.995607
3,10837,24827,578.304,214.192699,147.788172,1.449322,0.723831,25121,177.794033,0.716508,0.988297,0.932871,0.830066,0.008627,0.002526,0.689009,0.998596
4,10838,141850,1432.847,524.923471,346.910920,1.513136,0.750492,143680,424.981211,0.804617,0.987263,0.868240,0.809606,0.003701,0.000981,0.655462,0.991803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2704,13538,66833,1018.336,400.397077,214.346215,1.867992,0.844640,67787,291.709476,0.759604,0.985927,0.809876,0.728550,0.005991,0.001041,0.530786,0.991504
2705,13539,40572,725.331,250.714014,206.471935,1.214276,0.567264,40900,227.283688,0.767479,0.991980,0.969089,0.906546,0.006179,0.002574,0.821825,0.997923
2706,13540,61226,1008.378,408.227467,199.385082,2.047432,0.872611,63901,279.204879,0.611648,0.958138,0.756657,0.683944,0.006668,0.000900,0.467780,0.957748
2707,13541,34238,698.847,255.829224,170.867099,1.497241,0.744255,34724,208.789788,0.724890,0.986004,0.880956,0.816130,0.007472,0.002045,0.666067,0.997263


In [138]:
# Look up the test row whose ID is 10834.
dfTestO[dfTestO["ID"]==10834]

Unnamed: 0,ID,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4
0,10834,57659,955.434,387.757607,196.625782,1.972059,0.861896,60188,270.949661,0.62079,0.957982,0.793735,0.69876,0.006725,0.000989,0.488266,0.962889


In [139]:
# Training rows whose Area equals 42008; the output shows several rows sharing
# this Area while differing in class.
dfTrainO[dfTrainO["Area"]==42008]

Unnamed: 0,ID,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,y
131,131,42008,759.454,280.332717,191.218136,1.466036,0.731248,42419,231.270938,0.71171,0.990311,0.915248,0.824987,0.006673,0.001907,0.680604,0.99779,DERMASON
2861,2861,42008,738.016,248.360274,215.662286,1.151617,0.495961,42361,231.270938,0.784683,0.991667,0.969193,0.931191,0.005912,0.002742,0.867117,0.998587,SEKER
3805,3805,42008,773.158,294.492203,181.847359,1.619447,0.786575,42547,231.270938,0.69223,0.987332,0.883091,0.785321,0.00701,0.001645,0.616729,0.99876,DERMASON


__________________________________________________

In [185]:
from sklearn.model_selection import train_test_split
# Stratified hold-out split: 33% of the rows go to validation, with class
# proportions preserved through `stratify`.
train_df, val_df = train_test_split(
    dfTrainNEW,
    stratify=dfTrainNEW['y'],
    test_size=0.33,
    random_state=0,
)

# Features are every column except the row identifier and the label.
_non_feature_cols = ["ID", "y"]
X_train, y_train = train_df.drop(columns=_non_feature_cols), train_df['y']
X_val, y_val = val_df.drop(columns=_non_feature_cols), val_df['y']

In [188]:
def normalize(df):
    """Min-max scale every column of ``df`` into the range [0, 1].

    Each column is transformed as ``(value - column_min) / (column_max - column_min)``.
    A constant column (``column_max == column_min``) has zero range; it is
    mapped to 0.0 instead of dividing by zero and filling the column with NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are all numeric.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns; ``df`` itself is not modified.
    """
    col_min = df.min()
    col_range = df.max() - col_min
    # Replace a zero range by 1 so that (x - min) / range evaluates to 0 for
    # constant columns rather than 0 / 0 = NaN.
    safe_range = col_range.replace(0, 1)
    return (df - col_min) / safe_range

# X_train=normalize(X_train)
# X_val=normalize(X_val)
# dfTest=normalize(dfTest)

# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler()
# scaler.fit(X_train)
# X_train = scaler.transform(X_train)
# X_val = scaler.transform(X_val)

# from sklearn.preprocessing import minmax_scale
# X_train = minmax_scale(X_train, feature_range=(0, 1))
# X_val = minmax_scale(X_val, feature_range=(0, 1))

# from sklearn.preprocessing import RobustScaler
# rb = RobustScaler()
# X_train= rb.fit_transform(X_train)
# X_val = rb.fit_transform(X_val)

from sklearn.preprocessing import StandardScaler
# Standardise the features. The scaler learns its statistics from the training
# split only and the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_val = scaler.transform(X_train), scaler.transform(X_val)

In [189]:
# pip install -U imbalanced-learn


In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the training split with SMOTE (seeded for repeatability).
# Only the training data is resampled; the validation split is left untouched.
smt = SMOTE(random_state=0)
X_train, y_train = smt.fit_resample(X_train, y_train)

# Per-class counts after resampling. The original referenced `y_SMOTE`, a name
# this notebook never defines, so the cell raised NameError on a fresh kernel.
np.unique(y_train, return_counts=True)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier,GradientBoostingClassifier,AdaBoostClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [None]:
# Candidate classifiers compared on the hold-out split, keyed by display name.
# The scikit-learn estimators that draw random numbers are seeded with
# random_state=0 (the same seed used for the split and for SMOTE) so the
# comparison is repeatable from run to run.
models = {
    # "LogisticRegression":           LogisticRegression(),
    "KNeighborsClassifier":         KNeighborsClassifier(),
    "DecisionTreeClassifier":       DecisionTreeClassifier(random_state=0),
    "SupportVectorMachine":         SVC(C=1.0,kernel='rbf',gamma='auto'),
    "LinearDiscriminantAnalysis":   LinearDiscriminantAnalysis(),
    "GaussianNB":                   GaussianNB(),
    "SGDClassifier":                SGDClassifier(random_state=0),
    "RandomForestClassifier":       RandomForestClassifier(random_state=0),
    "BaggingClassifier":            BaggingClassifier(random_state=0),
    "CatBoostClassifier":           CatBoostClassifier(verbose=False,loss_function='MultiClass'),
    "LGBMClassifier":               LGBMClassifier(),
    "GradientBoostingClassifier":   GradientBoostingClassifier(random_state=0),
    "XGBClassifier":                XGBClassifier(eval_metric='mlogloss')
}

In [None]:
# Sanity check: shape of the training feature matrix before fitting.
X_train.shape

In [None]:
# Sanity check: the label vector should have as many rows as X_train.
y_train.shape

In [None]:
# Fit every candidate on the training data and report train and validation
# accuracy side by side.
separator, footer = '-'*30, '**'*30
for name, model in models.items():
    print(f'Using model: {name}')
    print(separator)
    model.fit(X_train, y_train)
    y_trainhat, y_valhat = model.predict(X_train), model.predict(X_val)
    acc = accuracy_score(y_val, y_valhat)
    print(f'Train_Accuracy: {accuracy_score(y_train,y_trainhat)}')
    print(f'Validation_Accuracy: {acc}')
    print(footer)

In [None]:
# Columns present in the training frame but missing from the test frame
# (a set difference, so the order of the result is not guaranteed).
list(set(dfTrainNEW.columns) - set(dfTestNEW.columns))

In [None]:
# Preview the first two rows of the test frame.
dfTestNEW.head(2)

## Test Data

In [None]:
# ############################################
# TestYTrue= pd.read_csv("yTest.csv")
# yTrueTest=TestYTrue["y"]
# ############################################

# Keep the test IDs aside; they are concatenated back next to the predictions
# when the submission file is written further down.
ID=dfTestNEW["ID"]


In [None]:
# Shape of the test frame (ID column still included).
dfTestNEW.shape

In [None]:
# Preview the test features: every column except ID.
dfTestNEW.loc[: , dfTestNEW.columns!="ID"]

In [None]:
# Test feature matrix: the test frame without its ID column.
X_test = dfTestNEW.drop(columns="ID")


In [None]:
# X_test

In [None]:
# Apply the already-fitted `scaler` to the test features (transform only, no re-fit).
# NOTE: this rebinds X_test, so running the cell a second time would scale the
# data twice; re-create X_test first if the cell has to be re-run.
X_test=scaler.transform(X_test)

In [None]:
# Classifier used for the submission: CatBoost with 700 iterations and a
# learning rate of 0.2, scored on the training data and the validation split.
Model = CatBoostClassifier(verbose=False, learning_rate=0.2, iterations=700)
Model.fit(X_train, y_train)

y_trainhat, y_valhat = Model.predict(X_train), Model.predict(X_val)
acc = accuracy_score(y_val, y_valhat)
print(f'Train_Accuracy: {accuracy_score(y_train,y_trainhat)}')
print(f'Validation_Accuracy: {acc}')
print('**'*30)

In [None]:
# Inspect the training labels.
y_train

In [None]:
# Predict the test set and write the submission file.
y_test_predicted = Model.predict(X_test)

# Build the output frame without mutating dfTestO. The original dropped "ID"
# from dfTestO in place, so running the cell a second time failed because the
# column was already gone. np.ravel flattens the predictions in case the model
# returns them as an (n, 1) column array.
dfTest = pd.concat(
    [dfTestO.drop(columns="ID").assign(y=np.ravel(y_test_predicted)), ID],
    axis=1,
)

#dfTestEncoded2[['ID', 'y']].to_csv('/kaggle/working/submission.csv', index=False)
dfTest[['ID', 'y']].to_csv('submission.csv', index=False)

In [None]:
# Preview the two columns that were written to submission.csv.
dfTest[['ID', 'y']]

# Test ActualData vs TestData

In [None]:
#DifferenceActualTest.csv
# Load the reference file used for the actual-versus-test comparison.
testAcual = pd.read_csv(filepath_or_buffer="DifferenceActualTest.csv")

In [117]:
# Column names of the reference file; its label column is called 'Class'.
testAcual.columns

Index(['Unnamed: 0', 'Area', 'Perimeter', 'MajorAxisLength', 'MinorAxisLength',
       'AspectRation', 'Eccentricity', 'ConvexArea', 'EquivDiameter', 'Extent',
       'Solidity', 'roundness', 'Compactness', 'ShapeFactor1', 'ShapeFactor2',
       'ShapeFactor3', 'ShapeFactor4', 'Class'],
      dtype='object')

In [115]:
from imblearn.over_sampling import SMOTE

# Oversample the training data with SMOTE (seeded for repeatability).
smt = SMOTE(random_state=0)
X_train, y_train = smt.fit_resample(X_train, y_train)

# Per-class counts after resampling. `y_SMOTE` is not defined anywhere in the
# notebook, so the resampled labels in y_train are counted instead.
np.unique(y_train, return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6]),
 array([1900, 1900, 1900, 1900, 1900, 1900, 1900], dtype=int64))

In [116]:
# Inspect the label column of the reference file.
testAcual["Class"]

0           SEKER
1           SEKER
2           SEKER
3           SEKER
4           SEKER
           ...   
13630    DERMASON
13631    DERMASON
13632    DERMASON
13633    DERMASON
13634    DERMASON
Name: Class, Length: 13635, dtype: object

### END Test

__________________________________________________

# DeepLearning

In [None]:
from keras.utils import np_utils

In [None]:
# Feature frame for the neural network: everything except the ID and the label.
x = dfTrainNEW.drop(["ID", "y"], axis=1)

In [None]:
# Label column, still in its raw (un-encoded) form.
yData = dfTrainNEW.loc[:, 'y']

In [None]:
# Inspect the raw labels.
yData

In [None]:
# Fit a label encoder on the target column.
# NOTE(review): the following cell creates and fits the encoder again, so this
# cell looks redundant.
encoder = LabelEncoder()
encoder.fit(yData)


In [None]:
# Encode the labels as integers, then expand them into a one-hot matrix
# (one column per class) for the softmax output layer.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(yData)
y = np_utils.to_categorical(encoded_Y)

In [None]:
# Integer-encoded labels.
encoded_Y

In [None]:
# One-hot encoded label matrix.
y

In [None]:
# # define baseline model
# def baseline_model():
# 	# create model
# 	model = Sequential()
#   model.add(Dense(256, input_shape=[x.shape[1]], activation='relu'))
#   model.add(Dropout(0.2))
#   model.add(Dense(512, activation='relu'))
#   model.add(Dense(y.shape[1], activation='softmax'))
# 	# Compile model
# 	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# 	return model

In [None]:
# estimator = KerasClassifier(build_fn=baseline_model, epochs=200, batch_size=32, verbose=0)
# kfold = KFold(n_splits=10, shuffle=True)
# results = cross_val_score(estimator, X, dummy_y, cv=kfold)
# print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# 80/20 hold-out split for the neural network, stratified on the raw labels.
# NOTE: this rebinds y_train / y_val to one-hot arrays.
x_train, x_val, y_train, y_val = train_test_split(
    x, y,
    stratify=yData,
    random_state=42,
    test_size=0.2,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the network inputs with statistics taken from the training split
# only, then apply the same fitted transform to the validation split.
sc = StandardScaler().fit(x_train)
x_train, x_val = sc.transform(x_train), sc.transform(x_val)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

In [None]:
# Rows and columns of the unscaled feature frame.
x.shape

In [None]:
# Number of input features; used as the network's input size below.
x.shape[1]

In [None]:
# Shape of the one-hot label matrix.
y.shape

In [None]:
# Number of classes; used as the width of the softmax output layer below.
y.shape[1]

In [None]:
# Shape of the scaled training split.
x_train.shape

In [None]:
# Inspect the scaled training features.
x_train

## Defining the model structure

In [None]:
# Fully connected network: 256 -> dropout(0.2) -> 512 -> 512 -> softmax with
# one output unit per class.
model = Sequential([
    Dense(256, input_shape=[x.shape[1]], activation='relu'),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dense(512, activation='relu'),
    Dense(y.shape[1], activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the network.
model.summary()

In [None]:
# Categorical cross-entropy matches the one-hot targets; accuracy is tracked
# as the reported metric and Adam runs with its default settings.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=Adam(),
)

In [None]:
# Multiply the learning rate by 0.75 whenever val_loss has not improved for
# 10 epochs.
lrd = ReduceLROnPlateau(monitor = 'val_loss',
                         patience = 10,
                         verbose = 1,
                         factor = 0.75,
                         min_lr = 1e-10)

# Keep only the best epoch (lowest val_loss) on disk rather than overwriting
# the file with whatever the latest epoch produced.
mcp = ModelCheckpoint('model.h5', monitor='val_loss', save_best_only=True)

# Stop after 100 epochs without val_loss improvement and roll the model back to
# its best weights. Without restore_best_weights the in-memory model that gets
# evaluated afterwards is the one from the final, 100-epochs-stale step.
es = EarlyStopping(monitor='val_loss', verbose=1, patience=100, restore_best_weights=True)

## Training the model

In [None]:
# Train for at most 500 epochs; Keras holds out 10% of the training split as
# validation data, which is what the callbacks monitor.
training_callbacks = [lrd, mcp, es]
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=500,
    validation_split=0.1,
    callbacks=training_callbacks,
)

## Testing & evaluating the model

In [None]:
# Predicted class index for each validation sample: position of the largest
# softmax output in every row.
y_pred = model.predict(x_val).argmax(axis=-1)
y_pred

In [None]:
# One-hot validation targets, for comparison with the predictions.
y_val

In [None]:
# Predicted class index for the validation sample at position 10
# (this re-runs predict on the whole validation set).
np.argmax(model.predict(x_val), axis=-1)[10]

In [None]:
# True label of the validation sample at position 10, to set against the
# prediction for that same sample. `yData.iloc[10]` is row 10 of the full,
# unsplit data and does not correspond to x_val[10] after the shuffled split.
encoder.inverse_transform([np.argmax(y_val[10])])[0]

In [None]:
# Score the network on the held-out validation split; with the compile settings
# used in this notebook it reports the loss followed by accuracy.
model.evaluate(x_val, y_val)

In [None]:
# Inspect the predicted class indices again.
y_pred

## Visualize Loss

In [None]:
# summarize history for loss
# One figure per tracked quantity, each showing the training curve and the
# validation curve across epochs.
for train_key, val_key, title, ylabel, legend_loc in (
    ('loss', 'val_loss', 'model loss', 'loss', 'upper right'),
    ('accuracy', 'val_accuracy', 'model accuracy', 'accuracy', 'upper left'),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc=legend_loc)
    plt.show()

## True Test

In [None]:
# Load the test file that includes the true labels in column 'y'.
dftest=pd.read_csv("testTrue.csv")

In [None]:
# Distinct class labels present in the labelled test file.
dftest['y'].unique()

In [None]:
# Apply `Convert` to dftest for every entry of `featuersToconvert`.
# NOTE(review): both names are defined outside this section; presumably this
# repeats the preprocessing applied to the training frame -- confirm.
for f in featuersToconvert:
  Convert(dftest,f)

In [None]:
# True labels of the labelled test file.
df_YTrue=dftest["y"]
df_YTrue

In [None]:
# Integer-encode the true test labels and expand them to one-hot rows.
# NOTE(review): this fits a fresh encoder on the test labels instead of reusing
# the one fitted on the training labels; the two encodings agree only if both
# label sets contain exactly the same classes -- confirm.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(df_YTrue)
ytest = np_utils.to_categorical(encoded_Y)

In [None]:
# One-hot encoded true test labels.
ytest

In [None]:
# dftest = pd.get_dummies(dftest, columns=['y'])
# dftest

In [None]:
# Test features: the labelled test frame without its label column.
xtest = dftest.drop("y", axis=1)

In [None]:
# Shape of the test feature frame; its column count has to match the network input.
xtest.shape

In [None]:
from sklearn.preprocessing import StandardScaler

# Scale the test features with `sc`, the scaler fitted on the training split.
# Fitting a new scaler on the test data would standardise it with different
# means and variances from the ones the network was trained on, which distorts
# every prediction made afterwards.
# NOTE(review): `sc` must still be the training-fitted scaler at this point;
# make sure no cell re-fits it on evaluation data before this one runs.
xtest = sc.transform(xtest)

In [None]:
# Predicted class index per test row (position of the largest softmax output).
y_pred2 = model.predict(xtest).argmax(axis=-1)
y_pred2

In [None]:
# Translate predicted class indices into bean names with one lookup table
# instead of seven chained np.where passes. The trailing lstrip('y_') is
# dropped because none of the names starts with 'y' or '_'.
# Assumes indices follow the alphabetical class order hard-coded in the
# original mapping (0 = BARBUNYA ... 6 = SIRA).
_CLASS_NAMES = ['BARBUNYA', 'BOMBAY', 'CALI', 'DERMASON', 'HOROZ', 'SEKER', 'SIRA']
_pred_codes = pd.Series(y_pred2)
dfyPred = (
    _pred_codes.map(dict(enumerate(_CLASS_NAMES)))
    # An index outside the table is kept as its text form, as it was before.
    .fillna(_pred_codes.astype(str))
    .rename("result")
)
dfyPred

In [None]:
# Score the network on the labelled test data (loss and accuracy).
model.evaluate(xtest, ytest)

In [None]:
# `plot_confusion_matrix` is no longer imported from sklearn.metrics: it was
# removed in scikit-learn 1.2, which made this whole import fail there. The
# cells below only need the three functions imported here.
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
# Fraction of test rows whose predicted name equals the true label.
accuracy_score(df_YTrue,dfyPred)

In [None]:
# Confusion matrix: rows are the true classes, columns the predicted ones.
print(confusion_matrix(df_YTrue,dfyPred))

In [None]:
# Per-class precision, recall and F1 on the labelled test data.
print(classification_report(df_YTrue,dfyPred))

In [None]:
# Count the rows whose predicted name differs from the true label.
n_total = xtest.shape[0]
n_wrong = (df_YTrue != dfyPred).sum()
print("Number of mislabeled points out of a total %d points : %d" % (n_total, n_wrong))

_________________________________________________________

## Our Test

In [None]:
# Five random rows of the competition test frame (unseeded, so they differ per run).
dfTestNEW.sample(5)

In [None]:
# Keep the test IDs aside for the submission file.
ID=dfTestNEW["ID"]

In [None]:
# Features of the competition test set. Besides ID, drop 'y' when it is
# present: a later cell writes the predictions into dfTestNEW['y'], and without
# this guard a re-run would feed that column to the network as a feature.
xtest = dfTestNEW.drop(columns=['ID', 'y'], errors='ignore')
xtest

In [None]:
from sklearn.preprocessing import StandardScaler

# Scale the competition test features with `sc`, the scaler fitted on the
# training split. A scaler re-fitted on the test data would standardise it
# with different statistics from the ones the network was trained on.
# NOTE(review): `sc` must still be the training-fitted scaler at this point;
# make sure no cell re-fits it on evaluation data before this one runs.
xtest = sc.transform(xtest)

In [None]:
# Predicted class index per competition test row.
y_pred2 = model.predict(xtest).argmax(axis=-1)
y_pred2

In [None]:
# Translate predicted class indices into bean names with one lookup table
# instead of seven chained np.where passes.
# Assumes indices follow the alphabetical class order hard-coded in the
# original mapping (0 = BARBUNYA ... 6 = SIRA).
_CLASS_NAMES = ['BARBUNYA', 'BOMBAY', 'CALI', 'DERMASON', 'HOROZ', 'SEKER', 'SIRA']
_pred_codes = pd.Series(y_pred2)
dfyPred = (
    _pred_codes.map(dict(enumerate(_CLASS_NAMES)))
    # An index outside the table is kept as its text form, as it was before.
    .fillna(_pred_codes.astype(str))
    .rename("yPredNew")
)
dfyPred

In [None]:
# Attach the predictions by position. Assigning the Series directly aligns on
# index labels, which would silently produce NaN labels if dfTestNEW's index
# were anything other than 0..n-1.
dfTestNEW['y'] = dfyPred.to_numpy()

In [None]:
# Inspect the test frame with the predicted labels attached.
dfTestNEW

In [None]:
# Move the ID column to the end of the frame, then write the ID/label pairs
# as the submission file.
dfTestNEW = pd.concat([dfTestNEW.drop(columns="ID"), ID], axis=1)

#dfTestNEW[['ID', 'y']].to_csv('/kaggle/working/submission.csv', index=False)
submission_cols = ['ID', 'y']
dfTestNEW[submission_cols].to_csv('submission.csv', index=False)

__________________________