In [None]:
####################################################################################################################
# Competition   : Dry Beans Classification
# Team Members  : 1- Abdullah Abdelhakeem
#                 2- Mohamed Sebaie                   
#                 3- Mohamed Moustafa
#                 4- Ossama Ahmed
#                 5- Mahmoud Osama
#                 6- Hazem
#
# Problem :       Supervised Classification
# version :       0.0.1
#
###################################################################################################################

# Dataset Information



Given a set of features extracted from the shape of beans in images, the task is to predict the class of each bean from those shape features.
There are 7 bean types in this dataset.

**Data fields**
- ID - an ID for this instance
- Area - (A), The area of a bean zone and the number of pixels within its boundaries.
- Perimeter - (P), Bean circumference is defined as the length of its border.
- MajorAxisLength - (L), The distance between the ends of the longest line that can be drawn from a bean.
- MinorAxisLength - (l), The longest line that can be drawn from the bean while standing perpendicular to the main axis.
- AspectRatio - (K), Defines the relationship between L and l.
- Eccentricity - (Ec), Eccentricity of the ellipse having the same moments as the region.
- ConvexArea - (C), Number of pixels in the smallest convex polygon that can contain the area of a bean seed.
- EquivDiameter - (Ed), The diameter of a circle having the same area as a bean seed area.
- Extent - (Ex), The ratio of the pixels in the bounding box to the bean area.
- Solidity - (S), Also known as convexity. The ratio of the pixels in the convex shell to those found in beans.
- Roundness - (R), Calculated with the following formula: (4piA)/(P^2)
- Compactness - (CO), Measures the roundness of an object: Ed/L
- ShapeFactor1 - (SF1)
- ShapeFactor2 - (SF2)
- ShapeFactor3 - (SF3)
- ShapeFactor4 - (SF4)
- y - the class of the bean. It can be any of BARBUNYA, SIRA, HOROZ, DERMASON, CALI, BOMBAY, and SEKER.


<img src= "https://www.thespruceeats.com/thmb/eeIti36pfkoNBaipXrTHLjIv5YA=/1888x1416/smart/filters:no_upscale()/DriedBeans-56f6c2c43df78c78418c3b46.jpg" alt="Dried beans" style='width: 800px;height:400px'>

# Import Required Libraries

In [None]:
# !pip install catboost

In [None]:
# pip install mlxtend

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import statsmodels.api as sm
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import ConfusionMatrixDisplay,confusion_matrix
from mlxtend.plotting import plot_confusion_matrix
#####################################################################
# for visualizations
from pandas import plotting
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
plt.style.use('fivethirtyeight')
sns.set(rc={'figure.figsize': [7, 7]}, font_scale=1.2)
###########################################################
%config Completer.use_jedi=False
pd.set_option("display.max_columns", None)

# ignore the warnings
import warnings
warnings.filterwarnings("ignore")
np.set_printoptions(suppress=True)

# for path
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Read Train and Test Data

In [None]:
# dataset_path = '/kaggle/input/dry-beans-classification-iti-ai-pro-intake01/'
# dfTrainO = pd.read_csv(os.path.join(dataset_path, 'train.csv'))
# dfTestO = pd.read_csv(os.path.join(dataset_path, 'test.csv'))
# print("The shape of the dataset is {}.\n\n".format(dfTrainO.shape))
# print("The shape of the dataset is {}.\n\n".format(dfTestO.shape))
# dfTrainO.head()

In [None]:
dfTrainO=pd.read_csv("train.csv")
print('---'*30)
print('Train Head')
print('---'*30)
display(dfTrainO.head())
dfTestO=pd.read_csv("test.csv")
print('---'*30)
print('Test Head')
print('---'*30)
display(dfTestO.head())

__________________________________________________

In [None]:
print(dfTrainO.shape)

print(dfTestO.shape)

In [None]:
print(dfTrainO.info())
print("***"*20)
print(dfTestO.info())

In [None]:
dfTrainO.describe().T

**Features like:** (Eccentricity , Extent ,Solidity ,roundness ,Compactness ,and shapeFactor3,4 ) **ranges between (0 and 1)**

**Features like:** (shapeFactor1,2) **have values on the order of 0.001**

**On the other side , there are other features like:**
- (Area) ranges between (20420 and 254616 )
- (ConvexArea) ranges between (20684 and 263261 )
- (Perimeter) ranges between (524 and 1986)
- (MajorAxisLength) ranges between (183 and 738)
- (MinorAxisLength) ranges between (129 and 451)
- (EquivDiameter) ranges between (161 and 570)
- (AspectRation) ranges between (1 and 2.4)


When the columns of a dataset are on very different scales, it becomes hard to analyze trends and patterns, so we need to make sure that the columns do not differ significantly in scale. They can be transformed so that all values fall into the same range. This process is called scaling.

In [None]:
dfTestO.describe().T

In [None]:
dfTrainO.describe(include=object)

In [None]:
dfTrainO['y'].value_counts()

# Data Visualization
**Heatmap**

In [None]:
# Correlate the numeric columns only: the string target `y` cannot be correlated,
# and pandas >= 2.0 raises on it instead of silently dropping it.
corr_matrix = dfTrainO.select_dtypes(include="number").corr()

plt.figure(figsize=(15,15))
plt.title('Correlation Heatmap of Beans Dataset')
a = sns.heatmap(corr_matrix, square=True, annot=True, fmt='.2f', linecolor='black')
# Tilt the tick labels so the long feature names stay readable.
a.set_xticklabels(a.get_xticklabels(), rotation=30)
a.set_yticklabels(a.get_yticklabels(), rotation=30)
plt.show()

In [None]:
# Absolute correlations between the numeric features (ID removed; the string
# target `y` is excluded because it cannot be correlated).
dfTrainO.drop(columns="ID").select_dtypes(include="number").corr().abs()

From this correlation matrix we can extract the features that are strongly correlated, such as:
- Area
- Perimeter
- MajorAxisLength
- MinorAxisLength
- ConvexArea
- EquivDiameter
- ShapeFactor1

Features to be dropped:

- ShapeFactor3
- Compactness
- AspectRation
- Area
- MajorAxisLength
- MinorAxisLength
- ConvexArea
- EquivDiameter
- ShapeFactor1

In [None]:
Strongly_corr_features = dfTrainO.drop(columns="ID")
Strongly_corr_features.head()
sns.set_theme(style="whitegrid")
sns.pairplot(Strongly_corr_features, hue="y")

**From the graph above, Linear and log relations can be detected.**

**The next step is to examine how the bean classes are affected by individual features.**

In [None]:
sns.boxplot(x="y", y="MajorAxisLength", data=dfTrainO)

In [None]:
sns.boxplot(x="y", y="Perimeter", data=dfTrainO)

- A perimeter is  a path that encompasses/surrounds/outlines a shape or its length. 'Wikipedia'
- The above graph shows that (BOMBAY) has the highest perimeter

In [None]:
dfTrainNEW=dfTrainO.copy()

In [None]:
dfTrainNEW.columns

In [None]:
dfTestNEW=dfTestO.copy()

# Value Counts and Uniques Equations

In [None]:
def ValueCounts(df):
    """Print, for every column of ``df``, a table of its value counts.

    Each column is rendered as its name, a dashed rule, the
    ``value_counts()`` frame and a starred rule. Nothing is returned.
    """
    top_rule = "-----------------"
    bottom_rule = "******************"
    for column in df.columns:
        counts = df[column].value_counts().to_frame()
        print(column+"\n"+top_rule+"\n")
        print(counts)
        print("\n"+bottom_rule+"\n")
        
def UniqueValues(df,l):
    """Print the number of distinct values of each column of ``df`` named in ``l``.

    Each entry is rendered as the column name, a dashed rule, the
    ``nunique()`` count and a starred rule. Nothing is returned.
    """
    top_rule = "-----------------"
    bottom_rule = "******************"
    for column in l:
        distinct = df[column].nunique()
        print(column+"\n"+top_rule+"\n")
        print(distinct)
        print("\n"+bottom_rule+"\n")

In [None]:
ValueCounts(dfTrainNEW)

In [None]:
ValueCounts(dfTestNEW)

In [None]:
UniqueValues(dfTrainNEW,dfTrainNEW.columns)

In [None]:
UniqueValues(dfTestNEW,dfTestNEW.columns)

In [None]:
dfTrainNEW.describe().T

In [None]:
dfTestNEW.describe().T

_________________________________________________

_______________________________________________________

# Check for skewness

In [None]:
# Features whose distributions are inspected. ShapeFactor1 and ShapeFactor2 are
# left out, as they were commented out of the original grid.
kde_features = [
    'Area', 'Perimeter', 'MajorAxisLength',
    'MinorAxisLength', 'AspectRation', 'Eccentricity',
    'ConvexArea', 'EquivDiameter', 'Extent',
    'Solidity', 'roundness', 'Compactness',
    'ShapeFactor3', 'ShapeFactor4',
]

n_cols = 3
n_rows = -(-len(kde_features) // n_cols)  # ceiling division
figure, axes = plt.subplots(nrows=n_rows, ncols=n_cols)
figure.set_size_inches(20,15)

flat_axes = axes.ravel()
for ax, feature in zip(flat_axes, kde_features):
    # `fill` is the current name of the deprecated `shade` keyword.
    sns.kdeplot(dfTrainNEW[feature], fill=True, ax=ax)

# Hide the grid cells that received no plot.
for ax in flat_axes[len(kde_features):]:
    ax.set_visible(False)

# Lay out after drawing so the spacing accounts for the axis labels.
figure.tight_layout();


## Features to Check

In [None]:
def SkewnessCheck(df,feature):
    """Plot the KDE of ``feature`` beside its log1p, sqrt and squared transforms.

    Draws one figure with four side-by-side panels so the skewness of the raw
    column can be compared with the three candidate transformations.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the column ``feature``. It is not modified.
    feature : str
        Name of the column to inspect.

    Returns
    -------
    None
    """
    raw = df[feature]
    # Build only the four series that are plotted instead of copying the whole frame.
    panels = [
        raw,
        np.log1p(raw).rename('log'+feature),
        np.sqrt(raw).rename('sqrt'+feature),
        np.power(raw, 2).rename('squar'+feature),
    ]
    figure, axes = plt.subplots(nrows=1, ncols=4)
    figure.set_size_inches(20,10)
    for ax, values in zip(axes, panels):
        # `fill` is the current name of the deprecated `shade` keyword.
        sns.kdeplot(values, fill=True, ax=ax)

In [None]:
for f in dfTrainNEW.columns.to_list()[1:-5]:
      SkewnessCheck(dfTrainNEW,f)

In [None]:
dfTrainNEW.columns

### Best Parameters to Use
#### `Log` Area, Perimeter, MajorAxisLength, MinorAxisLength, ConvexArea, EquivDiameter

## Apply To Train and Test Data

In [None]:
def Convert(df,feature):
    """Add a ``'log' + feature`` column holding ``log1p`` of ``feature``.

    The frame is modified in place and nothing is returned.
    """
    log_column = 'log'+feature
    df[log_column] = np.log1p(df[feature])
    

In [None]:
featuersToconvert=['Area', 'Perimeter', 'MajorAxisLength', 'MinorAxisLength', 'ConvexArea', 'EquivDiameter']

In [None]:
for f in featuersToconvert:
      Convert(dfTrainNEW,f)

In [None]:
for f in featuersToconvert:
      Convert(dfTestNEW,f)

In [None]:
dfTrainNEW.shape

In [None]:
dfTestNEW.shape

In [None]:
dfTrainNEW.info()

In [None]:
dfTestNEW.columns

In [None]:
dfTestNEW.info()

________________________________________________

## Data Splitting

Now it's time to split the dataset for the training step. Typically the dataset is split into 3 subsets, namely, the training, validation and test sets. In our case, the test set is already predefined. So we'll split the "training" set into training and validation sets with 0.8:0.2 ratio. 


In [None]:
from sklearn.model_selection import cross_val_score,train_test_split

# Hold out 20% of the labelled rows for validation.
df_train, df_val = train_test_split(dfTrainNEW, test_size=0.2, random_state=42)

# Identifier, target and the raw features removed from the model inputs;
# the same list is applied to both splits.
dropped_columns = [
    'ID', 'y',
    'ShapeFactor3', 'Compactness', 'AspectRation',
    'Area', 'MajorAxisLength', 'MinorAxisLength',
    'ConvexArea', 'EquivDiameter', 'ShapeFactor1',
]

X_train, y_train = df_train.drop(columns=dropped_columns), df_train['y']
X_val, y_val = df_val.drop(columns=dropped_columns), df_val['y']


print(f"X_train ={X_train.shape},\nX_val.shape = {X_val.shape},\ny_train.shape= {y_train.shape},\ny_val.shape={y_val.shape}")

# Machine Learning Model 

## Train Data

In [None]:
# pip install keras 

In [None]:
# `keras.utils.np_utils` was removed from recent Keras releases;
# `keras.utils.to_categorical` is the public import path.
from keras.utils import to_categorical

In [None]:
encoder = LabelEncoder()
encoder.fit(dfTrainNEW["y"])
encoded_Y = encoder.transform(dfTrainNEW["y"])
# convert integers to dummy variables (i.e. one hot encoded)
y = to_categorical(encoded_Y)

In [None]:
y

In [None]:
from sklearn.model_selection import train_test_split
# Re-split the labelled data 67/33, stratified on the target so both parts keep
# the class proportions. This replaces the X_train / X_val / y_train / y_val
# produced by the earlier 80/20 split.
train_df, val_df = train_test_split(dfTrainNEW, test_size=0.33, random_state=0,stratify =dfTrainNEW['y'])
# All columns except the identifier and the target are used as model inputs.
X_train = train_df.drop(columns=["ID","y"])
y_train = train_df['y']
################################################################################################
X_val = val_df.drop(columns=["ID","y"])
y_val = val_df['y']

In [None]:
encoder = LabelEncoder()
encoder.fit(y_train)
y_train = encoder.transform(y_train)
# convert integers to dummy variables (i.e. one hot encoded)
# y_train = np_utils.to_categorical(encoded_Y)

In [None]:
# Encode the validation labels with `encoder`, which was fitted on the training
# labels in the previous cell. Fitting a second encoder on the validation labels
# could assign different integer codes if the two label sets ever differ.
y_val = encoder.transform(y_val)
# convert integers to dummy variables (i.e. one hot encoded)
# y_val = np_utils.to_categorical(encoded_Y)

In [None]:
def normalize(df):
    """Min-max scale every column of ``df`` into the [0, 1] range.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame of numeric columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns, where each column is
        ``(value - column_min) / (column_max - column_min)``. A constant
        column (zero range) maps to 0.0 instead of NaN.
    """
    minimums = df.min()
    ranges = df.max() - minimums
    # A constant column has zero range; dividing by it would turn the whole
    # column into NaN, so divide by 1 there (the numerator is already 0).
    safe_ranges = ranges.where(ranges != 0, 1)
    return (df - minimums) / safe_ranges

# X_train=normalize(X_train)
# X_val=normalize(X_val)
# dfTest=normalize(dfTest)

# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler()
# scaler.fit(X_train)
# X_train = scaler.transform(X_train)
# X_val = scaler.transform(X_val)

# from sklearn.preprocessing import minmax_scale
# X_train = minmax_scale(X_train, feature_range=(0, 1))
# X_val = minmax_scale(X_val, feature_range=(0, 1))

# from sklearn.preprocessing import RobustScaler
# rb = RobustScaler()
# X_train= rb.fit_transform(X_train)
# X_val = rb.fit_transform(X_val)

from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features only, then apply the same fitted
# transform to both splits. X_train and X_val are overwritten with the scaled
# arrays returned by transform().
scaler  = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

In [None]:
# pip install -U imbalanced-learn


In [None]:
from imblearn.over_sampling import SMOTE
# Oversample the minority classes of the training split only.
smt = SMOTE(random_state=0)
X_train, y_train = smt.fit_resample(X_train, y_train)
# Per-class sample counts after resampling (the original referenced an
# undefined `y_SMOTE` here and raised NameError).
np.unique(y_train , return_counts=True)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier,GradientBoostingClassifier,AdaBoostClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [None]:
models = {
    # "LogisticRegression":           LogisticRegression(),
    "KNeighborsClassifier":         KNeighborsClassifier(), 
    "DecisionTreeClassifier":       DecisionTreeClassifier(),
    "SupportVectorMachine":         SVC(C=1.0,kernel='rbf',gamma='auto'),
    "LinearDiscriminantAnalysis":   LinearDiscriminantAnalysis(),
    "GaussianNB":                   GaussianNB(),
    "SGDClassifier":                SGDClassifier(),
    "RandomForestClassifier":       RandomForestClassifier(),
    "BaggingClassifier":            BaggingClassifier(),
    "CatBoostClassifier":           CatBoostClassifier(verbose=False,loss_function='MultiClass'),
    "LGBMClassifier":               LGBMClassifier(),
    "GradientBoostingClassifier":   GradientBoostingClassifier(),
    "XGBClassifier":                XGBClassifier(eval_metric='mlogloss')
}

In [None]:
X_train.shape

In [None]:
y_train.shape

In [None]:
# Fit every candidate on the training split and report its accuracy on the
# training and validation data.
for name, model in models.items():
    print(f'Using model: {name}')
    print('-'*30)
    model.fit(X_train, y_train)
    y_trainhat = model.predict(X_train)
    y_valhat = model.predict(X_val)
    train_acc = accuracy_score(y_train, y_trainhat)
    acc = accuracy_score(y_val, y_valhat)
    print(f'Train_Accuracy: {train_acc}')
    print(f'Validation_Accuracy: {acc}')
    print('**'*30)

In [None]:
list(set(dfTrainNEW.columns) - set(dfTestNEW.columns))

In [None]:
dfTestNEW.head(2)

## Test Data

In [None]:
# ############################################
# TestYTrue= pd.read_csv("yTest.csv")
# yTrueTest=TestYTrue["y"]
# ############################################

ID=dfTestNEW["ID"]


In [None]:
dfTestNEW.shape

In [None]:
dfTestNEW.loc[: , dfTestNEW.columns!="ID"]

In [None]:
X_test = dfTestNEW.loc[: , dfTestNEW.columns!="ID"]


In [None]:
# X_test

In [None]:
X_test=scaler.transform(X_test)

In [None]:
# Final model: CatBoost trained on the training split.
Model = CatBoostClassifier(iterations=700, learning_rate=0.2, verbose=False)
Model.fit(X_train, y_train)

y_trainhat = Model.predict(X_train)
y_valhat = Model.predict(X_val)
train_acc = accuracy_score(y_train, y_trainhat)
acc = accuracy_score(y_val, y_valhat)
print(f'Train_Accuracy: {train_acc}')
print(f'Validation_Accuracy: {acc}')
print('**'*30)

In [None]:
y_train

In [None]:
# Model was trained on label-encoded targets, so predict() returns integer
# class codes. Flatten them and map back to bean names with the fitted
# LabelEncoder so the submission holds class names rather than codes.
y_test_codes = np.asarray(Model.predict(X_test)).ravel().astype(int)
y_test_predicted = encoder.inverse_transform(y_test_codes)

dfTestO['y'] = y_test_predicted

# Move ID to the last column without mutating dfTestO in place, so the cell
# can be re-run (an in-place drop fails the second time: ID is already gone).
dfTest=pd.concat([dfTestO.drop(columns="ID"), ID],axis=1)

#dfTestEncoded2[['ID', 'y']].to_csv('/kaggle/working/submission.csv', index=False)
dfTest[['ID', 'y']].to_csv('submission.csv', index=False)

In [None]:
dfTest[['ID', 'y']]

# Test ActualData vs TestData

In [None]:
#DifferenceActualTest.csv
testAcual = pd.read_csv("DifferenceActualTest.csv" , )

In [None]:
testAcual.columns

In [None]:
from imblearn.over_sampling import SMOTE
# NOTE(review): X_train / y_train were already oversampled by the identical
# SMOTE cell earlier in the notebook; this repeat looks like a leftover —
# confirm it is still needed.
smt = SMOTE(random_state=0)
X_train, y_train = smt.fit_resample(X_train, y_train)
# Per-class sample counts after resampling (the original referenced an
# undefined `y_SMOTE` here and raised NameError).
np.unique(y_train , return_counts=True)

In [None]:
testAcual["Class"]

### END Test

__________________________________________________

# DeepLearning

In [None]:
# `keras.utils.np_utils` was removed from recent Keras releases. Binding the
# `keras.utils` package to the same name keeps the `np_utils.to_categorical(...)`
# calls below working on both old and new versions.
from keras import utils as np_utils

In [None]:
x=dfTrainNEW.drop(columns=["ID","y"])

In [None]:
yData=dfTrainNEW['y']

In [None]:
yData

In [None]:
encoder = LabelEncoder()
encoder.fit(yData)


In [None]:
encoder = LabelEncoder()
encoder.fit(yData)
encoded_Y = encoder.transform(yData)
# convert integers to dummy variables (i.e. one hot encoded)
y = np_utils.to_categorical(encoded_Y)

In [None]:
encoded_Y

In [None]:
y

In [None]:
# # define baseline model
# def baseline_model():
# 	# create model
# 	model = Sequential()
#   model.add(Dense(256, input_shape=[x.shape[1]], activation='relu'))
#   model.add(Dropout(0.2))
#   model.add(Dense(512, activation='relu'))
#   model.add(Dense(y.shape[1], activation='softmax'))
# 	# Compile model
# 	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# 	return model

In [None]:
# estimator = KerasClassifier(build_fn=baseline_model, epochs=200, batch_size=32, verbose=0)
# kfold = KFold(n_splits=10, shuffle=True)
# results = cross_val_score(estimator, X, dummy_y, cv=kfold)
# print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# 80/20 split of the features `x` and one-hot targets `y`, stratified on the
# string labels in `yData`. This rebinds y_train / y_val used by the earlier
# classical-ML cells.
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size = 0.2, random_state=42,stratify =yData)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only and apply the same fitted transform
# to the validation split; both names are overwritten with the scaled arrays.
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_val= sc.transform(x_val)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

In [None]:
x.shape

In [None]:
x.shape[1]

In [None]:
y.shape

In [None]:
y.shape[1]

In [None]:
x_train.shape

In [None]:
x_train

## Defining the model structure

In [None]:
# Fully connected classifier: the input width is the number of feature columns
# in `x`, and the softmax output has one unit per one-hot column of `y`.
model = Sequential()
model.add(Dense(256, input_shape=[x.shape[1]], activation='relu'))
# Dropout after the first hidden layer only.
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(y.shape[1], activation='softmax'))

In [None]:
model.summary()

In [None]:
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Multiply the learning rate by 0.75 whenever val_loss has not improved for 10 epochs.
lrd = ReduceLROnPlateau(monitor = 'val_loss',
                         patience = 10,
                         verbose = 1,
                         factor = 0.75,
                         min_lr = 1e-10)

# Keep only the best epoch on disk; without save_best_only the file is
# overwritten after every epoch and ends up holding the last epoch's weights.
mcp = ModelCheckpoint('model.h5', monitor='val_loss', save_best_only=True)

# Stop after 100 epochs without val_loss improvement and roll the model back
# to its best weights; otherwise the in-memory model keeps the weights from
# 100 epochs past its best point.
es = EarlyStopping(monitor='val_loss', verbose=1, patience=100, restore_best_weights=True)

## Training the model

In [None]:
# Train for up to 500 epochs with the three callbacks defined above. The
# validation metrics come from validation_split=0.1 of x_train; x_val / y_val
# are not passed here and are used only by the evaluation cells below.
history = model.fit(x=x_train, y=y_train, epochs=500, callbacks=[lrd, mcp, es], batch_size=32, validation_split=0.1)

## Testing & evaluating the model

In [None]:
y_pred = np.argmax(model.predict(x_val), axis=-1)
y_pred 

In [None]:
y_val

In [None]:
np.argmax(model.predict(x_val), axis=-1)[10]

In [None]:
# True class code of validation sample 10, for comparison with the predicted
# code shown above. (`yData.iloc[10]` is row 10 of the full training frame,
# which is a different sample from x_val[10].)
np.argmax(y_val[10])

In [None]:
model.evaluate(x_val, y_val)

In [None]:
y_pred

## Visualize Loss

In [None]:
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper right')
plt.show()

# summarize history for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

## True Test

In [None]:
dftest=pd.read_csv("testTrue.csv")

In [None]:
dftest['y'].unique()

In [None]:
for f in featuersToconvert:
  Convert(dftest,f)

In [None]:
df_YTrue=dftest["y"]
df_YTrue

In [None]:
# Encode the test labels with the class order learned from the training labels
# (yData), so the integer codes match the model's output units even if the test
# file does not contain every class.
encoder = LabelEncoder()
encoder.fit(yData)
encoded_Y = encoder.transform(df_YTrue)
# convert integers to dummy variables (i.e. one hot encoded); the width is
# pinned to the number of training classes
ytest = np_utils.to_categorical(encoded_Y, num_classes=len(encoder.classes_))

In [None]:
ytest

In [None]:
# dftest = pd.get_dummies(dftest, columns=['y'])
# dftest

In [None]:
xtest = dftest.drop(columns="y")

In [None]:
xtest.shape

In [None]:
from sklearn.preprocessing import StandardScaler

# Scale with `sc`, the StandardScaler fitted on the training split above.
# Fitting a fresh scaler on the test rows would standardize them with the test
# set's own mean and variance, so the model would see differently scaled inputs
# than it was trained on.
xtest = sc.transform(xtest)

In [None]:
y_pred2 = np.argmax(model.predict(xtest), axis=-1)
y_pred2

In [None]:
# Integer class code -> bean name, listed alphabetically (the order in which
# LabelEncoder numbers the classes). One mapping replaces the chain of np.where
# calls and the `lstrip('y_')`, which strips any leading 'y'/'_' characters
# rather than a prefix.
bean_names = ['BARBUNYA', 'BOMBAY', 'CALI', 'DERMASON', 'HOROZ', 'SEKER', 'SIRA']
dfyPred = pd.Series(y_pred2, name="result").map(dict(enumerate(bean_names)))
dfyPred

In [None]:
model.evaluate(xtest, ytest)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score, plot_confusion_matrix

In [None]:
accuracy_score(df_YTrue,dfyPred)

In [None]:
print(confusion_matrix(df_YTrue,dfyPred))

In [None]:
print(classification_report(df_YTrue,dfyPred))

In [None]:
print("Number of mislabeled points out of a total %d points : %d"% (xtest.shape[0], (df_YTrue != dfyPred).sum()))

_________________________________________________________

## Our Test

In [None]:
dfTestNEW.sample(5)

In [None]:
ID=dfTestNEW["ID"]

In [None]:
xtest=dfTestNEW.drop(columns=['ID'])
xtest

In [None]:
from sklearn.preprocessing import StandardScaler

# The test rows must be standardized with the training statistics, not their own.
# Rebuild the training-split scaler from the raw features `x`, using the same
# split parameters as the training cell, so this cell does not depend on
# whichever scaler is currently bound to `sc`.
x_train_raw, _ = train_test_split(x, test_size=0.2, random_state=42, stratify=yData)
sc = StandardScaler()
sc.fit(x_train_raw)
xtest = sc.transform(xtest)

In [None]:
y_pred2 = np.argmax(model.predict(xtest), axis=-1)
y_pred2

In [None]:
# Integer class code -> bean name, listed alphabetically (the order in which
# LabelEncoder numbers the classes). One mapping replaces the chain of np.where calls.
bean_names = ['BARBUNYA', 'BOMBAY', 'CALI', 'DERMASON', 'HOROZ', 'SEKER', 'SIRA']
dfyPred = pd.Series(y_pred2, name="yPredNew").map(dict(enumerate(bean_names)))
dfyPred

In [None]:
dfTestNEW['y'] = dfyPred

In [None]:
dfTestNEW

In [None]:
# Move the ID column to the end of the frame, then write the two submission columns.
features_and_prediction = dfTestNEW.drop(columns="ID")
dfTestNEW = pd.concat([features_and_prediction, ID], axis=1)

#dfTestNEW[['ID', 'y']].to_csv('/kaggle/working/submission.csv', index=False)
submission_columns = ['ID', 'y']
dfTestNEW[submission_columns].to_csv('submission.csv', index=False)

__________________________