In [1]:
import pandas as pd
import numpy as np
import sys
import sklearn
import io
import random
import time

In [2]:
train_url = 'https://raw.githubusercontent.com/merteroglu/NSL-KDD-Network-Instrusion-Detection/master/NSL_KDD_Train.csv'
test_url = 'https://raw.githubusercontent.com/merteroglu/NSL-KDD-Network-Instrusion-Detection/master/NSL_KDD_Test.csv'

In [3]:
col_names = ["duration","protocol_type","service","flag","src_bytes",
    "dst_bytes","land","wrong_fragment","urgent","hot","num_failed_logins",
    "logged_in","num_compromised","root_shell","su_attempted","num_root",
    "num_file_creations","num_shells","num_access_files","num_outbound_cmds",
    "is_host_login","is_guest_login","count","srv_count","serror_rate",
    "srv_serror_rate","rerror_rate","srv_rerror_rate","same_srv_rate",
    "diff_srv_rate","srv_diff_host_rate","dst_host_count","dst_host_srv_count",
    "dst_host_same_srv_rate","dst_host_diff_srv_rate","dst_host_same_src_port_rate",
    "dst_host_srv_diff_host_rate","dst_host_serror_rate","dst_host_srv_serror_rate",
    "dst_host_rerror_rate","dst_host_srv_rerror_rate","label"]


df = pd.read_csv(train_url,header=None, names = col_names)

df_test = pd.read_csv(test_url, header=None, names = col_names)

print('Dimensions of the Training set:',df.shape)
print('Dimensions of the Test set:',df_test.shape)

Dimensions of the Training set: (125973, 42)
Dimensions of the Test set: (22544, 42)


In [4]:
df.head(5)

Unnamed: 0,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,...,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,label
0,0,tcp,ftp_data,SF,491,0,0,0,0,0,...,25,0.17,0.03,0.17,0.0,0.0,0.0,0.05,0.0,normal
1,0,udp,other,SF,146,0,0,0,0,0,...,1,0.0,0.6,0.88,0.0,0.0,0.0,0.0,0.0,normal
2,0,tcp,private,S0,0,0,0,0,0,0,...,26,0.1,0.05,0.0,0.0,1.0,1.0,0.0,0.0,neptune
3,0,tcp,http,SF,232,8153,0,0,0,0,...,255,1.0,0.0,0.03,0.04,0.03,0.01,0.0,0.01,normal
4,0,tcp,http,SF,199,420,0,0,0,0,...,255,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,normal


In [5]:
print('Label distribution Training set:')
print(df['label'].value_counts())
print()
print('Label distribution Test set:')
print(df_test['label'].value_counts())

Label distribution Training set:
label
normal             67343
neptune            41214
satan               3633
ipsweep             3599
portsweep           2931
smurf               2646
nmap                1493
back                 956
teardrop             892
warezclient          890
pod                  201
guess_passwd          53
buffer_overflow       30
warezmaster           20
land                  18
imap                  11
rootkit               10
loadmodule             9
ftp_write              8
multihop               7
phf                    4
perl                   3
spy                    2
Name: count, dtype: int64

Label distribution Test set:
label
normal             9711
neptune            4657
guess_passwd       1231
mscan               996
warezmaster         944
apache2             737
satan               735
processtable        685
smurf               665
back                359
snmpguess           331
saint               319
mailbomb            293
snmpgetattac

In [6]:
df['protocol_type']= df['protocol_type'].astype('category').cat.codes
df['flag'] = df['flag'].astype('category').cat.codes
df['service'] = df['service'].astype('category').cat.codes
df['label'] = df['label'].astype('category').cat.codes

In [7]:
 print('Label distribution Training set:')
 print(df['label'].value_counts())
 print()
 print('Label distribution Test set:')
 print(df_test['label'].value_counts())

Label distribution Training set:
label
11    67343
9     41214
17     3633
5      3599
15     2931
18     2646
10     1493
0       956
20      892
21      890
14      201
3        53
1        30
22       20
6        18
4        11
16       10
7         9
2         8
8         7
13        4
12        3
19        2
Name: count, dtype: int64

Label distribution Test set:
label
normal             9711
neptune            4657
guess_passwd       1231
mscan               996
warezmaster         944
apache2             737
satan               735
processtable        685
smurf               665
back                359
snmpguess           331
saint               319
mailbomb            293
snmpgetattack       178
portsweep           157
ipsweep             141
httptunnel          133
nmap                 73
pod                  41
buffer_overflow      20
multihop             18
named                17
ps                   15
sendmail             14
rootkit              13
xterm                13

Data Preprocessing:

In [8]:
print('Training set:')
for col_name in df.columns:
    if df[col_name].dtypes == 'object' :
        unique_cat = len(df[col_name].unique())
        print("Feature '{col_name}' has {unique_cat} categories".format(col_name=col_name, unique_cat=unique_cat))

print()
print('Distribution of categories in service:')
print(df['service'].value_counts().sort_values(ascending=False).head())

Training set:

Distribution of categories in service:
service
24    40338
49    21853
12     9043
54     7313
20     6860
Name: count, dtype: int64


In [9]:
print('Test set:')
for col_name in df_test.columns:
    if df_test[col_name].dtypes == 'object' :
        unique_cat = len(df_test[col_name].unique())
        print("Feature '{col_name}' has {unique_cat} categories".format(col_name=col_name, unique_cat=unique_cat))

Test set:
Feature 'protocol_type' has 3 categories
Feature 'service' has 64 categories
Feature 'flag' has 11 categories
Feature 'label' has 38 categories


LabelEncoder

Insert categorical features into a 2D numpy array

In [10]:
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
categorical_columns=['protocol_type', 'service', 'flag']

df_categorical_values = df[categorical_columns]
testdf_categorical_values = df_test[categorical_columns]

df_categorical_values.head()

Unnamed: 0,protocol_type,service,flag
0,1,20,9
1,2,44,9
2,1,49,5
3,1,24,9
4,1,24,9


This below cell had an error which was concatenation error i.e it was trying to 
concatenate string and int.

In [12]:
# protocol_type
unique_protocol = sorted(df.protocol_type.unique())
string1 = 'Protocol_type_'
unique_protocol2 = [string1 + str(x) for x in unique_protocol] # Chnaged x to  Str(x) here
print(unique_protocol2)

# service
unique_service = sorted(df.service.unique())
string2 = 'service_'
unique_service2 = [string2 + str(x) for x in unique_service]  # Chnaged x to  Str(x) here
print(unique_service2)

# flag
unique_flag = sorted(df.flag.unique())
string3 = 'flag_'
unique_flag2 = [string3 + str(x) for x in unique_flag]  # Chnaged x to  Str(x) here
print(unique_flag2)

# put together
dumcols = unique_protocol2 + unique_service2 + unique_flag2

# for test set
unique_service_test = sorted(df_test.service.unique())
unique_service2_test = [string2 + str(x) for x in unique_service_test]  # Chnaged x to  Str(x) here
testdumcols = unique_protocol2 + unique_service2_test + unique_flag2


['Protocol_type_0', 'Protocol_type_1', 'Protocol_type_2']
['service_0', 'service_1', 'service_2', 'service_3', 'service_4', 'service_5', 'service_6', 'service_7', 'service_8', 'service_9', 'service_10', 'service_11', 'service_12', 'service_13', 'service_14', 'service_15', 'service_16', 'service_17', 'service_18', 'service_19', 'service_20', 'service_21', 'service_22', 'service_23', 'service_24', 'service_25', 'service_26', 'service_27', 'service_28', 'service_29', 'service_30', 'service_31', 'service_32', 'service_33', 'service_34', 'service_35', 'service_36', 'service_37', 'service_38', 'service_39', 'service_40', 'service_41', 'service_42', 'service_43', 'service_44', 'service_45', 'service_46', 'service_47', 'service_48', 'service_49', 'service_50', 'service_51', 'service_52', 'service_53', 'service_54', 'service_55', 'service_56', 'service_57', 'service_58', 'service_59', 'service_60', 'service_61', 'service_62', 'service_63', 'service_64', 'service_65', 'service_66', 'service_67',

Transform categorical features into numbers using LabelEncoder()

In [13]:
df_categorical_values_enc=df_categorical_values.apply(LabelEncoder().fit_transform)

print(df_categorical_values.head())
print('--------------------')
print(df_categorical_values_enc.head())

# test set
testdf_categorical_values_enc=testdf_categorical_values.apply(LabelEncoder().fit_transform)

   protocol_type  service  flag
0              1       20     9
1              2       44     9
2              1       49     5
3              1       24     9
4              1       24     9
--------------------
   protocol_type  service  flag
0              1       20     9
1              2       44     9
2              1       49     5
3              1       24     9
4              1       24     9


One-Hot-Encoding

In [14]:
enc = OneHotEncoder(categories='auto')
df_categorical_values_encenc = enc.fit_transform(df_categorical_values_enc)
df_cat_data = pd.DataFrame(df_categorical_values_encenc.toarray(),columns=dumcols)


# test set
testdf_categorical_values_encenc = enc.fit_transform(testdf_categorical_values_enc)
testdf_cat_data = pd.DataFrame(testdf_categorical_values_encenc.toarray(),columns=testdumcols)

df_cat_data.head()

Unnamed: 0,Protocol_type_0,Protocol_type_1,Protocol_type_2,service_0,service_1,service_2,service_3,service_4,service_5,service_6,...,flag_1,flag_2,flag_3,flag_4,flag_5,flag_6,flag_7,flag_8,flag_9,flag_10
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [16]:
trainservice=df['service'].tolist()
testservice= df_test['service'].tolist()
difference=list(set(trainservice) - set(testservice))
string = 'service_'
difference=[string + str(x) for x in difference]
difference

['service_0',
 'service_1',
 'service_2',
 'service_3',
 'service_4',
 'service_5',
 'service_6',
 'service_7',
 'service_8',
 'service_9',
 'service_10',
 'service_11',
 'service_12',
 'service_13',
 'service_14',
 'service_15',
 'service_16',
 'service_17',
 'service_18',
 'service_19',
 'service_20',
 'service_21',
 'service_22',
 'service_23',
 'service_24',
 'service_25',
 'service_26',
 'service_27',
 'service_28',
 'service_29',
 'service_30',
 'service_31',
 'service_32',
 'service_33',
 'service_34',
 'service_35',
 'service_36',
 'service_37',
 'service_38',
 'service_39',
 'service_40',
 'service_41',
 'service_42',
 'service_43',
 'service_44',
 'service_45',
 'service_46',
 'service_47',
 'service_48',
 'service_49',
 'service_50',
 'service_51',
 'service_52',
 'service_53',
 'service_54',
 'service_55',
 'service_56',
 'service_57',
 'service_58',
 'service_59',
 'service_60',
 'service_61',
 'service_62',
 'service_63',
 'service_64',
 'service_65',
 'service_66',
 'ser

In [17]:
for col in difference:
    testdf_cat_data[col] = 0

print(df_cat_data.shape)    
print(testdf_cat_data.shape)

(125973, 84)
(22544, 148)


New numerical columns are added to the main DataFrame

In [18]:
newdf=df.join(df_cat_data)
newdf.drop('flag', axis=1, inplace=True)
newdf.drop('protocol_type', axis=1, inplace=True)
newdf.drop('service', axis=1, inplace=True)

# test data
newdf_test=df_test.join(testdf_cat_data)
newdf_test.drop('flag', axis=1, inplace=True)
newdf_test.drop('protocol_type', axis=1, inplace=True)
newdf_test.drop('service', axis=1, inplace=True)

print(newdf.shape)
print(newdf_test.shape)

(125973, 123)
(22544, 187)


In [19]:
labeldf=newdf['label']
labeldf_test=newdf_test['label']


# change the label column
newlabeldf=labeldf.replace({ 'normal' : 0, 'neptune' : 1 ,'back': 1, 'land': 1, 'pod': 1, 'smurf': 1, 'teardrop': 1,'mailbomb': 1, 'apache2': 1, 'processtable': 1, 'udpstorm': 1, 'worm': 1,
                           'ipsweep' : 1,'nmap' : 1,'portsweep' : 1,'satan' : 1,'mscan' : 1,'saint' : 1,
                            'ftp_write': 1,'guess_passwd': 1,'imap': 1,'multihop': 1,'phf': 1,'spy': 1,'warezclient': 1,'warezmaster': 1,'sendmail': 1,'named': 1,'snmpgetattack': 1,'snmpguess': 1,'xlock': 1,'xsnoop': 1,'httptunnel': 1,
                           'buffer_overflow': 1,'loadmodule': 1,'perl': 1,'rootkit': 1,'ps': 1,'sqlattack': 1,'xterm': 1 })
newlabeldf_test=labeldf_test.replace({ 'normal' : 0, 'neptune' : 1 ,'back': 1, 'land': 1, 'pod': 1, 'smurf': 1, 'teardrop': 1,'mailbomb': 1, 'apache2': 1, 'processtable': 1, 'udpstorm': 1, 'worm': 1,
                           'ipsweep' : 1,'nmap' : 1,'portsweep' : 1,'satan' : 1,'mscan' : 1,'saint' : 1
                           ,'ftp_write': 1,'guess_passwd': 1,'imap': 1,'multihop': 1,'phf': 1,'spy': 1,'warezclient': 1,'warezmaster': 1,'sendmail': 1,'named': 1,'snmpgetattack': 1,'snmpguess': 1,'xlock': 1,'xsnoop': 1,'httptunnel': 1,
                           'buffer_overflow': 1,'loadmodule': 1,'perl': 1,'rootkit': 1,'ps': 1,'sqlattack': 1,'xterm': 1})


# put the new label column back
newdf['label'] = newlabeldf
newdf_test['label'] = newlabeldf_test

  newlabeldf_test=labeldf_test.replace({ 'normal' : 0, 'neptune' : 1 ,'back': 1, 'land': 1, 'pod': 1, 'smurf': 1, 'teardrop': 1,'mailbomb': 1, 'apache2': 1, 'processtable': 1, 'udpstorm': 1, 'worm': 1,


In [21]:
x = newdf.drop(['duration','land','wrong_fragment','urgent','num_failed_logins','logged_in','num_compromised','num_file_creations','num_root','root_shell','su_attempted','num_shells','num_access_files','num_outbound_cmds','is_host_login','is_guest_login','flag_S2','flag_S3','flag_SH','srv_rerror_rate','service_csnet_ns','service_ctf','service_daytime','service_discard','service_domain','service_domain_u','service_echo','service_eco_i','service_ecr_i','service_efs','service_exec','service_finger','service_ftp','service_ftp_data','service_gopher','service_netbios_ns','service_ldap','service_kshell','service_klogin','service_iso_tsap','service_imap4','service_http_443','service_hostnames','service_netbios_dgm','service_name','service_mtp','service_login','service_link','service_pop_3','service_pop_2','service_pm_dump','service_other','service_ntp_u','service_nntp','service_nnsp','service_netstat','service_netbios_ssn','service_ssh','service_sql_net','service_sunrpc','service_smtp','service_shell','service_rje','service_remote_job','service_private','service_printer','service_uucp_path','service_uucp','service_urp_i','service_time','service_tim_i','service_tftp_u','service_telnet','service_systat','service_supdup','dst_host_count','srv_diff_host_rate','diff_srv_rate','flag_S0','flag_S1','rerror_rate','flag_RSTR','flag_RSTOS0','flag_RSTO','flag_REJ','flag_OTH','service_whois','service_vmnet','srv_serror_rate','serror_rate','service_urh_i','service_red_i','service_harvest','service_http_2784','dst_host_srv_rerror_rate','dst_host_rerror_rate','dst_host_srv_serror_rate','dst_host_serror_rate','dst_host_srv_diff_host_rate','Protocol_type_tcp','Protocol_type_udp','service_IRC','service_X11','service_Z39_50','service_auth','service_bgp','service_courier','service_http_8001','service_aol'], axis=1,errors='ignore')


In [23]:
x_test =newdf_test.drop(['duration','land','wrong_fragment','urgent','num_failed_logins','logged_in','num_compromised','num_file_creations','num_root','root_shell','su_attempted','num_shells','num_access_files','num_outbound_cmds','is_host_login','is_guest_login','flag_S2','flag_S3','flag_SH','srv_rerror_rate','service_csnet_ns','service_ctf','service_daytime','service_discard','service_domain','service_domain_u','service_echo','service_eco_i','service_ecr_i','service_efs','service_exec','service_finger','service_ftp','service_ftp_data','service_gopher','service_netbios_ns','service_ldap','service_kshell','service_klogin','service_iso_tsap','service_imap4','service_http_443','service_hostnames','service_netbios_dgm','service_name','service_mtp','service_login','service_link','service_pop_3','service_pop_2','service_pm_dump','service_other','service_ntp_u','service_nntp','service_nnsp','service_netstat','service_netbios_ssn','service_ssh','service_sql_net','service_sunrpc','service_smtp','service_shell','service_rje','service_remote_job','service_private','service_printer','service_uucp_path','service_uucp','service_urp_i','service_time','service_tim_i','service_tftp_u','service_telnet','service_systat','service_supdup','dst_host_count','srv_diff_host_rate','diff_srv_rate','flag_S0','flag_S1','rerror_rate','flag_RSTR','flag_RSTOS0','flag_RSTO','flag_REJ','flag_OTH','service_whois','service_vmnet','srv_serror_rate','serror_rate','service_urh_i','service_red_i','service_harvest','service_http_2784','dst_host_srv_rerror_rate','dst_host_rerror_rate','dst_host_srv_serror_rate','dst_host_serror_rate','dst_host_srv_diff_host_rate','Protocol_type_tcp','Protocol_type_udp','service_IRC','service_X11','service_Z39_50','service_auth','service_bgp','service_courier','service_http_8001','service_aol'], axis=1,errors='ignore')


Step 2: Feature Scaling

In [25]:
# Split dataframes into X & Y
X_Df = x.drop('label',axis=1)
Y_Df = newdf.label

# test set
X_Df_test = x_test.drop('label',axis=1)
Y_Df_test = newdf_test.label
# changed the argument of .drop() from 1 to axis =1

In [26]:
X_Df.shape

(125973, 94)

Since the column names will be deleted at this stage, we save them to use later.

In [27]:
colNames=list(X_Df)
colNames_test=list(X_Df_test)

In [28]:
from sklearn import preprocessing
scaler1 = preprocessing.StandardScaler().fit(X_Df)
X_Df=scaler1.transform(X_Df) 

# test data
scaler5 = preprocessing.StandardScaler().fit(X_Df_test)
X_Df_test=scaler5.transform(X_Df_test) 

In [33]:
!pip install keras

Collecting keras
  Downloading keras-3.10.0-py3-none-any.whl.metadata (6.0 kB)
Collecting absl-py (from keras)
  Downloading absl_py-2.3.0-py3-none-any.whl.metadata (2.4 kB)
Collecting namex (from keras)
  Downloading namex-0.1.0-py3-none-any.whl.metadata (322 bytes)
Collecting optree (from keras)
  Downloading optree-0.16.0-cp312-cp312-win_amd64.whl.metadata (31 kB)
Collecting ml-dtypes (from keras)
  Downloading ml_dtypes-0.5.1-cp312-cp312-win_amd64.whl.metadata (22 kB)
Downloading keras-3.10.0-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 1.4/1.4 MB 10.3 MB/s eta 0:00:00
Downloading absl_py-2.3.0-py3-none-any.whl (135 kB)
Downloading ml_dtypes-0.5.1-cp312-cp312-win_amd64.whl (210 kB)
Downloading namex-0.1.0-py3-none-any.whl (5.9 kB)
Downloading optree-0.16.0-cp312-cp312-win_amd64.whl (315 kB)
Installing collected packages: namex, optree, ml-dtypes, absl-py, keras
Successfully installed absl-

In [35]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp312-cp312-win_amd64.whl.metadata (4.1 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting termcolor>=1.1.0 (from tensorflow)
  Downloading termcolor-3.1.0-py3-none-any.whl.metadata (6.4 kB)
Collecting grpcio<2.0,>=1.24.3 (from tensorflow)
  Downloading grpcio

In [37]:
from tensorflow.keras.utils import to_categorical
y_binary = to_categorical(Y_Df)
y_test_binary = to_categorical(Y_Df_test)

Random Forest - Feature Selection

In [38]:
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier


clf = RandomForestClassifier(n_estimators=10,n_jobs=2)
rfe = RFE(estimator=clf, n_features_to_select=13, step=1)

In [39]:
rfe.fit(X_Df, Y_Df.astype(int))
X_rfeDoS=rfe.transform(X_Df)
true=rfe.support_
rfecolindex_DoS=[i for i, x in enumerate(true) if x]
rfecolname_DoS=list(colNames[i] for i in rfecolindex_DoS)

Summary of features selected by RFE

In [40]:
print('Features selected for DoS:',rfecolname_DoS)
print()

Features selected for DoS: ['src_bytes', 'dst_bytes', 'count', 'srv_count', 'same_srv_rate', 'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate', 'Protocol_type_0', 'service_15', 'flag_5', 'flag_9']



In [41]:
print(X_Df.shape)

(125973, 94)


ANN

In [45]:
import time
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import to_categorical
from keras.datasets import mnist

from tensorflow.keras.utils import model_to_dot  # <-- changed here
from IPython.display import SVG

from sklearn.model_selection import train_test_split
from sklearn import metrics

from keras.models import load_model
from keras.layers import Dense, Activation # <-- changed here
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Fixed import for layers:
# Removed the deprecated 'keras.layers.core' path and imported Dense and Activation directly from 'keras.layers'
# as the 'core' module no longer exists in the updated keras API.

In [50]:
# Create neural net
model = Sequential()
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(10, kernel_initializer='normal'))
model.add(Dense(23, activation='softmax')) #  changed from 2 to 21 in the first argument here.


In [51]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [52]:
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=3, verbose=1, mode='auto')
checkpointer = ModelCheckpoint(filepath="modelann.h5", verbose=0, save_best_only=True)

In [53]:
model.fit(X_Df, y_binary, batch_size=128, epochs=50, verbose=1, validation_split=0.1)

Epoch 1/50
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 16ms/step - accuracy: 0.8252 - loss: 0.0799 - val_accuracy: 0.9758 - val_loss: 0.0064
Epoch 2/50
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 14ms/step - accuracy: 0.9755 - loss: 0.0061 - val_accuracy: 0.9731 - val_loss: 0.0058
Epoch 3/50
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 14ms/step - accuracy: 0.9773 - loss: 0.0053 - val_accuracy: 0.9768 - val_loss: 0.0058
Epoch 4/50
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - accuracy: 0.9783 - loss: 0.0050 - val_accuracy: 0.9786 - val_loss: 0.0048
Epoch 5/50
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 16ms/step - accuracy: 0.9790 - loss: 0.0047 - val_accuracy: 0.9788 - val_loss: 0.0048
Epoch 6/50
[1m886/886[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - accuracy: 0.9806 - loss: 0.0044 - val_accuracy: 0.9798 - val_loss: 0.0048
Epoch 7/50
[1m8

<keras.src.callbacks.history.History at 0x23d91f9c1a0>