In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the raw dataset; the relative path is resolved against the notebook's working directory.
DATA_FILE = "CH1.csv"
ch1 = pd.read_csv(DATA_FILE)

In [3]:
# Print a summary of the loaded frame: index range, number of columns, dtype counts and memory usage.
ch1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2617 entries, 0 to 2616
Columns: 5003 entries, ID to label
dtypes: float64(5001), int64(1), object(1)
memory usage: 99.9+ MB


In [4]:
# Target vector: the "label" column of the raw frame.
Y = ch1.loc[:, "label"]
# Feature table: every column whose dtype is not int64 (this keeps the object-typed "ID"
# column next to the float columns).
X = ch1.select_dtypes(exclude=["int64"])

### Data Analysis

#### Labels Histogram

In [5]:
# Number of samples per class label, most frequent first.
# Series.value_counts() replaces the top-level pd.value_counts(), which newer pandas
# releases deprecate; the resulting table is the same.
Y.value_counts()

3    387
1    382
2    379
4    377
5    375
6    364
0    353
Name: label, dtype: int64

In [6]:
# Keep only the float64 columns of X and transpose them, so that each column of `fft`
# corresponds to one row (sample) of X.
fft = X.select_dtypes(include=["float64"]).transpose()

##### Files with infinite components

In [7]:
# List the file IDs of the samples whose largest float value is +inf.
# `fft` holds one column per sample, so a column-wise max gives one value per sample.
# The reduction axis is spelled out because np.max(DataFrame) forwards axis=None, which
# pandas >= 2.0 collapses to a single scalar instead of a per-column Series.
inf_rows = fft.columns[(fft.max(axis=0) == np.inf).to_numpy()]
# The columns of `fft` are the row labels of `ch1`, so the lookup is label-based.
ch1.loc[inf_rows, "ID"]

55      0_c2319c35c100.csv
1169    3_c1617c35c100.csv
1338    3_c1934c50c100.csv
1915    5_c0849c30c000.csv
1925    5_c0859c30c000.csv
1926    5_c0860c30c000.csv
1927    5_c0861c30c000.csv
2107    5_c1248c50c050.csv
2108    5_c1249c50c050.csv
2109    5_c1250c50c050.csv
2110    5_c1251c50c050.csv
2111    5_c1252c50c050.csv
2112    5_c1253c50c050.csv
2113    5_c1254c50c050.csv
Name: ID, dtype: object

In [8]:
# Remove every sample whose largest float value is +inf from both features and labels.
# The mask is computed once with an explicit per-column reduction: np.max(DataFrame)
# forwards axis=None, which pandas >= 2.0 collapses to a single scalar.
inf_mask = (fft.max(axis=0) == np.inf).to_numpy()
# The columns of `fft` are the row labels of X (and of Y), so the rows are dropped by
# label rather than by position, matching how DataFrame.drop interprets its argument.
inf_rows = fft.columns[inf_mask]
finite_X = X.drop(index=inf_rows)
finite_Y = Y.drop(index=inf_rows)

In [9]:
# Remove every sample that contains at least one missing value, keeping the feature
# table and the label vector aligned.
# At this point finite_X still carries the original row labels (the index is only reset
# in a later cell), so positions and labels no longer coincide. drop() is label-based:
# feeding it positional offsets from np.where would delete the wrong rows and leave the
# NaN rows in place. The rows are therefore identified by their index labels.
nan_rows = finite_X.index[finite_X.isna().any(axis=1).to_numpy()]
finite_Y = finite_Y.drop(index=nan_rows)
finite_X = finite_X.drop(index=nan_rows)

In [10]:
# Renumber the surviving rows 0..n-1 in both objects, discarding the old index labels.
finite_X, finite_Y = (
    part.reset_index(drop=True) for part in (finite_X, finite_Y)
)

In [11]:
# Display the cleaned, re-indexed feature table (it still contains the "ID" column).
finite_X

Unnamed: 0,ID,0,1,2,3,4,5,6,7,8,...,4991,4992,4993,4994,4995,4996,4997,4998,4999,5000
0,0_c2264c30c100.csv,1.624898,0.000098,0.000067,0.000717,0.000031,0.000052,0.000010,0.000006,0.000055,...,0.000016,0.000049,0.000058,0.000038,0.000025,0.000011,0.000054,0.000027,0.000030,6.080000e-06
1,0_c2265c30c100.csv,1.624795,0.000019,0.000071,0.000718,0.000005,0.000028,0.000038,0.000012,0.000019,...,0.000019,0.000050,0.000048,0.000034,0.000012,0.000012,0.000057,0.000039,0.000032,6.000000e-06
2,0_c2266c30c100.csv,1.624771,0.000038,0.000072,0.000653,0.000041,0.000057,0.000050,0.000036,0.000044,...,0.000022,0.000043,0.000041,0.000028,0.000025,0.000018,0.000018,0.000043,0.000015,3.900000e-05
3,0_c2267c30c100.csv,1.620564,0.000041,0.000044,0.000353,0.000069,0.000074,0.000028,0.000010,0.000035,...,0.000018,0.000053,0.000023,0.000011,0.000014,0.000033,0.000081,0.000029,0.000054,6.400000e-05
4,0_c2268c30c100.csv,1.620967,0.000063,0.000062,0.000350,0.000037,0.000048,0.000011,0.000039,0.000040,...,0.000052,0.000022,0.000004,0.000014,0.000043,0.000026,0.000050,0.000037,0.000036,1.990000e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2586,6_c2259c60c000.csv,1.603394,0.000095,0.000057,0.000046,0.000019,0.000068,0.000059,0.000025,0.000044,...,0.000074,0.000026,0.000032,0.000033,0.000054,0.000049,0.000026,0.000041,0.000028,3.340000e-05
2587,6_c2260c60c000.csv,1.603403,0.000022,0.000073,0.000099,0.000046,0.000096,0.000031,0.000061,0.000044,...,0.000049,0.000024,0.000018,0.000031,0.000043,0.000037,0.000085,0.000017,0.000077,2.040000e-05
2588,6_c2261c60c000.csv,1.603343,0.000076,0.000043,0.000039,0.000018,0.000096,0.000090,0.000048,0.000068,...,0.000027,0.000029,0.000047,0.000055,0.000032,0.000015,0.000077,0.000021,0.000020,6.250000e-07
2589,6_c2262c60c000.csv,1.603901,0.000032,0.000019,0.000109,0.000045,0.000044,0.000036,0.000085,0.000083,...,0.000047,0.000057,0.000036,0.000023,0.000060,0.000078,0.000059,0.000025,0.000039,4.340000e-05


### Data Prep

In [12]:
from sklearn.model_selection import train_test_split

# Model inputs: the cleaned table without the file identifier and without the column named "0".
clean_X = finite_X.drop(columns=["0", "ID"])

# Hold out 20% of the samples for testing; the split is stratified on the labels and
# uses a fixed seed so it is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    clean_X,
    finite_Y,
    test_size=0.2,
    stratify=finite_Y,
    random_state=0,
)

In [13]:
# (number of rows, number of columns) of the training feature table.
x_train.shape

(2072, 5000)

In [14]:
# Largest value of every feature column in the training split.
# DataFrame.max() always reduces per column. np.max(DataFrame) forwards axis=None, which
# pandas >= 2.0 collapses to one scalar instead of the per-column Series wanted here.
x_train.max()

1       0.001310
2       0.001854
3       0.001569
4       0.001676
5       0.001486
          ...   
4996    0.000173
4997    0.000171
4998    0.000198
4999    0.000159
5000    0.000270
Length: 5000, dtype: float64

In [15]:
# Euclidean norm of every training row, kept as a column vector of shape (rows, 1)
# so it broadcasts across the feature columns.
n = np.linalg.norm(x_train, axis=1, keepdims=True)

In [16]:
# Scale every training row by its own norm (the column vector `n` broadcasts across the columns).
normalized_x_train = x_train.div(n)

In [17]:
# Largest value of every feature column after row normalisation.
# DataFrame.max() always reduces per column. np.max(DataFrame) forwards axis=None, which
# pandas >= 2.0 collapses to one scalar instead of the per-column Series wanted here.
normalized_x_train.max()

1       0.035059
2       0.033442
3       0.027407
4       0.031773
5       0.021715
          ...   
4996    0.003396
4997    0.005486
4998    0.007827
4999    0.004305
5000    0.006199
Length: 5000, dtype: float64

### Model Building

In [18]:
import tensorflow as tf

from keras.layers import Dense
from keras import Sequential
from keras.losses import SparseCategoricalCrossentropy

In [29]:
# Fully connected classifier: four GELU hidden layers of decreasing width followed by a
# softmax output layer.
model = Sequential([
    Dense(2500, activation="gelu"),
    Dense(1200, activation="gelu"),
    Dense(600, activation="gelu"),
    Dense(100, activation="gelu"),
    # NOTE(review): 10 output units, but the label counts shown earlier in this notebook
    # list only classes 0-6 - confirm whether 7 units were intended.
    Dense(10, activation="softmax"),
])

# The output layer already applies softmax, so the model emits probabilities, not logits.
# The loss must therefore be configured with from_logits=False; with from_logits=True a
# second softmax would be applied to the probabilities inside the loss.
model.compile(
    loss=SparseCategoricalCrossentropy(from_logits=False),
    optimizer="sgd",
    metrics=["accuracy"],
)

In [30]:
# Class probabilities for the held-out samples.
# The training features are scaled row-wise to unit Euclidean norm (normalized_x_train),
# so the test features get the same per-row scaling before they are fed to the model.
# NOTE(review): assumes the model is trained on normalized_x_train - confirm against the fit cell.
normalized_x_test = x_test / np.linalg.norm(x_test, axis=1, keepdims=True)
model.predict(normalized_x_test)



array([[0.09999438, 0.10000434, 0.09998769, ..., 0.10001612, 0.10000445,
        0.09999719],
       [0.10000116, 0.10000043, 0.09998383, ..., 0.10000911, 0.10004969,
        0.09997936],
       [0.10004791, 0.10008541, 0.09985479, ..., 0.10000784, 0.10008165,
        0.09995503],
       ...,
       [0.10000493, 0.10000449, 0.10000886, ..., 0.09996959, 0.10000953,
        0.10000353],
       [0.09990329, 0.09996445, 0.09995206, ..., 0.1000805 , 0.10005119,
        0.10007001],
       [0.10001411, 0.10003097, 0.09996562, ..., 0.09999459, 0.09999294,
        0.09997147]], dtype=float32)

In [28]:
# Inspect the model's variables (kernel and bias of each Dense layer).
# NOTE(review): the captured output shows NaN in every displayed entry, and this cell's
# execution count (28) is lower than that of the model-definition cell (29), so the output
# comes from an earlier model instance - re-check after a fresh Restart & Run All.
model.weights

[<tf.Variable 'dense_12/kernel:0' shape=(5000, 2500) dtype=float32, numpy=
 array([[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]], dtype=float32)>,
 <tf.Variable 'dense_12/bias:0' shape=(2500,) dtype=float32, numpy=array([nan, nan, nan, ..., nan, nan, nan], dtype=float32)>,
 <tf.Variable 'dense_13/kernel:0' shape=(2500, 1200) dtype=float32, numpy=
 array([[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]], dtype=float32)>,
 <tf.Variable 'dense_13/bias:0' shape=(1200,) dtype=float32, numpy=array([nan, nan, nan, ..., nan, nan, nan], dtype=float