#### *Import the relevant Libraries*

In [1]:
import numpy as np

In [2]:
# Read the pre-processed sonar CSV into a NumPy float array (comma-separated values)
data = np.loadtxt(fname='sonar-processed.csv', delimiter=',')
data

array([[0.02  , 0.0371, 0.0428, ..., 0.009 , 0.0032, 0.    ],
       [0.0453, 0.0523, 0.0843, ..., 0.0052, 0.0044, 0.    ],
       [0.0262, 0.0582, 0.1099, ..., 0.0095, 0.0078, 0.    ],
       ...,
       [0.0522, 0.0437, 0.018 , ..., 0.0077, 0.0031, 1.    ],
       [0.0303, 0.0353, 0.049 , ..., 0.0036, 0.0048, 1.    ],
       [0.026 , 0.0363, 0.0136, ..., 0.0061, 0.0115, 1.    ]])

#### *Separate the target and input columns*

In [3]:
# Every column except the last one is kept as the model inputs
input_column = data[:, :-1]

# The last column is kept as the target
target_column = data[:, -1]

In [4]:
# Display both arrays as a tuple to check the input/target separation
input_column, target_column

(array([[0.02  , 0.0371, 0.0428, ..., 0.0084, 0.009 , 0.0032],
        [0.0453, 0.0523, 0.0843, ..., 0.0049, 0.0052, 0.0044],
        [0.0262, 0.0582, 0.1099, ..., 0.0164, 0.0095, 0.0078],
        ...,
        [0.0522, 0.0437, 0.018 , ..., 0.0138, 0.0077, 0.0031],
        [0.0303, 0.0353, 0.049 , ..., 0.0079, 0.0036, 0.0048],
        [0.026 , 0.0363, 0.0136, ..., 0.0036, 0.0061, 0.0115]]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,

#### *Shuffle the data*

In [5]:
# Fixed seed so that "Restart Kernel -> Run All" reproduces the same shuffle,
# and therefore the same train/validation/test membership in the saved files.
SHUFFLE_SEED = 42
shuffle_rng = np.random.default_rng(SHUFFLE_SEED)

# Create a shuffle index: one random permutation of the row positions 0..n_rows-1.
# Shuffling matters here because the split below takes contiguous slices, and the
# displayed targets appear grouped by class (0s first, then 1s).
shuffled_indices = shuffle_rng.permutation(input_column.shape[0])

# Apply the SAME permutation to inputs and targets so every row keeps its own target
shuffle_input_column = input_column[shuffled_indices]
shuffle_target_column = target_column[shuffled_indices]

#### *Split the dataset into train, validation and test*


In [6]:
# Total number of samples available after shuffling
samples_count = shuffle_input_column.shape[0]

# 80% of the samples are used as training data
train_samples_count = int(0.8 * samples_count)

# 10% of the samples are used as validation data
validation_samples_count = int(0.1 * samples_count)

# The remainder (roughly 10%) is used as test data; computing it by subtraction
# guarantees the three counts always add up to samples_count
test_samples_count = samples_count - train_samples_count - validation_samples_count

# Contiguous row ranges for each subset: train first, then validation, then test
train_rows = slice(None, train_samples_count)
validation_rows = slice(train_samples_count, train_samples_count + validation_samples_count)
test_rows = slice(train_samples_count + validation_samples_count, None)

train_inputs = shuffle_input_column[train_rows]
train_targets = shuffle_target_column[train_rows]

validation_inputs = shuffle_input_column[validation_rows]
validation_targets = shuffle_target_column[validation_rows]

test_inputs = shuffle_input_column[test_rows]
test_targets = shuffle_target_column[test_rows]

# For training, validation and test (in that order), print the number of targets
# equal to 1, the number of samples in the subset, and the resulting proportion of 1s.
for split_targets, split_size in (
    (train_targets, train_samples_count),
    (validation_targets, validation_samples_count),
    (test_targets, test_samples_count),
):
    positives = np.sum(split_targets)
    print(positives, split_size, positives / split_size)

88.0 166 0.5301204819277109
12.0 20 0.6
11.0 22 0.5


#### *Save the three datasets in `.npz` format*

In [7]:



# Write one archive per subset; each archive stores the arrays under the keys
# 'inputs' and 'targets'. np.savez appends the '.npz' extension to the file name.
for archive_name, archive_inputs, archive_targets in (
    ('sonar_data_train', train_inputs, train_targets),
    ('sonar_data_validation', validation_inputs, validation_targets),
    ('sonar_data_test', test_inputs, test_targets),
):
    np.savez(archive_name, inputs=archive_inputs, targets=archive_targets)