In [17]:
import os
import numpy as np
import random
import csv
from additional_functions import process_all

In [18]:
# All files were developed collaboratively

class dataset:
    '''
    Loads a comma-separated .data file and carries it through all pre-processing tasks.

    Every method mutates the instance in place. A typical order of calls is:
    impute -> oh_encode -> normalize (optional) -> shuffle -> sort -> split -> fold -> shuffle_splits.

    Attributes:
        intake_data: 2-D numpy array, one example per row, label/target in the last column.
            Holds strings right after loading and floats once oh_encode() has run.
        tune_set: every 10th example of intake_data (filled by split()).
        ninety_data: the examples that were not placed in tune_set (filled by split()).
        validate_set: ninety_data dealt into 10 NaN-padded partitions (filled by fold()).
    '''
    def __init__(self, data_path: str, processed_flag: bool):
        '''
        The constructor initializes all of the self variables and, unless processed_flag is
        truthy, loads the raw examples from the original .data file at data_path.

        Blank lines (e.g. a trailing newline at the end of the file) are skipped so that every
        row has the same number of fields and the result is a regular 2-D array of strings.
        '''
        # Instantiate self variables
        self.intake_data = []
        self.tune_set = []
        self.validate_set = []
        self.ninety_data = []

        # Data is being read in from original .DATA file
        if not processed_flag:
            with open(data_path, 'r') as file:
                # Delimit each non-empty line into a list of string fields
                rows = [line.strip().split(',') for line in file if line.strip()]
            # Make the list into a numpy array
            self.intake_data = np.array(rows)

    @staticmethod
    def _min_max(values, axis=None):
        '''
        Min-max scales values into [0, 1] along axis. A constant column (max == min) would
        divide by zero, so its range is treated as 1 and the column becomes all zeros.
        '''
        lowest = values.min(axis=axis)
        span = values.max(axis=axis) - lowest
        span = np.where(span == 0, 1.0, span)
        return (values - lowest) / span

    def normalize(self, prediction_type: str):
        '''
        Performs min-max normalization on every feature column (all columns but the last).

        The last column is handled according to prediction_type:
        "regression" min-max normalizes the target values as well; any other value treats the
        column as class labels and re-maps them to consecutive integers starting from 0
        (in sorted order of the original labels).

        The data must be numeric (or numeric strings), so call oh_encode() first.
        '''
        data = np.asarray(self.intake_data, dtype=float)
        features = data[:, :-1]  # All columns except the last one (features)
        labels = data[:, -1]     # Last column (labels)

        normalized_features = self._min_max(features, axis=0)

        if prediction_type == "regression":
            normalized_labels = self._min_max(labels)
        else:
            # return_inverse gives, for each label, its index among the sorted unique labels
            _, normalized_labels = np.unique(labels, return_inverse=True)

        # Combine normalized features with labels
        self.intake_data = np.hstack((normalized_features, np.reshape(normalized_labels, (-1, 1))))

    def oh_encode(self):
        '''
        This method goes through each item in the data array, and if the item is not a number, it is
        replaced with a number (continuization). Despite the name this is an integer encoding, not a
        one-hot encoding: each distinct non-numeric string gets the next unused integer, and the
        mapping is shared by all columns. Everything ends up as a float.
        '''
        string_to_int = {}
        next_int = 0
        # This function continuizes a single element so it can be vectorized
        def convert_to_num(value):
            nonlocal next_int
            try:
                # Try to convert to float
                return float(value)
            except ValueError:
                # If conversion fails, map the string to a number
                if value not in string_to_int:
                    string_to_int[value] = next_int
                    next_int += 1
                return string_to_int[value]

        # Apply convert_to_num to each element in the array
        vectorization = np.vectorize(convert_to_num, otypes=[float])
        self.intake_data = vectorization(self.intake_data)

    def impute(self):
        '''
        Replaces question marks in a dataset with a random value between 1 and 10.

        Must run while the data still holds strings, i.e. before oh_encode(); on numeric data
        there is no '?' left to find and the method does nothing.
        '''
        data = self.intake_data
        if not isinstance(data, np.ndarray) or data.dtype.kind not in ('U', 'O'):
            return

        # A fixed-width unicode array narrower than 2 characters would silently truncate '10' to '1'
        if data.dtype.kind == 'U' and data.dtype.itemsize // np.dtype('U1').itemsize < 2:
            data = data.astype('U2')

        # argwhere walks the array in row-major order, the same order a nested loop would
        for position in np.argwhere(data == '?'):
            data[tuple(position)] = str(random.randint(1, 10))
        self.intake_data = data

    def shuffle(self):
        '''
        This method will shuffle the self.intake_data by examples.
        '''
        np.random.shuffle(self.intake_data)

    def sort(self, prediction_type_flag):
        '''
        Sorts the data by its class/target value. We can assume all labels are the last index of an example.
        For "regression" the label column is converted to float32 before sorting so targets are ordered
        by value; otherwise the column is sorted as stored.
        '''
        label_column = self.intake_data[:, -1]
        if prediction_type_flag == "regression":
            label_column = label_column.astype(np.float32)
        self.intake_data = self.intake_data[label_column.argsort()]

    def split(self):
        '''
        Puts every 10th example (indices 0, 10, 20, ...) into self.tune_set and the remaining
        examples into self.ninety_data. When the data was sorted first, both arrays stay sorted
        and stratified. self.ninety_data still needs to be separated into partitions by fold().

        Both arrays are rebuilt from scratch, so calling split() more than once is safe.
        '''
        data = np.asarray(self.intake_data)
        is_tune = np.arange(len(data)) % 10 == 0
        # Boolean indexing copies, so later in-place shuffles do not touch intake_data
        self.tune_set = data[is_tune]
        self.ninety_data = data[~is_tune]

    def fold(self):
        '''
        This method folds self.ninety_data into 10 stratified partitions stored in self.validate_set.

        Example i goes to partition i % 10. The result has shape
        (10, len(ninety_data) // 10 + 1, number of attributes); unused slots are NaN.

        Raises:
            ValueError: if self.ninety_data is empty or not 2-D (split() has not been run).
        '''
        data = np.asarray(self.ninety_data)
        if data.ndim != 2 or len(data) == 0:
            raise ValueError("fold() needs a non-empty 2-D ninety_data; call split() first")

        partition_count = 10
        # shape should be (10, # of examples, # of attributes)
        shape = (partition_count, len(data) // partition_count + 1, data.shape[1])
        self.validate_set = np.full(shape, np.nan)

        # splits data into folds
        for fold_index in range(partition_count):
            members = data[fold_index::partition_count]
            self.validate_set[fold_index, :len(members)] = members

    def shuffle_splits(self):
        '''
        Shuffles the tune set and each partition of the validate set after they are complete and
        stratified. NaN padding rows are shuffled along with the real examples.
        '''
        np.random.shuffle(self.tune_set)
        for partition in self.validate_set:
            np.random.shuffle(partition)

    def remove_attribute(self, indice=0):
        '''
        Takes in an attribute index, and removes that entire column from the dataset. This can be used to remove ID numbers
        '''
        self.intake_data = np.delete(self.intake_data, indice, 1)

    def save(self, filename: str, folder_path=None):
        '''
        Saves the tune set and validation set to csv files for inspection purposes.

        Writes <filename>_tune_set.csv (one example per row) and <filename>_validate_set.csv
        (a "shape" header row, then one row per partition whose cells are examples with their
        attribute values joined by ';'). extract() reads this layout back.

        folder_path is the destination directory; it defaults to
        ~/CSCI_447/Project_3/Datasets/processed_data and is created if missing.
        '''
        # get/create the path to the folder that the file should be saved to
        if folder_path is None:
            folder_path = os.path.expanduser("~/CSCI_447/Project_3/Datasets/processed_data")
        os.makedirs(folder_path, exist_ok=True)
        tune_file_path = os.path.join(folder_path, (filename+'_tune_set.csv'))
        validate_file_path = os.path.join(folder_path, (filename+'_validate_set.csv'))

        # save the tune set
        with open(tune_file_path, mode='w', newline='') as file:
            csv.writer(file).writerows(self.tune_set)

        # save the validation set
        validate = np.asarray(self.validate_set)
        shape_info = validate.shape
        with open(validate_file_path, mode='w', newline='') as file:
            writer = csv.writer(file)
            if shape_info:
                writer.writerow(["shape"] + list(shape_info))
            for partition in validate:
                # Join the VALUES of each example; joining str(example) would split its repr per character
                writer.writerow([';'.join(str(value) for value in example) for example in partition])

    def extract(self, file_path: str):
        '''
        Loads the csv files written by save() back into self.tune_set and self.validate_set.

        file_path is the path prefix, i.e. the folder joined with the filename given to save(),
        without the '_tune_set.csv' / '_validate_set.csv' suffix. Both arrays are loaded as
        strings; convert with .astype(float) when numeric values are needed.

        Raises:
            ValueError: if the validate file is empty (no "shape" header row).
        '''
        tune_file_path = file_path+'_tune_set.csv'
        validate_file_path = file_path+'_validate_set.csv'

        # extract the tune set
        with open(tune_file_path, mode='r') as file:
            rows = list(csv.reader(file))
        self.tune_set = np.array(rows, dtype=str)

        # extract the validate set
        with open(validate_file_path, mode='r') as file:
            rows = list(csv.reader(file))
        if not rows:
            raise ValueError(f"{validate_file_path} is empty; expected a 'shape' header row")
        shape_info = tuple(map(int, rows[0][1:]))
        reconstructed_data = [[cell.split(';') for cell in row] for row in rows[1:]]
        self.validate_set = np.array(reconstructed_data, dtype=str).reshape(shape_info)

In [19]:
user = 'carlthedog3'

# All raw .data files sit in the same per-user directory, so the prefix is built once.
raw_data_dir = '/home/'+user+'/CSCI_447/Project_2/Datasets/'

# The second argument (False) tells the constructor to read the raw file right away.
abalone_data = dataset(raw_data_dir+'abalone.data', False)
cancer_data = dataset(raw_data_dir+'breast-cancer-wisconsin.data', False)
fire_data = dataset(raw_data_dir+'forestfires.data', False)
glass_data = dataset(raw_data_dir+'glass.data', False)
machine_data = dataset(raw_data_dir+'machine.data', False)
soybean_data = dataset(raw_data_dir+'soybean-small.data', False)

In [20]:
# Abalone pre-processing. The steps run in this exact order because each one
# consumes what the previous one produced (and the shuffles draw from numpy's global RNG).
abalone_data.oh_encode()            # non-numeric strings -> integer codes, everything -> float
print(abalone_data.intake_data)
abalone_data.shuffle()              # shuffle the examples in place
abalone_data.sort('regression')     # order examples by the last column, compared as float32
abalone_data.split()                # every 10th example -> tune_set, the rest -> ninety_data
abalone_data.fold()                 # deal ninety_data into 10 NaN-padded partitions (validate_set)
abalone_data.shuffle_splits()       # shuffle tune_set and each partition in place
print(abalone_data.validate_set)

[[ 0.      0.455   0.365  ...  0.101   0.15   15.    ]
 [ 0.      0.35    0.265  ...  0.0485  0.07    7.    ]
 [ 1.      0.53    0.42   ...  0.1415  0.21    9.    ]
 ...
 [ 0.      0.6     0.475  ...  0.2875  0.308   9.    ]
 [ 1.      0.625   0.485  ...  0.261   0.296  10.    ]
 [ 0.      0.71    0.555  ...  0.3765  0.495  12.    ]]
[[[ 1.      0.54    0.475  ...  0.3075  0.34   16.    ]
  [ 1.      0.735   0.565  ...  0.5145  0.5675 13.    ]
  [ 1.      0.575   0.425  ...  0.18    0.228   8.    ]
  ...
  [ 2.      0.555   0.45   ...  0.1755  0.22    9.    ]
  [ 2.      0.31    0.24   ...  0.0315  0.045   7.    ]
  [ 1.      0.525   0.415  ...  0.171   0.27   13.    ]]

 [[ 0.      0.62    0.47   ...  0.2005  0.2475  8.    ]
  [ 0.      0.415   0.315  ...  0.065   0.103   9.    ]
  [ 0.      0.58    0.465  ...  0.1865  0.385  17.    ]
  ...
  [ 1.      0.52    0.41   ...  0.153   0.225  10.    ]
  [ 1.      0.635   0.5    ...  0.361   0.31   10.    ]
  [ 1.      0.455   0.355  ...  0.

In [21]:
# Cancer pre-processing.
# impute() must run BEFORE oh_encode(): impute() looks for the literal string '?', while
# oh_encode() replaces every non-numeric string (including '?') with an integer code.
# With the calls the other way round, no '?' is left to impute and missing values silently
# become that code.
# NOTE(review): the first column of the printed tune set looks like a sample ID
# (e.g. 8.312680e+05) - confirm whether remove_attribute(0) should be called first.
cancer_data.impute()                  # '?' -> random integer in [1, 10] (still stored as a string)
cancer_data.oh_encode()               # everything -> float
cancer_data.shuffle()                 # shuffle the examples in place
cancer_data.sort('classification')    # order examples by the label in the last column
cancer_data.split()                   # every 10th example -> tune_set, the rest -> ninety_data
cancer_data.fold()                    # deal ninety_data into 10 NaN-padded partitions (validate_set)
cancer_data.shuffle_splits()          # shuffle tune_set and each partition in place
print(cancer_data.tune_set)

[[8.312680e+05 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
  1.000000e+00 1.000000e+00 1.000000e+00 3.000000e+00 1.000000e+00
  2.000000e+00]
 [1.108449e+06 5.000000e+00 3.000000e+00 3.000000e+00 4.000000e+00
  2.000000e+00 4.000000e+00 3.000000e+00 4.000000e+00 1.000000e+00
  4.000000e+00]
 [1.306339e+06 4.000000e+00 4.000000e+00 2.000000e+00 1.000000e+00
  2.000000e+00 5.000000e+00 2.000000e+00 1.000000e+00 2.000000e+00
  2.000000e+00]
 [1.182404e+06 4.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00 1.000000e+00 2.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00]
 [1.049837e+06 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00]
 [8.370820e+05 2.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00 1.000000e+00 3.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00]
 [8.322260e+05 3.000000e+00 4.000000e+00 4.000000e+00 1.000000e+01
  5.000000e+00 1.000000e+00 3.000

In [22]:
# Forest-fire pre-processing. This is the only dataset here that is normalized.
# NOTE(review): unlike the other datasets, shuffle_splits() is not called in this cell -
# confirm whether that is intentional.
fire_data.oh_encode()               # non-numeric strings -> integer codes, everything -> float
fire_data.normalize("regression")   # min-max scale the feature columns and the target column
fire_data.shuffle()                 # shuffle the examples in place
fire_data.sort('regression')        # order examples by the last column, compared as float32
fire_data.split()                   # every 10th example -> tune_set, the rest -> ninety_data
fire_data.fold()                    # deal ninety_data into 10 NaN-padded partitions (validate_set)

print(fire_data.tune_set)

[[5.55555556e-01 5.00000000e-01 5.86206897e-01 6.52173913e-01
  9.76138829e-01 1.61268091e-01 1.10812076e-01 1.90730838e-01
  4.85530547e-01 7.80219780e-01 4.27083333e-01 0.00000000e+00
  0.00000000e+00]
 [4.44444444e-01 3.75000000e-01 7.24137931e-01 4.78260870e-01
  8.56832972e-01 7.64989662e-02 9.27919495e-02 4.09982175e-02
  4.66237942e-01 1.20879121e-01 2.81250000e-01 0.00000000e+00
  0.00000000e+00]
 [4.44444444e-01 3.75000000e-01 5.86206897e-01 1.00000000e+00
  9.95661605e-01 5.19986216e-01 7.23262345e-01 2.45989305e-01
  9.71061093e-01 1.31868132e-01 4.27083333e-01 0.00000000e+00
  0.00000000e+00]
 [6.66666667e-01 5.00000000e-01 6.55172414e-01 8.26086957e-01
  9.64208243e-01 4.55547898e-01 8.11607770e-01 1.63992870e-01
  7.78135048e-01 1.31868132e-01 4.27083333e-01 0.00000000e+00
  0.00000000e+00]
 [4.44444444e-01 3.75000000e-01 6.55172414e-01 8.26086957e-01
  9.33839479e-01 2.81874569e-01 8.53849754e-01 1.10516934e-01
  3.44051447e-01 7.14285714e-01 4.68750000e-01 0.00000000e+0

In [23]:
# Glass pre-processing (order matters: each step consumes the previous step's result).
# NOTE(review): the first column of the printed tune set (104, 78, 188, ...) looks like a
# row ID - confirm whether remove_attribute(0) should be called before these steps.
glass_data.oh_encode()               # everything -> float
glass_data.shuffle()                 # shuffle the examples in place
glass_data.sort('classification')    # order examples by the label in the last column
glass_data.split()                   # every 10th example -> tune_set, the rest -> ninety_data
glass_data.fold()                    # deal ninety_data into 10 NaN-padded partitions (validate_set)
glass_data.shuffle_splits()          # shuffle tune_set and each partition in place

In [24]:
# Machine pre-processing (order matters: each step consumes the previous step's result).
machine_data.oh_encode()            # non-numeric strings -> integer codes, everything -> float
machine_data.shuffle()              # shuffle the examples in place
machine_data.sort('regression')     # order examples by the last column, compared as float32
machine_data.split()                # every 10th example -> tune_set, the rest -> ninety_data
machine_data.fold()                 # deal ninety_data into 10 NaN-padded partitions (validate_set)
machine_data.shuffle_splits()       # shuffle tune_set and each partition in place

In [25]:
# Soybean pre-processing (order matters: each step consumes the previous step's result).
soybean_data.oh_encode()               # non-numeric strings -> integer codes, everything -> float
soybean_data.shuffle()                 # shuffle the examples in place
soybean_data.sort('classification')    # order examples by the label in the last column
soybean_data.split()                   # every 10th example -> tune_set, the rest -> ninety_data
soybean_data.fold()                    # deal ninety_data into 10 NaN-padded partitions (validate_set)
soybean_data.shuffle_splits()          # shuffle tune_set and each partition in place

In [26]:
# Write the tune/validate csv files for every processed dataset, in the same order as before.
for _save_name, _processed in (
    ('abalone', abalone_data),
    ('cancer', cancer_data),
    ('fire', fire_data),
    ('glass', glass_data),
    ('machine', machine_data),
    ('soybean', soybean_data),
):
    _processed.save(_save_name)

In [27]:
# Dump the abalone splits for a visual sanity check.
print("Abalone Data:\nTune Set: {}\nValidate Set: {}\n\n".format(
    abalone_data.tune_set,
    abalone_data.validate_set,
))

Abalone Data:
Tune Set: [[ 1.      0.615   0.455  ...  0.238   0.315  10.    ]
 [ 1.      0.55    0.43   ...  0.218   0.1945  8.    ]
 [ 2.      0.415   0.31   ...  0.083   0.0915  6.    ]
 ...
 [ 1.      0.55    0.43   ...  0.186   0.225   9.    ]
 [ 1.      0.64    0.48   ...  0.24    0.34   10.    ]
 [ 0.      0.56    0.44   ...  0.2045  0.265  14.    ]]
Validate Set: [[[ 1.      0.54    0.475  ...  0.3075  0.34   16.    ]
  [ 1.      0.735   0.565  ...  0.5145  0.5675 13.    ]
  [ 1.      0.575   0.425  ...  0.18    0.228   8.    ]
  ...
  [ 2.      0.555   0.45   ...  0.1755  0.22    9.    ]
  [ 2.      0.31    0.24   ...  0.0315  0.045   7.    ]
  [ 1.      0.525   0.415  ...  0.171   0.27   13.    ]]

 [[ 0.      0.62    0.47   ...  0.2005  0.2475  8.    ]
  [ 0.      0.415   0.315  ...  0.065   0.103   9.    ]
  [ 0.      0.58    0.465  ...  0.1865  0.385  17.    ]
  ...
  [ 1.      0.52    0.41   ...  0.153   0.225  10.    ]
  [ 1.      0.635   0.5    ...  0.361   0.31   10.  

In [28]:
# Dump the cancer splits for a visual sanity check.
print("Cancer Data:\nTune Set:\n{}\nValidate Set:\n{}\n\n".format(
    cancer_data.tune_set,
    cancer_data.validate_set,
))

Cancer Data:
Tune Set:
[[8.312680e+05 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
  1.000000e+00 1.000000e+00 1.000000e+00 3.000000e+00 1.000000e+00
  2.000000e+00]
 [1.108449e+06 5.000000e+00 3.000000e+00 3.000000e+00 4.000000e+00
  2.000000e+00 4.000000e+00 3.000000e+00 4.000000e+00 1.000000e+00
  4.000000e+00]
 [1.306339e+06 4.000000e+00 4.000000e+00 2.000000e+00 1.000000e+00
  2.000000e+00 5.000000e+00 2.000000e+00 1.000000e+00 2.000000e+00
  2.000000e+00]
 [1.182404e+06 4.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00 1.000000e+00 2.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00]
 [1.049837e+06 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00]
 [8.370820e+05 2.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00 1.000000e+00 3.000000e+00 1.000000e+00 1.000000e+00
  2.000000e+00]
 [8.322260e+05 3.000000e+00 4.000000e+00 4.000000e+00 1.000000e+01
  5.000000

In [29]:
# Dump the forest-fire splits for a visual sanity check.
print("Fire Data:\nTune Set:\n{}\nValidate Set:\n{}\n\n".format(
    fire_data.tune_set,
    fire_data.validate_set,
))

Fire Data:
Tune Set:
[[5.55555556e-01 5.00000000e-01 5.86206897e-01 6.52173913e-01
  9.76138829e-01 1.61268091e-01 1.10812076e-01 1.90730838e-01
  4.85530547e-01 7.80219780e-01 4.27083333e-01 0.00000000e+00
  0.00000000e+00]
 [4.44444444e-01 3.75000000e-01 7.24137931e-01 4.78260870e-01
  8.56832972e-01 7.64989662e-02 9.27919495e-02 4.09982175e-02
  4.66237942e-01 1.20879121e-01 2.81250000e-01 0.00000000e+00
  0.00000000e+00]
 [4.44444444e-01 3.75000000e-01 5.86206897e-01 1.00000000e+00
  9.95661605e-01 5.19986216e-01 7.23262345e-01 2.45989305e-01
  9.71061093e-01 1.31868132e-01 4.27083333e-01 0.00000000e+00
  0.00000000e+00]
 [6.66666667e-01 5.00000000e-01 6.55172414e-01 8.26086957e-01
  9.64208243e-01 4.55547898e-01 8.11607770e-01 1.63992870e-01
  7.78135048e-01 1.31868132e-01 4.27083333e-01 0.00000000e+00
  0.00000000e+00]
 [4.44444444e-01 3.75000000e-01 6.55172414e-01 8.26086957e-01
  9.33839479e-01 2.81874569e-01 8.53849754e-01 1.10516934e-01
  3.44051447e-01 7.14285714e-01 4.68750

In [30]:
# Dump the glass splits for a visual sanity check.
print("Glass Data:\nTune Set:\n{}\nValidate Set:\n{}\n\n".format(
    glass_data.tune_set,
    glass_data.validate_set,
))

Glass Data:
Tune Set:
[[1.04000e+02 1.52725e+00 1.38000e+01 3.15000e+00 6.60000e-01 7.05700e+01
  8.00000e-02 1.16400e+01 0.00000e+00 0.00000e+00 2.00000e+00]
 [7.80000e+01 1.51627e+00 1.30000e+01 3.58000e+00 1.54000e+00 7.28300e+01
  6.10000e-01 8.04000e+00 0.00000e+00 0.00000e+00 2.00000e+00]
 [1.88000e+02 1.52315e+00 1.34400e+01 3.34000e+00 1.23000e+00 7.23800e+01
  6.00000e-01 8.83000e+00 0.00000e+00 0.00000e+00 7.00000e+00]
 [5.60000e+01 1.51769e+00 1.24500e+01 2.71000e+00 1.29000e+00 7.37000e+01
  5.60000e-01 9.06000e+00 0.00000e+00 2.40000e-01 1.00000e+00]
 [9.50000e+01 1.51629e+00 1.27100e+01 3.33000e+00 1.49000e+00 7.32800e+01
  6.70000e-01 8.24000e+00 0.00000e+00 0.00000e+00 2.00000e+00]
 [7.50000e+01 1.51596e+00 1.30200e+01 3.56000e+00 1.54000e+00 7.31100e+01
  7.20000e-01 7.90000e+00 0.00000e+00 0.00000e+00 2.00000e+00]
 [8.10000e+01 1.51592e+00 1.28600e+01 3.52000e+00 2.12000e+00 7.26600e+01
  6.90000e-01 7.97000e+00 0.00000e+00 0.00000e+00 2.00000e+00]
 [1.55000e+02 1.516

In [31]:
# Dump the machine splits for a visual sanity check.
print("Machine Data:\nTune Set:\n{}\nValidate Set:\n{}\n\n".format(
    machine_data.tune_set,
    machine_data.validate_set,
))

Machine Data:
Tune Set:
[[1.010e+02 1.120e+02 6.000e+02 7.680e+02 2.000e+03 0.000e+00 1.000e+00
  1.000e+00 1.600e+01 2.000e+01]
 [5.800e+01 6.000e+01 7.000e+02 2.560e+02 2.000e+03 0.000e+00 1.000e+00
  1.000e+00 2.400e+01 1.900e+01]
 [7.400e+01 7.500e+01 7.500e+01 2.000e+03 8.000e+03 6.400e+01 1.000e+00
  3.800e+01 1.440e+02 7.500e+01]
 [7.800e+01 8.400e+01 7.500e+01 3.000e+03 8.000e+03 8.000e+00 3.000e+00
  4.800e+01 6.400e+01 5.400e+01]
 [3.500e+01 4.300e+01 5.000e+01 1.000e+03 4.000e+03 8.000e+00 1.000e+00
  5.000e+00 2.900e+01 2.900e+01]
 [8.700e+01 9.800e+01 1.400e+02 2.000e+03 3.200e+04 3.200e+01 1.000e+00
  5.400e+01 1.410e+02 1.810e+02]
 [6.600e+01 6.700e+01 8.000e+02 2.560e+02 8.000e+03 0.000e+00 1.000e+00
  4.000e+00 1.200e+01 3.400e+01]
 [1.010e+02 1.020e+02 5.700e+01 4.000e+03 1.600e+04 1.000e+00 6.000e+00
  1.200e+01 1.320e+02 8.200e+01]
 [1.800e+01 5.000e+03 3.500e+02 6.400e+01 6.400e+01 0.000e+00 1.000e+00
  4.000e+00 1.000e+01 1.500e+01]
 [1.900e+01 2.600e+01 1.430e+02

In [32]:
# Dump the soybean splits for a visual sanity check.
print("Soybean Data:\nTune Set:\n{}\nValidate Set:\n{}\n\n".format(
    soybean_data.tune_set,
    soybean_data.validate_set,
))

Soybean Data:
Tune Set:
[[3. 0. 0. 1. 0. 1. 2. 1. 0. 0. 1. 1. 0. 2. 2. 0. 0. 0. 1. 0. 0. 3. 0. 0.
  0. 2. 1. 0. 4. 0. 0. 0. 0. 0. 0. 1.]
 [1. 1. 2. 0. 0. 0. 1. 2. 1. 0. 1. 1. 0. 2. 2. 0. 0. 0. 1. 0. 2. 2. 0. 0.
  0. 0. 0. 3. 4. 0. 0. 0. 0. 0. 1. 3.]
 [3. 1. 1. 0. 0. 2. 1. 2. 1. 2. 1. 1. 0. 2. 2. 0. 0. 0. 1. 0. 2. 2. 0. 0.
  0. 0. 0. 3. 4. 0. 0. 0. 0. 0. 1. 3.]
 [0. 1. 2. 0. 0. 1. 1. 2. 1. 2. 1. 0. 0. 2. 2. 0. 0. 0. 1. 0. 1. 1. 0. 1.
  0. 0. 0. 3. 4. 0. 0. 0. 0. 0. 0. 2.]
 [6. 0. 2. 1. 0. 1. 1. 1. 0. 0. 1. 1. 0. 2. 2. 0. 0. 0. 1. 1. 3. 1. 1. 1.
  0. 0. 0. 0. 4. 0. 0. 0. 0. 0. 0. 0.]]
Validate Set:
[[[ 1.  1.  2. ...  0.  1.  3.]
  [ 3.  1.  2. ...  0.  1.  3.]
  [ 3.  0.  2. ...  0.  0.  0.]
  [ 5.  0.  0. ...  0.  0.  1.]
  [ 0.  1.  2. ...  0.  0.  2.]]

 [[ 5.  0.  2. ...  0.  0.  0.]
  [ 0.  1.  2. ...  0.  1.  3.]
  [ 0.  1.  2. ...  0.  1.  2.]
  [ 4.  0.  0. ...  0.  0.  1.]
  [ 0.  1.  1. ...  0.  1.  3.]]

 [[ 4.  0.  0. ...  0.  0.  1.]
  [ 1.  1.  2. ...  0.  1.  3.]
  [nan n