# Image Classification using Satellite Images and CNNs: Preprocessing, Analysis and Evaluation
### By: Christian Tan_PH

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;This notebook provides an in-depth solution for training a machine learning model for image classification using satellite image chips with various land cover/land use and atmospheric conditions. <br> <br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;It covers all essential stages such as data preparation, data analysis, model definition, training, evaluation and prediction. The code employs various libraries like numpy, pandas, matplotlib, tensorflow, keras, and scikit-learn to perform different tasks. The CNN architecture is used to train the model, which is a robust model for image classification tasks.<br><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;This code is perfect as a starting point for building similar models for image classification tasks and can be easily adapted to different datasets and requirements.

# Importing Libraries

In [3]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

import os
import gc

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential 

import keras as k
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D, BatchNormalization

import cv2
from tqdm import tqdm
from collections import Counter

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import fbeta_score

import plotly.express as px
from tensorflow.keras.optimizers import Adam, Adagrad, RMSprop
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from sklearn.metrics import classification_report
from torch.utils.data import random_split
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
from torchvision.datasets import KMNIST
from torch.optim import Adam
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
import argparse
import torch
import time
# import the necessary packages
from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import LogSoftmax
from torch import flatten
import torch.nn.functional as F

# Loading The Dataset

In [4]:
# Define the base path for the dataset
path = "../input/planets-dataset/planet/planet/"

# Join the base path with the train-jpg folder
path_train = os.path.join(path, "train-jpg")

# Join the base path with the test-jpg folder
path_test = os.path.join(path, "test-jpg")

# Use the os.listdir function to get the number of files in the train-jpg and test-jpg folders
print(
    f"train files: {len(os.listdir(path_train))}, "
    f"test files: {len(os.listdir(path_test))}"
)


train files: 40479, test files: 40669


In [5]:
train_path = os.path.join(path, "train_classes.csv")
labels_tr_df = pd.read_csv(train_path)
labels_tr_df.head()

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road


In [6]:
test_path = os.path.join(path, "train_classes.csv")
labels_tst_df = pd.read_csv(test_path)
labels_tst_df.head()

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road


In [7]:
# Define the path to the train_classes.csv file
path_train_class = os.path.join(path, "train_classes.csv")

# Read the train_classes.csv file and store it in a DataFrame
df_train = pd.read_csv(path_train_class)

# Print the shape of the DataFrame
print(df_train.shape)

# Display the first 5 rows of the DataFrame
df_train.head()

(40479, 2)


Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road


# Exploring and Understanding the Labels in the dataset

In [8]:
# Number Of Unique Tags In The Dataset
# Avereage # Of Labels Per Image

all_tags = [item for sublist in list(df_train['tags'].apply(lambda row: row.split(" ")).values) for item in sublist]
print('total of {} non-unique tags in all training images'.format(len(all_tags)))
print('average number of labels per image {}'.format(1.0*len(all_tags)/df_train.shape[0]))

total of 116278 non-unique tags in all training images
average number of labels per image 2.8725511993873365


In [9]:
# Label Distribution

# Add a new column 'list_tags' to the DataFrame by splitting the 'tags' column on the space character
df_train["list_tags"] = df_train.tags.str.split(" ")

# Get the values of the new column
row_tags = df_train.list_tags.values

# Flatten the list of tags
tags = [tag for row in row_tags for tag in row]

# Count the occurrences of each tag
counter_tags = Counter(tags)

# Create a new DataFrame with the tag and total columns
df_tags = pd.DataFrame(
    {"tag": counter_tags.keys(), "total": counter_tags.values()}
).sort_values("total")

# Create a bar chart of the tag distribution using Plotly
fig = px.bar(df_tags, x="total", y="tag", orientation="h", 
             color="total",
)

# Update the chart title
fig.update_layout(title="Tags distribution")

# Show the chart
fig.show()


# Machine Learning
## Preparing the Data

In [10]:
# Drop the created "list_tags" column

df_train = df_train.drop("list_tags", axis='columns')
df_train.head()

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road


In [11]:
# Initialization and Image Reading

# Initialize empty lists to store the training images and their labels
x_train = []
y_train = []

# Flatten the list of tags
flatten = lambda l: [item for sublist in l for item in sublist]
labels = list(set(flatten([l.split(' ') for l in df_train['tags'].values])))

# Create a label map for the unique tags in the dataset
label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

# Loop through the training DataFrame
for f, tags in tqdm(df_train.values, miniters=1000):
    # Read the image file
    img = cv2.imread('../input/planets-dataset/planet/planet/train-jpg/{}.jpg'.format(f))
    # Initialize an array of zeros for the targets
    targets = np.zeros(17)
    # Loop through the tags for the current image
    for t in tags.split(' '):
        # Set the corresponding target value to 1
        targets[label_map[t]] = 1 
    # Append the image and its labels to the appropriate lists
    x_train.append(cv2.resize(img, (64, 64)))  # Indicate the IMG Size
    y_train.append(targets)

# Convert the lists to numpy arrays
x_train = np.array(x_train, np.float16) / 255.
y_train = np.array(y_train, np.uint8)


100%|██████████| 40479/40479 [05:40<00:00, 118.73it/s]


In [12]:
# Converting the lists of images and labels to numpy arrays and normalizing the pixel values of the images. 
y_train = np.array(y_train, np.uint8)
x_train = np.array(x_train, np.float16) / 255.0

# Splitting the data into train and validation sets. 
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size = 0.2, shuffle = True, random_state = 1)

# Prints the shape of the training and validation data.
print("Train data shape:",x_train.shape)
print("Train label shape:",y_train.shape)

print("Validation data shape:",x_val.shape)
print("Validation label shape:",y_val.shape)


Train data shape: (32383, 64, 64, 3)
Train label shape: (32383, 17)
Validation data shape: (8096, 64, 64, 3)
Validation label shape: (8096, 17)


In [13]:
# Free up some memory that is not being used by the program
gc.collect()

82

## Establishing Evaluation Metrics for the Model

In [14]:
# Defining a function that calculates the F-beta score for a given set of true labels and predicted labels.
# The function balances precision and recall and it is useful when there is an imbalance in the number of positive and negative examples in the data.

def fbeta(y_true, y_pred, threshold_shift=0):
    beta = 2

    # Clipping y_pred between 0 and 1
    y_pred = K.clip(y_pred, 0, 1)

    # Rounding y_pred to binary values
    y_pred_bin = K.round(y_pred + threshold_shift)

    # Counting true positives, false positives, and false negatives
    tp = K.sum(K.round(y_true * y_pred_bin)) + K.epsilon()
    fp = K.sum(K.round(K.clip(y_pred_bin - y_true, 0, 1)))
    fn = K.sum(K.round(K.clip(y_true - y_pred, 0, 1)))

    # Calculating precision and recall
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    beta_squared = beta ** 2
    return (beta_squared + 1) * (precision * recall) / (beta_squared * precision + recall + K.epsilon())


In [15]:
# This code defines a function that calculates the accuracy score for a given set of true labels and predicted labels.
def accuracy_score(y_true, y_pred, epsilon = 1e-4):
    
    # casting the true labels and predicted labels to float32
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(tf.greater(tf.cast(y_pred, tf.float32), tf.constant(0.5)), tf.float32)
    
    # counting the true positives
    tp = tf.reduce_sum(y_true * y_pred, axis = 1)
    
    # counting the false positives
    fp = tf.reduce_sum(y_pred, axis = 1) - tp
    
    # counting the false negatives
    fn = tf.reduce_sum(y_true, axis = 1) - tp
    
    # casting the true labels and predicted labels to boolean
    y_true = tf.cast(y_true, tf.bool)
    y_pred = tf.cast(y_pred, tf.bool)
    
    # counting the true negatives
    tn = tf.reduce_sum(tf.cast(tf.logical_not(y_true), tf.float32) * tf.cast(tf.logical_not(y_pred), tf.float32), 
                       axis = 1)
    #calculating the accuracy score
    return (tp + tn)/(tp + tn + fp + fn + epsilon)


## Constructing the Neural Network Architecture

In [16]:
# Importing different optimization algorithms from tensorflow.keras.optimizers
from tensorflow.keras.optimizers import Adam, Adagrad, RMSprop

# Instantiate the optimizer objects
optimizer_Adam = Adam()
optimizer_Adagrad = Adagrad()
optimizer_RMSprop = RMSprop()

In [17]:
# Define The Model
model = keras.Sequential()

# Adding The Layers
# Batch Normalization layer is added as the first layer of the model, which normalize the input data.
model.add(BatchNormalization(input_shape=(64, 64, 3)))

# Convolutional layers and MaxPooling layers are added to extract features from the input images and reduce the spatial dimensions of the feature maps respectively.
model.add(Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Dropout layers are added to prevent overfitting.
model.add(Dropout(0.2))

# Same set of layers are added for the next set of features
model.add(Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

# Flatten layer is added to convert the 2D feature maps into a 1D feature vector
model.add(Flatten())

# Fully connected layers (dense layers) and dropout layers are added
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(17, activation='sigmoid'))

# Compiling the model by specifying the loss function, optimizer, and evaluation metrics
model.compile(optimizer=optimizer_Adam,
              loss='binary_crossentropy',
              metrics=[fbeta, accuracy_score])

# Training the model on the training data for 10 epoch with a batch size of 128, and validating the model on the validation data
history = model.fit(x_train, y_train,
                      batch_size=128,
                      epochs=10,
                      verbose=1,
                      validation_data=(x_val, y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization (BatchNo (None, 64, 64, 3)         12        
_________________________________________________________________
conv2d (Conv2D)              (None, 64, 64, 32)        896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 62, 62, 32)        9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 31, 31, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 31, 31, 64)        18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 29, 29, 64)        3

In [19]:
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
fig.show()

In [20]:
# Plotting the training and validation loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])

# Adding title, y-label and x-label to the plot
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

# Adding legend to the plot
plt.legend(['train', 'validation'], loc='upper left')

# Showing the plot
plt.show()

In [21]:
# Evaluate the model on the validation set
model.evaluate(x_val, y_val)

# Calculate the f-beta score for the training set
train_fscore = fbeta_score(y_train, np.round(model.predict(x_train)), beta=2,average = 'weighted')
print("train fscore: ", train_fscore)

# Calculate the f-beta score for the validation set
val_fscore = fbeta_score(y_val, np.round(model.predict(x_val)), beta=2, average = 'weighted')
print("val fscore: ", val_fscore)

train fscore:  0.8189473104612389
val fscore:  0.8021128620500748


In [22]:
class myCNN(nn.Module):

    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=1, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(8, 16, kernel_size=1, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 7 * 7, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 17),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 7 * 7)
        x = self.classifier(x)
        return torch.sigmoid(x)

In [23]:
# define training hyperparameters
INIT_LR = 1e-3
BATCH_SIZE = 64
EPOCHS = 10
# define the train and val splits
TRAIN_SPLIT = 0.75
VAL_SPLIT = 1 - TRAIN_SPLIT
# set the device we will be using to train the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [24]:
# initialize the train, validation, and test data loaders
trainDataLoader = DataLoader(df_train, shuffle=True,batch_size=BATCH_SIZE)
#valDataLoader = DataLoader(valData, batch_size=BATCH_SIZE)
#testDataLoader = DataLoader(testData, batch_size=BATCH_SIZE)
# calculate steps per epoch for training and validation set
trainSteps = len(trainDataLoader.dataset) // BATCH_SIZE
#valSteps = len(valDataLoader.dataset) // BATCH_SIZE

In [25]:
trainDataLoader

<torch.utils.data.dataloader.DataLoader at 0x7efbe06cec10>

In [26]:
# initialize the LeNet model
print("[INFO] initializing the LeNet model...")
model_nn = myCNN().to(device)
# initialize our optimizer and loss function
opt = Adam(model_nn.parameters(), lr=INIT_LR)
lossFn = nn.CrossEntropyLoss()
# initialize a dictionary to store training history
H = {
	"train_loss": [],
	"train_acc": [],
	"val_loss": [],
	"val_acc": []
}
# measure how long training is going to take
print("[INFO] training the network...")
startTime = time.time()

[INFO] initializing the LeNet model...
[INFO] training the network...



The `lr` argument is deprecated, use `learning_rate` instead.



In [27]:
class MyDataset(Dataset):
 
  def __init__(self,file_name):
    price_df=pd.read_csv(file_name)
 
    x=x_train # get older previous process and add it here
    y=y_train
 
    self.x_train=torch.tensor(x,dtype=torch.float32)
    self.y_train=torch.tensor(y,dtype=torch.float32)
 
  def __len__(self):
    return len(self.y_train)
   
  def __getitem__(self,idx):
    return self.x_train[idx],self.y_train[idx]

In [28]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset
from PIL import Image

def get_labels(fname):
    with open(fname,'r') as f:
        labels = [t.strip() for t in f.read().split(',')]
    labels2idx = {t:i for i,t in enumerate(labels)}
    idx2labels = {i:t for i,t in enumerate(labels)}
    return labels,labels2idx,idx2labels

class PlanetData(Dataset):

    def __init__(self, csv_file, root_dir, labels_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.labels, self.labels2idx, self.idx2labels = get_labels(labels_file)
        self.n_labels = len(self.labels)
        self.transform = transform


    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.data.iloc[idx, 0])
        img = Image.open(img_name + '.jpg').convert('RGB')
        labels = self.data.iloc[idx, 1]
        target = torch.zeros(self.n_labels)
        label_idx = torch.LongTensor([self.labels2idx[tag] for tag in labels.split(' ')])
        target[label_idx] = 1
        if self.transform:
            img = self.transform(img)
        return img, target

In [29]:
class RandomVerticalFlip(object):
    """Horizontally flip the given PIL.Image randomly with a probability of 0.5."""

    def __call__(self, img):
        """
        Args:
            img (PIL.Image): Image to be flipped.
        Returns:
            PIL.Image: Randomly flipped image.
        """
        if np.random.random() < 0.5:
            return img.transpose(Image.FLIP_TOP_BOTTOM)
        return img

class RandomRotation(object):
    """Rotate PIL.Image randomly (90/180/270 degrees)with a probability of 0.5."""

    def __call__(self, img):
        """
        Args:
            img (PIL.Image): Image to be rotated.
        Returns:
            PIL.Image: Randomly rotated image.
        """
        if np.random.random() < 0.5:
            deg = np.random.randint(1,3)*90.
            return img.rotate(deg)
        return img

class RandomTranslation(object):
    """Translates PIL.Image randomly (0-10 pixels) with a probability of 0.5."""

    def __init__(self,max_vshift=10, max_hshift=10):
        self.max_vshift = max_vshift
        self.max_hshift = max_hshift

    def __call__(self, img):
        """
        Args:
            img (PIL.Image): Image to be translated.
        Returns:
            PIL.Image: Randomly translated image.
        """
        if np.random.random() < 0.5:
            hshift = np.random.randint(-self.max_hshift,self.max_hshift)
            vshift = np.random.randint(-self.max_vshift,self.max_vshift)
            return img.transform(img.size, Image.AFFINE, (1, 0, hshift, 0, 1, vshift))
        return img

In [30]:
import torch
import torchvision.transforms as transforms

train_transforms = transforms.Compose([transforms.RandomCrop(224),
                        transforms.RandomHorizontalFlip(),
                        RandomRotation(),
                        RandomTranslation(),
                        RandomVerticalFlip(),
                        transforms.ToTensor()])
trainset = PlanetData('/kaggle/input/planets-dt/train_set_norm.csv', '/kaggle/input/planets-dataset/planet/planet/train-jpg',
                '/kaggle/input/planets-dt/labels.txt', train_transforms)
train_loader = DataLoader(trainset, batch_size=BATCH_SIZE,shuffle=True, num_workers=0)

In [31]:
# loop over our epochs
for e in range(0, EPOCHS):
    # set the model in training mode
    model_nn.train()
    # initialize the total training and validation loss
    totalTrainLoss = 0
    train_losses = []
    preds = []
    ys = []
    totalValLoss = 0
    # initialize the number of correct predictions in the training
    # and validation step
    # loop over the training set
    for i, (x,y) in enumerate(train_loader):
        # send the input to the device
        ys.append(y)
        (x, y) = (x.to(device), y.to(device))
        print(i) 
        # perform a forward pass and calculate the training loss
        pred = model_nn(x)
        preds.append(pred)
        loss = lossFn(pred, y)
        # zero out the gradients, perform the backpropagation step,
        # and update the weights
        opt.zero_grad()
        loss.backward()
        opt.step()
        # add the loss to the total training loss so far and
        # calculate the number of correct predictions
        totalTrainLoss += loss
        train_losses.append(loss)
        


FLIP_TOP_BOTTOM is deprecated and will be removed in Pillow 10 (2023-07-01). Use Transpose.FLIP_TOP_BOTTOM instead.


AFFINE is deprecated and will be removed in Pillow 10 (2023-07-01). Use Transform.AFFINE instead.



0


RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`

In [None]:
from sklearn.metrics import classification_report

print(classification_report(ys, preds))

In [None]:
import matplotlib.pyplot as plt

x_data = list(range(0, i))
y_data = train_losses

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

ax.plot(x_data, y_data)
ax.set_xlabel('data trained')
ax.set_ylabel('CrossEntropyLoss')

plt.show()

## Evaluating The Model Using The Test Data

In [None]:
# Read the sample submission CSV file and store it in a DataFrame
df_samplesub = pd.read_csv('../input/planets-dataset/planet/planet/sample_submission.csv')

# The DataFrame 'df_samplesub' now contains the data from the sample submission CSV file


In [None]:
# Create two separate DataFrames for the test and additional test files

# The first DataFrame will contain the labels for the test-jpg files
test = df_samplesub[0 : 40669]

# The second DataFrame will contain the labels for the test-jpg-additional files
files = df_samplesub[40669 : ]


In [None]:
# Loading the test images

# Initialize an empty list to store the images
test_img = []

# Loop through the test DataFrame
for image_name, tags in tqdm(test.values, miniters=1000):
    # Read the image file
    arr = cv2.imread('../input/planets-dataset/planet/planet/test-jpg/{}.jpg'.format(image_name))
    # Resize the image to (64, 64)
    test_img.append(cv2.resize(arr, (64, 64)))

# Loop through the additional test files DataFrame
for image_name, tags in tqdm(files.values, miniters=1000):
    # Read the image file
    arr = cv2.imread('../input/planets-dataset/test-jpg-additional/test-jpg-additional/{}.jpg'.format(image_name))
    # Resize the image to (64, 64)
    test_img.append(cv2.resize(arr, (64, 64)))

# Convert the list of images to a numpy array and normalize the pixel values
test_img = np.array(test_img, np.float16)/255.0


In [None]:
# Free up some memory that is not being used by the program.. again
gc.collect()


In [None]:
# Running the predictions

# Initialize an empty list to store the predictions
yres = []

# Make predictions on the test images using the model
predictions = model.predict(test_img, batch_size = 64, verbose = 2)

# Append the predictions to the yres list
yres.append(predictions)


In [None]:
# Free up some memory that is not being used by the program.. again again
gc.collect()


In [None]:
# Convert the encoded labels back to their original form

# Initialize an empty array to store the decoded labels
sub = np.array(yres[0])

# Loop through the encoded labels
for i in range (1, len(yres)):
    # Add the encoded label to the array
    sub += np.array(yres[i])

# Convert the array to a DataFrame
sub = pd.DataFrame(sub, columns = label_map)
