In [40]:
print(tf.__version__)

2.17.0


# Structured data classification 0

### The dataset

Here's the description of each feature:

## Setup

In [4]:
import os

# TensorFlow is the only backend that supports string inputs.
os.environ["KERAS_BACKEND"] = "tensorflow"

import tensorflow as tf
import pandas as pd
import keras
from keras import layers

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [5]:
import random
SEED = 2
random.seed(SEED)
np.random.seed(SEED)

## Preparing the data

Let's download the data and load it into a Pandas dataframe:

In [7]:
Kenya = pd.read_csv("GeoAI/Agricultural Plastic/Data/Kenya_training.csv")
Spain =  pd.read_csv("GeoAI/Agricultural Plastic/Data/Spain_training.csv")
VNM =  pd.read_csv("GeoAI/Agricultural Plastic/Data/VNM_training.csv")
VNM.rename(columns={'Lon': 'lon', 'Lat': 'lat'}, inplace=True) # It's to allign with the other two sources.

dataframe = pd.concat([Kenya, Spain, VNM], axis=0)

dataframe = dataframe.drop(['ID'], axis=1)
dataframe['TARGET'] = dataframe['TARGET']-1


In [8]:
cols = dataframe.columns
y = dataframe.pop('TARGET')
X = dataframe
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)
dataframe_test = pd.concat([X_test, y_test], axis=1)
dataframe_test.columns = cols
dataframe = pd.concat([X_train, y_train], axis=1)
dataframe.columns = cols

The dataset includes 2825 samples with 18 columns per sample (13 features, plus the target
label):

In [11]:
#print(dataframe.shape)
#print(dataframe.columns)
#dataframe.head()

Here's a preview of a few samples:

In [12]:
val_dataframe = dataframe.sample(frac=0.2, random_state=2)
train_dataframe = dataframe.drop(val_dataframe.index)

print(
    f"Using {len(train_dataframe)} samples for training "
    f"and {len(val_dataframe)} for validation"
)

Using 1163 samples for training and 378 for validation


Let's generate `tf.data.Dataset` objects for each dataframe:

In [28]:

def dataframe_to_dataset(dataframe):
    dataframe = dataframe.copy()
    labels = dataframe.pop("TARGET")
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    ds = ds.shuffle(buffer_size=len(dataframe))
    return ds


train_ds = dataframe_to_dataset(train_dataframe)
val_ds = dataframe_to_dataset(val_dataframe)

Each `Dataset` yields a tuple `(input, target)` where `input` is a dictionary of features
and `target` is the value `0` or `1`:

In [29]:
for x, y in train_ds.take(1):
    print("Input:", x)
    print("Target:", y)

Input: {'lon': <tf.Tensor: shape=(), dtype=float64, numpy=37.30357524>, 'lat': <tf.Tensor: shape=(), dtype=float64, numpy=0.096344314>, 'blue_p50': <tf.Tensor: shape=(), dtype=float64, numpy=2708.0>, 'green_p50': <tf.Tensor: shape=(), dtype=float64, numpy=2882.0>, 'nir_p50': <tf.Tensor: shape=(), dtype=float64, numpy=5196.0>, 'nira_p50': <tf.Tensor: shape=(), dtype=float64, numpy=5631.0>, 're1_p50': <tf.Tensor: shape=(), dtype=float64, numpy=3270.0>, 're2_p50': <tf.Tensor: shape=(), dtype=float64, numpy=5025.0>, 're3_p50': <tf.Tensor: shape=(), dtype=float64, numpy=5508.0>, 'red_p50': <tf.Tensor: shape=(), dtype=float64, numpy=2782.0>, 'swir1_p50': <tf.Tensor: shape=(), dtype=float64, numpy=3969.0>, 'swir2_p50': <tf.Tensor: shape=(), dtype=float64, numpy=2853.0>, 'VV_p50': <tf.Tensor: shape=(), dtype=float64, numpy=-7.775951>, 'VH_p50': <tf.Tensor: shape=(), dtype=float64, numpy=-13.336234>}
Target: tf.Tensor(0, shape=(), dtype=int64)


2024-08-01 14:24:39.206235: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Let's batch the datasets:

In [30]:
train_ds = train_ds.batch(32)
val_ds = val_ds.batch(32)

## Feature preprocessing with Keras layers


All features are continuous numerical features:

For each of these features, we will use a `Normalization()` layer to make sure the mean
of each feature is 0 and its standard deviation is 1.

Below, we define a utility function to do that operation:

- `encode_numerical_feature` to apply featurewise normalization to numerical features.

In [31]:

def encode_numerical_feature(feature, name, dataset):
    # Create a Normalization layer for our feature
    normalizer = layers.Normalization()

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the statistics of the data
    normalizer.adapt(feature_ds)

    # Normalize the input feature
    encoded_feature = normalizer(feature)
    return encoded_feature

## Build a model

With this done, we can create our end-to-end model:

### Adaptative Learning Rate

In [36]:
class AdaptiveLearningRateScheduler(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, initial_learning_rate, decay_steps, decay_rate):
        self.initial_learning_rate = tf.cast(initial_learning_rate, tf.float32)
        self.decay_steps = tf.cast(decay_steps, tf.float32)
        self.decay_rate = tf.cast(decay_rate, tf.float32)

    def __call__(self, step):
        step = tf.cast(step, tf.float32)  # Ensure step is also float32
        return self.initial_learning_rate * tf.math.pow(self.decay_rate, (step / self.decay_steps))

# Example usage
initial_learning_rate = 0.001
decay_steps = 1000
decay_rate = 0.96

lr_schedule = AdaptiveLearningRateScheduler(initial_learning_rate, decay_steps, decay_rate)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',    # Metric to monitor
    patience=1000,           # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True # Restore model weights from the epoch with the best value of the monitored quantity
)


In [37]:
 # Numerical features
lon = keras.Input(shape=(1,), name="lon")
lat = keras.Input(shape=(1,), name="lat")
blue_p50 = keras.Input(shape=(1,), name="blue_p50")
green_p50 = keras.Input(shape=(1,), name="green_p50")
nir_p50 = keras.Input(shape=(1,), name="nir_p50")
nira_p50 = keras.Input(shape=(1,), name="nira_p50")
re1_p50 = keras.Input(shape=(1,), name="re1_p50")
re2_p50 = keras.Input(shape=(1,), name="re2_p50")
re3_p50 = keras.Input(shape=(1,), name="re3_p50")
red_p50 = keras.Input(shape=(1,), name="red_p50")
swir1_p50 = keras.Input(shape=(1,), name="swir1_p50")
swir2_p50 = keras.Input(shape=(1,), name="swir2_p50")
VV_p50 = keras.Input(shape=(1,), name="VV_p50")
VH_p50 = keras.Input(shape=(1,), name="VH_p50")

all_inputs = [
    lon,
    lat,
    blue_p50,
    green_p50,
    nir_p50,
    nira_p50,
    re1_p50,
    re2_p50,
    re3_p50,
    red_p50,
    swir1_p50,
    swir2_p50,
    VV_p50,
    VH_p50
]

# Integer categorical features
lon_encoded = encode_numerical_feature(lon, "lon", train_ds)
lat_encoded = encode_numerical_feature(lat, "lat", train_ds)
blue_p50_encoded = encode_numerical_feature(blue_p50, "blue_p50", train_ds)
green_p50_encoded = encode_numerical_feature(green_p50, "green_p50", train_ds)
nir_p50_encoded = encode_numerical_feature(nir_p50, "nir_p50", train_ds)
nira_p50_encoded = encode_numerical_feature(nira_p50, "nira_p50", train_ds)
re1_p50_encoded = encode_numerical_feature(re1_p50, "re1_p50", train_ds)
re2_p50_encoded = encode_numerical_feature(re2_p50, "re2_p50", train_ds)
re3_p50_encoded = encode_numerical_feature(re3_p50, "re3_p50", train_ds)
red_p50_encoded = encode_numerical_feature(red_p50, "red_p50", train_ds)
swir1_p50_encoded = encode_numerical_feature(swir1_p50, "swir1_p50", train_ds)
swir2_p50_encoded = encode_numerical_feature(swir2_p50, "swir2_p50", train_ds)
VV_p50_encoded = encode_numerical_feature(VV_p50, "VV_p50", train_ds)
VH_p50_encoded = encode_numerical_feature(VH_p50, "VH_p50", train_ds)

all_features = layers.concatenate(
    [
        lon_encoded,
        lat_encoded,
        blue_p50_encoded,
        green_p50_encoded,
        nir_p50_encoded,
        nira_p50_encoded,
        re1_p50_encoded,
        re2_p50_encoded,
        re3_p50_encoded,
        red_p50_encoded,
        swir1_p50_encoded,
        swir2_p50_encoded,
        VV_p50_encoded,
        VH_p50
    ]
)
x1 = layers.Dense(64, activation="relu")(all_features)
x1 = layers.Dropout(0.5)(x1)
x2 = layers.Dense(64, activation="relu")(all_features)
x2 = layers.Dropout(0.5)(x2)
x3 = layers.Dense(64, activation="relu")(all_features)
x3 = layers.Dropout(0.5)(x3)
x4 = layers.Dense(64, activation="relu")(all_features)
x4 = layers.Dropout(0.5)(x4)
x5 = layers.Dense(64, activation="relu")(all_features)
x5 = layers.Dropout(0.5)(x5)
x6 = layers.Dense(64, activation="relu")(all_features)
x6 = layers.Dropout(0.5)(x6)
x7 = layers.Dense(64, activation="relu")(all_features)
x7 = layers.Dropout(0.5)(x7)
x8 = layers.Dense(64, activation="relu")(all_features)
x8 = layers.Dropout(0.5)(x3)

x1 = layers.concatenate((x1,x2))
x2 = layers.concatenate((x3,x4))
x3 = layers.concatenate((x5,x6))
x4 = layers.concatenate((x6,x7))

x1 = layers.Dense(128, activation="relu")(x1)
x1 = layers.Dropout(0.5)(x1)
x2 = layers.Dense(128, activation="relu")(x2)
x2 = layers.Dropout(0.5)(x2)
x3 = layers.Dense(128, activation="relu")(x3)
x3 = layers.Dropout(0.5)(x3)
x4 = layers.Dense(128, activation="relu")(x4)
x4 = layers.Dropout(0.5)(x4)

x1 = layers.concatenate((x1,x2))
x2 = layers.concatenate((x3,x4))

x1 = layers.Dense(256, activation="relu")(x1)
x1 = layers.Dropout(0.5)(x1)
x2 = layers.Dense(256, activation="relu")(x2)
x2 = layers.Dropout(0.5)(x2)

x = layers.concatenate((x1,x2))

x = layers.Dense(512, activation="relu")(x)
x = layers.Dropout(0.5)(x)

x = layers.Dense(128, activation="relu")(x)
x = layers.Dropout(0.5)(x)

output = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(all_inputs, output)

2024-08-01 14:28:50.454382: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [38]:
model.compile(optimizer,"binary_crossentropy", metrics=["accuracy"])

## Train the model

In [39]:
model.fit(train_ds, epochs=10, validation_data=val_ds, callbacks=[early_stopping], verbose=2)

Epoch 1/10
37/37 - 4s - 97ms/step - accuracy: 0.5073 - loss: 190.0128 - val_accuracy: 0.5397 - val_loss: 16.9941
Epoch 2/10
37/37 - 0s - 5ms/step - accuracy: 0.4884 - loss: 66.4899 - val_accuracy: 0.5397 - val_loss: 0.9965
Epoch 3/10
37/37 - 0s - 5ms/step - accuracy: 0.4944 - loss: 28.2172 - val_accuracy: 0.4603 - val_loss: 1.6409
Epoch 4/10
37/37 - 0s - 5ms/step - accuracy: 0.4979 - loss: 13.4325 - val_accuracy: 0.5397 - val_loss: 0.6837
Epoch 5/10
37/37 - 0s - 5ms/step - accuracy: 0.5056 - loss: 6.8724 - val_accuracy: 0.5397 - val_loss: 0.6908
Epoch 6/10
37/37 - 0s - 5ms/step - accuracy: 0.5099 - loss: 3.1976 - val_accuracy: 0.4603 - val_loss: 0.6941
Epoch 7/10
37/37 - 0s - 5ms/step - accuracy: 0.5099 - loss: 2.2390 - val_accuracy: 0.4603 - val_loss: 0.6982
Epoch 8/10
37/37 - 0s - 5ms/step - accuracy: 0.4927 - loss: 2.0689 - val_accuracy: 0.4841 - val_loss: 0.6934
Epoch 9/10
37/37 - 0s - 5ms/step - accuracy: 0.5039 - loss: 1.2644 - val_accuracy: 0.5238 - val_loss: 0.6921
Epoch 10/10


<keras.src.callbacks.history.History at 0x758cec594040>

## Inference on new data

To get a prediction for a new sample, you can simply call `model.predict()`. There are
just two things you need to do:

1. wrap scalars into a list so as to have a batch dimension (models only process batches
of data, not single samples)
2. Call `convert_to_tensor` on each feature

In [34]:
from tensorflow.keras.models import load_model
model = load_model('StructData_Model0.h5')

  super().__init__(**kwargs)


sample = {
    "age": 60,
    "sex": 1,
    "cp": 1,
    "trestbps": 145,
    "chol": 233,
    "fbs": 1,
    "restecg": 2,
    "thalach": 150,
    "exang": 0,
    "oldpeak": 2.3,
    "slope": 3,
    "ca": 0,
    "thal": "fixed",
}

input_dict = {name: tf.convert_to_tensor([value]) for name, value in sample.items()}
predictions = model.predict(input_dict)



### Test Eval

In [None]:
from sklearn.metrics import accuracy_score

y = dataframe_test['TARGET']
X = dict(dataframe_test.drop(['TARGET'], axis=1))

pred = model.predict(X).astype(int)
pred = pred.reshape(-1)
accuracy_score(y, pred)

In [None]:
Kenya = pd.read_csv("Kenya_testing.csv")
Spain =  pd.read_csv("Spain_validation.csv")
VNM =  pd.read_csv("VNM_testing.csv")
VNM.rename(columns={'Lon': 'lon', 'Lat': 'lat'}, inplace=True) # It's to allign with the other two sources.

dataframe = pd.concat([Kenya, Spain, VNM], axis=0)


In [None]:
#    dataframe = dataframe.copy()
#    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe)))
#    ds = ds.shuffle(buffer_size=len(dataframe))
#    return ds
ds = dict(dataframe)
#

In [None]:
predictions = model.predict(ds).astype(int)



In [None]:
predictions[0][0]

1

In [None]:
predictions[0]

array([1.], dtype=float32)