**Build a Predictive Model:** Lets create a classification model to test if it's possible to accurately predict which stress a plant is experiencing based solely on its spectral data.  

In [1]:
# Lets import the necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
import keras

# import the first sheet of the dataset
df1 = pd.read_excel("Datos1_InteraccionesNIR.xlsx", sheet_name=0)

In [2]:
#Let's separate the features and the target variable
X_features = df1.drop(columns=["Tratamiento", "Planta"])

y_target = df1["Tratamiento"]

print("features dataframe:")
X_features.head()

features dataframe:


Unnamed: 0,350,351,352,353,354,355,356,357,358,359,...,2491,2492,2493,2494,2495,2496,2497,2498,2499,2500
0,0.06103,0.060394,0.053344,0.054736,0.057163,0.055928,0.04988,0.043096,0.041127,0.044614,...,0.044963,0.044834,0.044802,0.044657,0.04437,0.044198,0.044046,0.043928,0.043878,0.043752
1,0.060823,0.060909,0.056016,0.056755,0.057975,0.056499,0.051715,0.047413,0.0457,0.046623,...,0.037656,0.037458,0.037266,0.03712,0.036929,0.036832,0.036861,0.036779,0.036661,0.03657
2,0.059742,0.060113,0.05732,0.05928,0.059741,0.057078,0.053557,0.049205,0.047486,0.048903,...,0.036252,0.036049,0.035732,0.035558,0.035431,0.035338,0.035368,0.035286,0.035157,0.035058
3,0.048861,0.047099,0.044486,0.04934,0.052701,0.050598,0.044007,0.038449,0.036496,0.038123,...,0.033894,0.033762,0.033693,0.033536,0.033412,0.033204,0.032831,0.032637,0.032501,0.032376
4,0.063155,0.060788,0.057388,0.058424,0.060128,0.059009,0.053017,0.048921,0.047316,0.048053,...,0.033897,0.033729,0.033675,0.033571,0.033515,0.033441,0.033317,0.033257,0.033285,0.033205


In [3]:
print("target variable:")
y_target.head()

target variable:


0    Control
1    Control
2    Control
3    Control
4    Control
Name: Tratamiento, dtype: object

In [4]:
#let's create an instance of OrdinalEncoder
encoder = OrdinalEncoder()

# Apply OrdinalEncoder to the target variable
y_target_encoded = encoder.fit_transform(y_target.values.reshape(-1, 1))

# Check the encoded target variable
y_target_encoded[-15:]

# Let's print the unique values in the array to see the encoding
print(np.unique(y_target_encoded))

# see that theres 8 unique values in the target variable
print(y_target.unique())

[0. 1. 2. 3. 4. 5. 6. 7.]
['Control' 'Ralstonia' 'Fusarium' 'E_Hidrico' 'Ral_Fus' 'Ral_EH' 'Fus_EH'
 'Fus_EH_Ral']


In [5]:
# lets split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_features, y_target_encoded, test_size=0.2, random_state=42)

Let's make a 1 dimensional Convolutional Neural Network (1D CNN) using TensorFlow/Keras.

In [6]:
# Reshape the input data for Conv1D (samples, timestamps, features)
# We need to add an extra dimension to the data
X_train_reshaped = X_train.values.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_reshaped = X_test.values.reshape(X_test.shape[0], X_test.shape[1], 1)

# Get the number of classes from the data
num_classes = len(np.unique(y_target_encoded))
print(f"Number of classes: {num_classes}")

# Define the input shape for our Conv1D model
input_shape = (X_train.shape[1], 1)  # (2151, 1)
print(f"Input shape: {input_shape}")

# Build the 1D CNN model
model = keras.Sequential([
    # Input layer
    keras.layers.Input(shape=input_shape),
    
    # First Conv1D block
    keras.layers.Conv1D(filters=32, kernel_size=5, activation='relu', padding='same'),
    keras.layers.MaxPooling1D(pool_size=4),
    
    # Second Conv1D block
    keras.layers.Conv1D(filters=64, kernel_size=5, activation='relu', padding='same'),
    keras.layers.MaxPooling1D(pool_size=4),
    
    # Flatten the output to feed into Dense layers
    keras.layers.Flatten(),
    
    # Dense hidden layer
    keras.layers.Dense(100, activation='relu'),
    
    # Dropout for regularization to prevent overfitting
    keras.layers.Dropout(0.5),
    
    # Output layer with softmax for multi-class classification
    keras.layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',  # Use sparse since our labels are integers
    metrics=['accuracy']
)

# Display the model architecture
model.summary()

Number of classes: 8
Input shape: (2151, 1)


In [None]:
# Define batch size and number of epochs
batch_size = 128
epochs = 100

#lets define a callback for early stopping
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = model.fit(X_train_reshaped, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1, callbacks=[early_stopping])