## Working Area

### Import Library

In [None]:
!pip install keras-tuner

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch

### Data Loading

In [3]:
df = pd.read_csv('/content/cover_data.csv')

# See the data
df.head()

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,...,Soil_Type32,Soil_Type33,Soil_Type34,Soil_Type35,Soil_Type36,Soil_Type37,Soil_Type38,Soil_Type39,Soil_Type40,class
0,2596,51,3,258,0,510,221,232,148,6279,...,0,0,0,0,0,0,0,0,0,5
1,2590,56,2,212,-6,390,220,235,151,6225,...,0,0,0,0,0,0,0,0,0,5
2,2804,139,9,268,65,3180,234,238,135,6121,...,0,0,0,0,0,0,0,0,0,2
3,2785,155,18,242,118,3090,238,238,122,6211,...,0,0,0,0,0,0,0,0,0,2
4,2595,45,2,153,-1,391,220,234,150,6172,...,0,0,0,0,0,0,0,0,0,5


### Preprocessing

In [4]:
# Check for missing values in the data
df.isnull().sum().sum()

0

There are no missing values in the data.

In [5]:
# Check for data type in the data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 581012 entries, 0 to 581011
Data columns (total 55 columns):
 #   Column                              Non-Null Count   Dtype
---  ------                              --------------   -----
 0   Elevation                           581012 non-null  int64
 1   Aspect                              581012 non-null  int64
 2   Slope                               581012 non-null  int64
 3   Horizontal_Distance_To_Hydrology    581012 non-null  int64
 4   Vertical_Distance_To_Hydrology      581012 non-null  int64
 5   Horizontal_Distance_To_Roadways     581012 non-null  int64
 6   Hillshade_9am                       581012 non-null  int64
 7   Hillshade_Noon                      581012 non-null  int64
 8   Hillshade_3pm                       581012 non-null  int64
 9   Horizontal_Distance_To_Fire_Points  581012 non-null  int64
 10  Wilderness_Area1                    581012 non-null  int64
 11  Wilderness_Area2                    581012 non-null 

All data types are integers.

In [6]:
# Combine the Wilderness Area column into 1 column
df['Wilderness_Area'] = df.loc[:, 'Wilderness_Area1':'Wilderness_Area4'].idxmax(axis=1)
df['Wilderness_Area'] = df['Wilderness_Area'].str.extract(r'(\d+)').astype(int)
df = df.drop(columns=[f"Wilderness_Area{i}" for i in range(1, 5)])

In [7]:
# Combine the Soil Type column into 1 column
df['Soil_Type'] = df.loc[:, 'Soil_Type1':'Soil_Type40'].idxmax(axis=1)
df['Soil_Type'] = df['Soil_Type'].str.extract(r'(\d+)').astype(int)
df = df.drop(columns=[f"Soil_Type{i}" for i in range(1, 41)])

Columns are combined because the data obtained is the result of encoding.

In [8]:
# Reduces class value
df['class'] = df['class']-1

Reducing the class value to match the input in tensorflow.

### Model Training

In [9]:
# Split the data into features (X) and target (y)
X = df.drop(columns=['class'])
y = df['class']

# Normalize the features using StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [10]:
# Build a Neural Network Model
with tf.device('/device:GPU:0'):
  def build_model(hp):
      model = keras.Sequential()
      model.add(layers.Input(shape=(X_train.shape[1],)))

      for i in range(hp.Int('num_layers', min_value=1, max_value=5)):
          model.add(layers.Dense(units=hp.Int('units_' + str(i), min_value=32, max_value=512, step=32), activation='relu'))

      model.add(layers.Dense(7, activation='softmax'))

      hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

      model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])

      return model

  tuner = RandomSearch(
      build_model,
      objective='val_accuracy',
      max_trials=10,
      directory='my_tuner_directory'
  )

  tuner.search(X_train, y_train, epochs=50, validation_data=(X_val, y_val))
  best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

  best_model = tuner.hypermodel.build(best_hps)
  best_model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val))

Trial 10 Complete [00h 41m 03s]
val_accuracy: 0.9498631954193115

Best val_accuracy So Far: 0.9507753849029541
Total elapsed time: 05h 49m 41s
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67

### Model Evaluation

In [11]:
test_loss, test_accuracy = best_model.evaluate(X_test, y_test, verbose=2)
print(f'Test accuracy: {test_accuracy:.4f}')

1816/1816 - 3s - loss: 0.1240 - accuracy: 0.9554 - 3s/epoch - 2ms/step
Test accuracy: 0.9554


### Model Saving

In [12]:
from joblib import dump
dump(scaler, 'scaler.joblib')

['scaler.joblib']

In [13]:
best_model.save("TCM.h5")

  saving_api.save_model(


## Model Inference

In [14]:
# Create a dictionary or list to specify the values for each feature
input_data = {
    'Elevation': 2000,
    'Aspect': 318,
    'Slope': 7,
    'Horizontal_Distance_To_Hydrology': 30,
    'Vertical_Distance_To_Hydrology': 4,
    'Horizontal_Distance_To_Roadways': 108,
    'Hillshade_9am': 201,
    'Hillshade_Noon': 234,
    'Hillshade_3pm': 172,
    'Horizontal_Distance_To_Fire_Points': 268,
    'Wilderness_Area': 4,
    'Soil_Type': 17
}

# Create a NumPy array from the values
new_test = np.array(list(input_data.values())).reshape(1, -1)
new_test = scaler.transform(new_test)

# Make a prediction for the single data point
prediction = best_model.predict(new_test)
predicted_class = np.argmax(prediction)
print("Predicted Class:", predicted_class)

Predicted Class: 2


