In [49]:
"""TensorFlow Neural Network for Forest Cover Type Prediction"""

'TensorFlow Neural Network for Forest Cover Type Prediction'

In [50]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential,layers,regularizers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

In [51]:
print("Loading and preprocessing data...")
data = pd.read_csv(r'../datasets/forest.csv')

Loading and preprocessing data...


In [52]:
# --- Collapse Wilderness Areas ---
wilderness_cols = [c for c in data.columns if c.startswith("Wilderness_Area")]
data["Wilderness_Area"] = np.argmax(data[wilderness_cols].values, axis=1) + 1
data = data.drop(columns=wilderness_cols)

# --- Collapse Soil Types ---
soil_cols = [c for c in data.columns if c.startswith("Soil_Type")]
data["Soil_Type"] = np.argmax(data[soil_cols].values, axis=1) + 1
data = data.drop(columns=soil_cols)

# Drop Id (not useful for ML)
data = data.drop(columns=["Id"])

In [53]:
data = data.dropna()
print(f"Data shape after preprocessing: {data.shape}")

Data shape after preprocessing: (15120, 13)


In [54]:
data.head()

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,Cover_Type,Wilderness_Area,Soil_Type
0,2596,51,3,258,0,510,221,232,148,6279,5,1,29
1,2590,56,2,212,-6,390,220,235,151,6225,5,1,29
2,2804,139,9,268,65,3180,234,238,135,6121,2,1,12
3,2785,155,18,242,118,3090,238,238,122,6211,2,1,30
4,2595,45,2,153,-1,391,220,234,150,6172,5,1,29


In [55]:
# Features and Target
X = data.drop(columns=["Cover_Type"])
y = data["Cover_Type"]


In [56]:
y = y-1

In [57]:
print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"Number of classes: {len(np.unique(y))}")


Features shape: (15120, 12)
Target shape: (15120,)
Number of classes: 7


In [58]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [59]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [60]:
print(f"Training samples: {X_train_scaled.shape[0]}")
print(f"Test samples: {X_test_scaled.shape[0]}")
print(f"Features: {X_train_scaled.shape[1]}")

Training samples: 12096
Test samples: 3024
Features: 12


In [61]:
model = Sequential()

In [62]:
# Input layer
model.add(layers.Input(shape=(12,)))

# First hidden layer (largest) - 1024 neurons
model.add(layers.Dense(
    1024, 
    activation='relu',
    kernel_regularizer=regularizers.L1L2()
))
model.add(layers.Dropout(0.3))

# Second hidden layer - 512 neurons
model.add(layers.Dense(
    512, 
    activation='relu',
    kernel_regularizer=regularizers.L1L2()
))
model.add(layers.Dropout(0.4))

# Third hidden layer - 256 neurons
model.add(layers.Dense(
    256, 
    activation='relu',
    kernel_regularizer=regularizers.L1L2()
))
model.add(layers.Dropout(0.4))

# Fourth hidden layer - 128 neurons
model.add(layers.Dense(
    128, 
    activation='relu',
    kernel_regularizer=regularizers.L1L2()
))
model.add(layers.Dropout(0.4))

# Fifth hidden layer - 64 neurons
model.add(layers.Dense(
    64, 
    activation='relu',
    kernel_regularizer=regularizers.L1L2()
))
model.add(layers.Dropout(0.3))

# Sixth hidden layer - 32 neurons
model.add(layers.Dense(
    32, 
    activation='relu',
    kernel_regularizer=regularizers.L1L2()
))
model.add(layers.Dropout(0.2))

# Output layer - 7 classes
model.add(layers.Dense(7, activation='softmax'))


In [63]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=50,
    restore_best_weights=True
)

In [64]:
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [65]:
print("\nModel Architecture:")
model.summary()


Model Architecture:


In [66]:
print("\nTraining the model...")
history = model.fit(
    X_train_scaled, y_train,
    validation_data=(X_test_scaled, y_test),
    epochs=500,
    batch_size=64,
    callbacks=[early_stopping]
)



Training the model...
Epoch 1/500
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14ms/step - accuracy: 0.4034 - loss: 1.3836 - val_accuracy: 0.6151 - val_loss: 0.8889
Epoch 2/500
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.5742 - loss: 0.9820 - val_accuracy: 0.6587 - val_loss: 0.8008
Epoch 3/500
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.6131 - loss: 0.8964 - val_accuracy: 0.6901 - val_loss: 0.7564
Epoch 4/500
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.6468 - loss: 0.8302 - val_accuracy: 0.6736 - val_loss: 0.7454
Epoch 5/500
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.6699 - loss: 0.8018 - val_accuracy: 0.7077 - val_loss: 0.7194
Epoch 6/500
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.6846 - loss: 0.7750 - val_accuracy: 0.7179 - val_loss: 

In [67]:
print("\nEvaluating the model...")
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Test Accuracy: {test_accuracy:.4f}")



Evaluating the model...
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8717 - loss: 0.5026
Test Accuracy: 0.8717


In [68]:
y_pred_probs = model.predict(X_test_scaled, verbose=0)
y_pred = np.argmax(y_pred_probs, axis=1)

In [69]:
y_test_original = y_test + 1
y_pred_original = y_pred + 1

In [70]:
print(f"Accuracy: {accuracy_score(y_test_original, y_pred_original):.4f}")
print("\nClassification Report:")
print(classification_report(y_test_original, y_pred_original))

Accuracy: 0.8717

Classification Report:
              precision    recall  f1-score   support

           1       0.82      0.76      0.79       432
           2       0.77      0.72      0.74       432
           3       0.86      0.84      0.85       432
           4       0.95      0.98      0.96       432
           5       0.89      0.96      0.92       432
           6       0.86      0.88      0.87       432
           7       0.94      0.97      0.95       432

    accuracy                           0.87      3024
   macro avg       0.87      0.87      0.87      3024
weighted avg       0.87      0.87      0.87      3024



In [71]:
sample_indices = range(5)
sample_predictions = y_pred_original[sample_indices]
sample_actual = y_test_original.iloc[sample_indices].tolist()

In [72]:
print(f"\nSample Predictions: {sample_predictions.tolist()}")
print(f"Actual Values: {sample_actual}")


Sample Predictions: [4, 6, 4, 5, 4]
Actual Values: [4, 6, 4, 5, 4]


In [73]:
model.save("forest_cover_model.keras")
print("✅ TensorFlow model saved as forest_cover_model.keras")

✅ TensorFlow model saved as forest_cover_model.keras


In [74]:
def representative_dataset():
    for i in range(100):
        idx = np.random.randint(0, len(X_train_scaled))
        sample = X_train_scaled[idx:idx+1].astype(np.float32)
        yield [sample]

In [75]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset

In [76]:
tflite_model = converter.convert()

INFO:tensorflow:Assets written to: C:\Users\pushp\AppData\Local\Temp\tmpuojhhrs9\assets


INFO:tensorflow:Assets written to: C:\Users\pushp\AppData\Local\Temp\tmpuojhhrs9\assets


Saved artifact at 'C:\Users\pushp\AppData\Local\Temp\tmpuojhhrs9'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 12), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 7), dtype=tf.float32, name=None)
Captures:
  2723370177872: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2723370181136: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2723370178640: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2723370179600: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2723370180368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2723370183824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2723370184400: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2723370184976: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2723370183440: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2723370183248: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2723370186128:



In [77]:
with open("forest_cover_model_quantized.tflite", "wb") as f:
    f.write(tflite_model)


In [78]:
print("\nTesting quantized TFLite model accuracy...")
interpreter = tf.lite.Interpreter(model_path="forest_cover_model_quantized.tflite")
interpreter.allocate_tensors()


Testing quantized TFLite model accuracy...


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [79]:
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

In [80]:
tflite_predictions = []
for sample in X_test_scaled:
    interpreter.set_tensor(input_details[0]['index'], sample.reshape(1, -1).astype(np.float32))
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    pred_class = np.argmax(output_data) + 1  # Convert back to 1-7
    tflite_predictions.append(pred_class)

In [81]:
tflite_accuracy = accuracy_score(y_test_original, tflite_predictions)
print(f"Original TensorFlow model accuracy: {test_accuracy:.4f}")
print(f"Quantized TFLite model accuracy: {tflite_accuracy:.4f}")
print(f"Accuracy difference: {(test_accuracy - tflite_accuracy):.4f}")

Original TensorFlow model accuracy: 0.8717
Quantized TFLite model accuracy: 0.8433
Accuracy difference: 0.0284


In [82]:
print(f"\nFirst 10 predictions comparison:")
print(f"Original model: {y_pred_original[:10].tolist()}")
print(f"Quantized model: {tflite_predictions[:10]}")
print(f"Actual values: {y_test_original.iloc[:10].tolist()}")


First 10 predictions comparison:
Original model: [4, 6, 4, 5, 4, 7, 7, 4, 7, 7]
Quantized model: [np.int64(4), np.int64(6), np.int64(4), np.int64(5), np.int64(3), np.int64(7), np.int64(7), np.int64(4), np.int64(7), np.int64(7)]
Actual values: [4, 6, 4, 5, 4, 7, 7, 4, 7, 2]


In [83]:
import os
keras_size = os.path.getsize("forest_cover_model.keras") / 1024  # KB
tflite_size = os.path.getsize("forest_cover_model_quantized.tflite") / 1024  # KB

In [84]:
print(f"\nModel Size Comparison:")
print(f"TensorFlow model: {keras_size:.2f} KB")
print(f"Quantized TFLite model: {tflite_size:.2f} KB")
print(f"Size reduction: {((keras_size - tflite_size) / keras_size * 100):.1f}%")


Model Size Comparison:
TensorFlow model: 8412.19 KB
Quantized TFLite model: 754.05 KB
Size reduction: 91.0%
