In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

# Load the dataset
df = pd.read_csv('dataset.csv')

# Display basic information about the dataset
print("Dataset Info:")
print(f"Shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
print("\nFirst few rows:")
print(df.head())
print("\nDataset statistics:")
print(df.describe())


# Prepare the features and target
# Encode the Province column (categorical feature)
le = LabelEncoder()
df['Province_encoded'] = le.fit_transform(df['Province'])

# Define features (X) and target (y)
feature_columns = ['Province_encoded', 'Harvested Area', 'Rainfall', 'Humidity', 'Temperature']
X = df[feature_columns]
y = df['Production']

print(f"\nFeatures shape: {X.shape}")
print(f"Target shape: {y.shape}")

# Split the data into training and testing sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"\nTraining set size: {X_train.shape[0]} samples")
print(f"Testing set size: {X_test.shape[0]} samples")

#scaling feature usign MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
feature_scaler = MinMaxScaler(feature_range=(0, 1))
target_scaler = MinMaxScaler(feature_range=(0, 1))

#scale features
X_train_scaled = feature_scaler.fit_transform(X_train)
X_train_scaled = pd.DataFrame(X_train_scaled, columns=["Province","Harvested Area","Rainfall","Humidity","Temperature"], index=X_train.index)

# Scale target variable
# Reshape is needed because StandardScaler expects 2D array
y_train_scaled = target_scaler.fit_transform(y_train.values.reshape(-1, 1)).flatten()

# Step 6: Transform the test data using the same scalers
# CRUCIAL: Use transform (not fit_transform) on test data
X_test_scaled = feature_scaler.transform(X_test)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=["Province","Harvested Area","Rainfall","Humidity","Temperature"], index=X_test.index)
y_test_scaled = target_scaler.transform(y_test.values.reshape(-1, 1)).flatten()

# Build the Artificial Neural Network
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='linear')  # Linear activation for regression
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae']
)

# Display model architecture
print("\nModel Architecture:")
model.summary()

# Train the model
history = model.fit(
    X_train_scaled, y_train_scaled,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    verbose=1
)


from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

y_pred_scaled = model.predict(X_test_scaled)

# Regression metrics
mse = mean_squared_error(y_test_scaled, y_pred_scaled)
mae = mean_absolute_error(y_test_scaled, y_pred_scaled)
r2 = r2_score(y_test_scaled, y_pred_scaled)

print(f"Mean Squared Error: {mse:.6f}")
print(f"Mean Absolute Error: {mae:.6f}")
print(f"R² Score: {r2:.6f}")


Dataset Info:
Shape: (204, 7)
Columns: ['Province', 'Year', 'Harvested Area', 'Production', 'Rainfall', 'Humidity', 'Temperature']

First few rows:
  Province  Year  Harvested Area  Production  Rainfall  Humidity  Temperature
0     Aceh  2018          329516     1861567      2336        81           28
1     Aceh  2019          310012     1714438      1437        82           27
2     Aceh  2020          317869     1757313      1790        76           29
3     Aceh  2021          297058     1634640      2293        76           29
4     Aceh  2022          271750     1509456      1834        76           29

Dataset statistics:
              Year  Harvested Area    Production     Rainfall    Humidity  \
count   204.000000    2.040000e+02  2.040000e+02   204.000000  204.000000   
mean   2020.500000    3.126199e+05  1.623743e+06  2555.210784   80.230392   
std       1.712026    4.735449e+05  2.682466e+06   834.901557    3.966716   
min    2018.000000    1.390000e+02  4.230000e+02   490.

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 0.0922 - mae: 0.2071 - val_loss: 0.0754 - val_mae: 0.1470
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0459 - mae: 0.1381 - val_loss: 0.0727 - val_mae: 0.1765
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0465 - mae: 0.1492 - val_loss: 0.0700 - val_mae: 0.1703
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0401 - mae: 0.1352 - val_loss: 0.0680 - val_mae: 0.1496
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0388 - mae: 0.1253 - val_loss: 0.0651 - val_mae: 0.1364
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0483 - mae: 0.1311 - val_loss: 0.0595 - val_mae: 0.1282
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0334 -