# Model building

### 1. Import libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import joblib
from sklearn.model_selection import GridSearchCV

### 2. Data preprocessing

In [None]:
# Load the raw tomato dataset from a CSV file given by a relative path
csv_path = "tomato_dataset.csv"
df = pd.read_csv(csv_path)

In [None]:
# Preview the DataFrame as loaded from the CSV file
df

In [None]:
# Keep only the columns needed for modelling: the identifier / bookkeeping
# columns listed below are removed from the frame.
cols_to_drop = [
    "id_soil",
    "device_identifier_soil",
    "line",
    "id_env",
    "device_identifier_env",
]
df = df.drop(columns=cols_to_drop)

In [None]:
# Preview the DataFrame after removing the columns in cols_to_drop
df

In [None]:
# Map the raw column names to the names used in the rest of the notebook
column_renames = {
    'humidity_soil': 'soil_moisture',
    'temperature_soil': 'soil_temperature',
    'humidity_env': 'env_humidity',
    'temperature_env': 'env_temperature',
    'time': 'hourly_time',
    'sum_rain': 'precipitations_mm',
    'mean_humidity': 'humidity',
    'mean_et0_fao': 'et0_fao',
    'irrigation_label': 'irrigation_status',
}
# Rename in place so `df` keeps referring to the same object
df.rename(columns=column_renames, inplace=True)

In [None]:
# Preview the DataFrame with the renamed columns
df

In [None]:
# Build the modelling frame: same rows as `df`, minus the two time columns
time_columns = ['ts_generation', 'hourly_time']
data = df.drop(columns=time_columns)

In [None]:
# Preview the frame without the 'ts_generation' and 'hourly_time' columns
data

In [None]:
# Turn the irrigation_status labels into integer codes.
# The fitted encoder is kept so predictions can be decoded back later.
label_encoder = LabelEncoder()
label_encoder.fit(data['irrigation_status'])
data['irrigation_status'] = label_encoder.transform(data['irrigation_status'])


In [None]:
# Preview the frame after irrigation_status has been label-encoded
data

In [None]:
# Target vector: the encoded irrigation_status column
y = data['irrigation_status'].values

# Feature matrix: every remaining column, in its original order
feature_frame = data.drop(columns='irrigation_status')
X = feature_frame.values

In [None]:
# Scale the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### 3. Training

In [None]:
# Base estimator handed to the grid search; the searched hyper-parameters
# below override its defaults for each candidate.
model = MLPClassifier(max_iter=500, random_state=42)

# Search space: 4 * 2 * 2 * 3 * 2 = 96 candidate configurations
param_grid = dict(
    hidden_layer_sizes=[(64, 32), (128, 64), (64,), (128,)],  # one- and two-layer networks
    activation=['relu', 'tanh'],                              # hidden-layer activations
    solver=['adam', 'sgd'],                                   # weight optimisers
    alpha=[0.0001, 0.001, 0.01],                              # regularization strength
    learning_rate=['constant', 'adaptive'],                   # learning-rate schedules
)

# Exhaustive search scored by accuracy with 3-fold cross-validation
# (96 candidates x 3 folds = 288 fits); verbose=3 prints progress.
grid_search = GridSearchCV(
    model,
    param_grid,
    scoring='accuracy',
    cv=3,
    verbose=3,
)

# Run the search on the training split
grid_search.fit(X_train, y_train)

# Report the winning configuration and its mean cross-validation score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

# Keep the best estimator and predict on the test split (optional)
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

### 4. Testing

In [None]:
# Evaluate the tuned model on the held-out test set.
# GridSearchCV fits clones of `model`, so `model` itself is never fitted and
# calling model.predict() would raise NotFittedError. Use the best estimator
# selected by the grid search instead.
y_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {test_accuracy:.2f}")

In [None]:
# Save the scaler, model, and label encoder.
# Persist `best_model` (the fitted estimator chosen by the grid search), not
# `model`, which is only the unfitted template passed to GridSearchCV and
# would be useless for prediction once reloaded.
joblib.dump(scaler, 'StandardScaler_2.joblib')
joblib.dump(best_model, 'mlp_model.joblib')
joblib.dump(label_encoder, 'label_encoder_2.joblib')


In [None]:
# Function to predict a single row
def prepare_single_row_for_prediction(features_array, model, scaler, label_encoder):
    """Predict the decoded class label for one row of raw feature values.

    Args:
        features_array: The feature values of a single sample (one row).
        model: Fitted estimator exposing ``predict``.
        scaler: Fitted transformer exposing ``transform``.
        label_encoder: Fitted encoder exposing ``inverse_transform``.

    Returns:
        The decoded label of the single predicted sample.
    """
    # Wrap the row in a list so the scaler receives a one-sample batch
    scaled_batch = scaler.transform([features_array])

    # Predict the encoded class, then map it back to the original label
    decoded_labels = label_encoder.inverse_transform(model.predict(scaled_batch))
    print(decoded_labels)

    return decoded_labels[0]

# Example usage
# Pass `best_model` (the fitted estimator from the grid search); `model` is
# the unfitted template given to GridSearchCV and cannot predict.
sample_row = np.array([723.0, 31.90, 29.0, 31.5, 35.9, 23.1, 85.222222, 0.144306])  # Replace with actual features
predicted_class = prepare_single_row_for_prediction(sample_row, best_model, scaler, label_encoder)
print(f"Predicted class: {predicted_class}")

In [None]:
# Show which integer code the encoder assigned to each original class label
label_mapping = dict(enumerate(label_encoder.classes_))
print("Label Mapping (Encoded Value -> Class Name):")
for code in label_mapping:
    print(f"{code}: {label_mapping[code]}")