In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import cv2
import os
from tqdm import tqdm
import joblib

In [None]:
def create_soil_image(fertility_level):
    """Render a synthetic 224x224 soil patch for a given fertility level.

    The patch is a flat base colour (lighter for low fertility, darker for high
    fertility) covered with randomly placed filled circles in slightly varied
    shades. Levels above 5 additionally get a handful of small dark specks.
    All randomness comes from NumPy's global ``np.random`` state.

    Args:
        fertility_level: Numeric fertility score. Values below 3 use the light
            palette, values below 7 the medium palette, anything else the dark
            palette.

    Returns:
        A ``(224, 224, 3)`` ``uint8`` NumPy array.
    """
    side = 224

    # Per-channel (low, high) sampling bounds for the three fertility bands.
    light_palette = ((150, 180), (150, 180), (150, 180))
    medium_palette = ((100, 150), (80, 130), (60, 110))
    dark_palette = ((60, 100), (40, 80), (20, 60))
    channel_bounds = (
        light_palette if fertility_level < 3
        else medium_palette if fertility_level < 7
        else dark_palette
    )

    # Draw one base colour and flood the whole canvas with it.
    base_color = [np.random.randint(lo, hi) for lo, hi in channel_bounds]
    img = np.full((side, side, 3), base_color, dtype=np.uint8)

    # Texture: many small filled circles, each within +/-20 of the base colour
    # per channel (bounds kept inside 0..255).
    for _ in range(10000):
        cx = np.random.randint(0, side)
        cy = np.random.randint(0, side)
        shade = [
            np.random.randint(max(0, channel - 20), min(255, channel + 20))
            for channel in base_color
        ]
        radius = np.random.randint(1, 5)
        cv2.circle(img, (cx, cy), radius, shade, -1)

    # Higher-fertility soils get extra dark "organic matter" specks.
    if fertility_level > 5:
        for _ in range(50):
            cx = np.random.randint(0, side)
            cy = np.random.randint(0, side)
            radius = np.random.randint(1, 3)
            cv2.circle(img, (cx, cy), radius, (20, 20, 20), -1)

    return img

In [None]:
def generate_dataset(output_dir, num_images=1000, fertility_levels=10):
    """Generate a synthetic soil-image dataset with one sub-folder per level.

    For every integer level ``0 .. fertility_levels - 1`` a folder
    ``<output_dir>/<level>`` is created and filled with
    ``num_images // fertility_levels`` JPEG images named
    ``soil_<level>_<index>.jpg``. Any remainder of that division is dropped.
    Each image is rendered from the level plus uniform noise in [-0.5, 0.5),
    clamped to ``[0, fertility_levels - 0.01]``.

    Args:
        output_dir: Root directory of the dataset; created if missing.
        num_images: Total number of images requested across all levels.
        fertility_levels: Number of integer fertility levels (folders).

    Raises:
        ValueError: If ``fertility_levels`` is smaller than 1.
        OSError: If OpenCV reports that an image could not be written.
    """
    if fertility_levels < 1:
        raise ValueError("fertility_levels must be at least 1")

    os.makedirs(output_dir, exist_ok=True)

    images_per_level = num_images // fertility_levels
    # Upper clamp follows the requested number of levels (9.99 for the default
    # of 10) so that levels >= 10 are not all squashed to the same value.
    max_fertility = fertility_levels - 0.01

    for level in range(fertility_levels):
        level_dir = os.path.join(output_dir, str(level))
        os.makedirs(level_dir, exist_ok=True)

        for i in tqdm(range(images_per_level), desc=f"Generating images for fertility level {level}"):
            # Add some noise to the fertility level, then keep it in range.
            fertility_level = level + np.random.uniform(-0.5, 0.5)
            fertility_level = max(0, min(max_fertility, fertility_level))

            img = create_soil_image(fertility_level)

            # The image is converted RGB -> BGR because that is the channel
            # order cv2.imwrite expects.
            img_path = os.path.join(level_dir, f"soil_{level}_{i:04d}.jpg")
            written = cv2.imwrite(img_path, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
            if not written:
                # imwrite reports failure through its return value; surface it
                # instead of silently producing an incomplete dataset.
                raise OSError(f"Failed to write image: {img_path}")

In [None]:
if __name__ == "__main__":
    # Seed NumPy's global RNG before generating: create_soil_image and
    # generate_dataset draw all of their randomness from np.random, so a fixed
    # seed makes a re-run of this cell reproduce the same synthetic dataset.
    np.random.seed(42)

    output_dir = "synthetic_soil_datasetv1"
    generate_dataset(output_dir)
    print('\n')
    print(f"Dataset generated in {output_dir}")

Generating images for fertility level 0: 100%|██████████| 100/100 [00:29<00:00,  3.39it/s]
Generating images for fertility level 1: 100%|██████████| 100/100 [00:28<00:00,  3.48it/s]
Generating images for fertility level 2: 100%|██████████| 100/100 [00:29<00:00,  3.40it/s]
Generating images for fertility level 3: 100%|██████████| 100/100 [00:28<00:00,  3.57it/s]
Generating images for fertility level 4: 100%|██████████| 100/100 [00:28<00:00,  3.48it/s]
Generating images for fertility level 5: 100%|██████████| 100/100 [00:27<00:00,  3.62it/s]
Generating images for fertility level 6: 100%|██████████| 100/100 [00:28<00:00,  3.53it/s]
Generating images for fertility level 7: 100%|██████████| 100/100 [00:28<00:00,  3.50it/s]
Generating images for fertility level 8: 100%|██████████| 100/100 [00:28<00:00,  3.49it/s]
Generating images for fertility level 9: 100%|██████████| 100/100 [00:29<00:00,  3.43it/s]



Dataset generated in synthetic_soil_datasetv1





In [None]:
def extract_features(image_path):
    """Compute a 5-element colour/texture feature vector for one image file.

    The image is loaded, converted from BGR to RGB, resized to 224x224 and
    summarised as the per-channel mean colour followed by the mean and
    standard deviation of a Gabor-filtered grayscale copy.

    Args:
        image_path: Path of the image file to read.

    Returns:
        1-D NumPy array ``[mean_R, mean_G, mean_B, gabor_mean, gabor_std]``.

    Raises:
        ValueError: If the file cannot be read or decoded as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None (it does not raise) for missing or
        # undecodable files; fail here with a message naming the file.
        raise ValueError(f"Could not read image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Resize image to ensure consistent size
    img = cv2.resize(img, (224, 224))

    # Extract color features: mean over height and width, one value per channel
    average_color = np.mean(img, axis=(0, 1))

    # Extract texture features from the response to a single Gabor kernel
    # (21x21, sigma=8, theta=pi/4, lambda=10, gamma=0.5, psi=0).
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    gabor_kernel = cv2.getGaborKernel((21, 21), 8.0, np.pi/4, 10.0, 0.5, 0, ktype=cv2.CV_32F)
    # ddepth takes a depth, not a channel-qualified type: CV_8U states the
    # 8-bit unsigned output explicitly (the previous CV_8UC3 only worked
    # because its depth bits are also 8-bit unsigned). The response is
    # therefore stored as 8-bit values.
    filtered = cv2.filter2D(gray, cv2.CV_8U, gabor_kernel)
    texture_features = np.mean(filtered), np.std(filtered)

    # Combine features
    features = np.concatenate([average_color, texture_features])
    return features

In [None]:
def load_dataset(data_path):
    """Load features and labels from a folder-per-level image dataset.

    Every sub-directory of ``data_path`` whose name parses as a number is
    treated as one fertility level; each regular, non-hidden file inside it is
    passed to ``extract_features`` and labelled with that number.
    Sub-directories with non-numeric names (for example the
    ``.ipynb_checkpoints`` folder Jupyter creates) are skipped.

    Args:
        data_path: Root directory containing one sub-directory per level.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays: one feature row per image and
        the matching float label. Rows are ordered by sorted folder name, then
        sorted file name.
    """
    data = []
    labels = []

    # os.listdir order is arbitrary; sorting keeps row order (and therefore
    # any seeded train/test split made downstream) identical across machines.
    for fertility_level in sorted(os.listdir(data_path)):
        level_path = os.path.join(data_path, fertility_level)
        if not os.path.isdir(level_path):
            continue

        try:
            label = float(fertility_level)
        except ValueError:
            # Not a level folder; ignore it instead of crashing.
            continue

        # Skip hidden entries and nested directories, which are not images.
        image_files = sorted(
            name for name in os.listdir(level_path)
            if not name.startswith(".")
            and os.path.isfile(os.path.join(level_path, name))
        )

        for image_file in tqdm(image_files, desc=f"Processing {fertility_level}"):
            image_path = os.path.join(level_path, image_file)
            data.append(extract_features(image_path))
            labels.append(label)
    return np.array(data), np.array(labels)

In [None]:
def train_model(X, y):
    """Tune a gradient-boosting regressor by grid search and report metrics.

    The data is split 80/20 with a fixed seed, features are standardised with
    statistics fitted on the training part, and a 5-fold ``GridSearchCV``
    (scored by negative MSE) selects the hyperparameters. The refitted best
    model is then evaluated on the held-out 20% and the results are printed.

    Args:
        X: Feature matrix, one row per sample.
        y: Target values aligned with the rows of ``X``.

    Returns:
        Tuple ``(best_model, scaler)``: the best estimator found by the search
        and the fitted ``StandardScaler`` that must be applied to new features
        before calling ``best_model.predict``.
    """
    # Hold out 20% of the samples for the final evaluation.
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Standardise using statistics learned from the training part only.
    scaler = StandardScaler()
    X_fit_scaled = scaler.fit_transform(X_fit)
    X_holdout_scaled = scaler.transform(X_holdout)

    # Exhaustive search space: 3 * 4 * 3 * 3 * 3 = 324 candidates.
    search_space = {
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.01, 0.05, 0.1, 0.2],
        'max_depth': [3, 4, 5],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
    }

    search = GridSearchCV(
        estimator=GradientBoostingRegressor(random_state=42),
        param_grid=search_space,
        cv=5,
        n_jobs=-1,
        scoring='neg_mean_squared_error',
        verbose=1,
    )
    search.fit(X_fit_scaled, y_fit)
    best_model = search.best_estimator_

    # Score the selected model on the held-out samples.
    holdout_pred = best_model.predict(X_holdout_scaled)
    mse = mean_squared_error(y_holdout, holdout_pred)
    r2 = r2_score(y_holdout, holdout_pred)

    print(f"Best Hyperparameters: {search.best_params_}")
    print(f"Mean Squared Error: {mse}")
    print(f"R-squared Score: {r2}")

    return best_model, scaler

In [None]:
def categorize_fertility(fertility_level):
    """Map a numeric fertility score to a coarse category label.

    Scores below 4.88 are 'Low fertility', scores from 4.88 up to (but not
    including) 6.88 are 'Medium fertility', and everything else is
    'High fertility'.
    """
    # (exclusive upper bound, label) pairs, checked from the lowest band up.
    bands = (
        (4.88, 'Low fertility'),
        (6.88, 'Medium fertility'),
    )
    for upper_bound, label in bands:
        if fertility_level < upper_bound:
            return label
    return 'High fertility'

In [None]:
def predict_fertility(model, scaler, image_path):
    """Predict the fertility score of a single soil image.

    Args:
        model: Fitted estimator exposing ``predict``.
        scaler: Fitted transformer exposing ``transform``; applied to the
            features before prediction.
        image_path: Path of the image to score.

    Returns:
        The first (and only) element of the model's prediction for the image.
    """
    # Wrap the single feature vector as a one-row matrix before it is passed
    # to the scaler and the model.
    single_row = extract_features(image_path).reshape(1, -1)
    predictions = model.predict(scaler.transform(single_row))
    return predictions[0]

In [None]:
if __name__ == "__main__":
    # Read the dataset from the same relative folder name the generation cell
    # writes to, rather than a Colab-specific absolute "/content/..." path, so
    # the notebook also runs when the working directory is not /content.
    data_path = "synthetic_soil_datasetv1"
    X, y = load_dataset(data_path)
    model, scaler = train_model(X, y)

    # Save the trained model and scaler
    joblib.dump(model, 'soil_fertility_model.pkl')
    joblib.dump(scaler, 'scaler.pkl')

Processing 8: 100%|██████████| 100/100 [00:00<00:00, 263.02it/s]
Processing 0: 100%|██████████| 100/100 [00:00<00:00, 266.38it/s]
Processing 6: 100%|██████████| 100/100 [00:00<00:00, 129.80it/s]
Processing 9: 100%|██████████| 100/100 [00:00<00:00, 149.69it/s]
Processing 7: 100%|██████████| 100/100 [00:00<00:00, 212.84it/s]
Processing 1: 100%|██████████| 100/100 [00:00<00:00, 133.44it/s]
Processing 5: 100%|██████████| 100/100 [00:00<00:00, 264.58it/s]
Processing 2: 100%|██████████| 100/100 [00:00<00:00, 271.34it/s]
Processing 3: 100%|██████████| 100/100 [00:00<00:00, 256.04it/s]
Processing 4: 100%|██████████| 100/100 [00:00<00:00, 258.91it/s]

Fitting 5 folds for each of 324 candidates, totalling 1620 fits





Best Hyperparameters: {'learning_rate': 0.05, 'max_depth': 3, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
Mean Squared Error: 0.6973635462435294
R-squared Score: 0.9065935056398385


In [None]:
if __name__ == "__main__":
    # Reload the artefacts written by the training cell.
    model = joblib.load('soil_fertility_model.pkl')
    scaler = joblib.load('scaler.pkl')

    # Score one image and report both the raw value and its category.
    image_path = "/content/test_002.jpg"
    fertility_prediction = predict_fertility(model, scaler, image_path)
    print(f"Predicted Soil Fertility: {fertility_prediction}")
    print(f"Soil Category: {categorize_fertility(fertility_prediction)}")

Predicted Soil Fertility: 7.823010179407664
Soil Category: High fertility
