<a href="https://colab.research.google.com/github/LSShrivathsan/soil-moisture-analysis/blob/main/randomregressormodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

# Set a random seed for reproducibility
np.random.seed(0)

# Define the number of data points
num_samples = 100


def make_soil_dataset(temperature_range, humidity_range, n_samples):
    """Generate one synthetic soil-moisture scenario.

    Temperature and humidity are drawn uniformly from the given ranges and
    soil moisture is computed as
    ``30 - 0.1 * temperature + 0.2 * humidity + noise``, where ``noise`` is
    drawn from a normal distribution with mean 0 and standard deviation 2.

    All draws use NumPy's global random state, in the fixed order
    temperature -> humidity -> noise. The values therefore depend on the seed
    set before the call and on the order in which scenarios are generated.

    Parameters
    ----------
    temperature_range : tuple
        ``(low, high)`` bounds passed to ``np.random.uniform`` for temperature.
    humidity_range : tuple
        ``(low, high)`` bounds passed to ``np.random.uniform`` for humidity.
    n_samples : int
        Number of rows to generate.

    Returns
    -------
    pandas.DataFrame
        One row per sample with temperature, humidity and soil-moisture columns.
    """
    temperature = np.random.uniform(temperature_range[0], temperature_range[1], n_samples)
    humidity = np.random.uniform(humidity_range[0], humidity_range[1], n_samples)
    noise = np.random.normal(0, 2, n_samples)
    soil_moisture = 30 - 0.1 * temperature + 0.2 * humidity + noise
    # NOTE(review): '째C' looks like a mis-decoded degree sign; it is kept as-is
    # because the modelling cell selects the column by this exact label.
    return pd.DataFrame({'Temperature (째C)': temperature,
                         'Humidity (%)': humidity,
                         'Soil Moisture (%)': soil_moisture})


# Generate synthetic data for three different scenarios.
# The call order matters: each call consumes the global random stream.
# 1. Irrigation Dataset: lower temperature, lower humidity
irrigation_data = make_soil_dataset((15, 25), (40, 60), num_samples)

# 2. Drainage Dataset: higher temperature, higher humidity
drainage_data = make_soil_dataset((20, 30), (70, 80), num_samples)

# 3. Optimal Soil Moisture Dataset: moderate temperature, moderate humidity
optimal_data = make_soil_dataset((20, 25), (60, 70), num_samples)

# Save the datasets to CSV files
irrigation_data.to_csv('irrigation_dataset.csv', index=False)
drainage_data.to_csv('drainage_dataset.csv', index=False)
optimal_data.to_csv('optimal_dataset.csv', index=False)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Loading the dataset
data = pd.read_csv('irrigation_dataset.csv')

# Extracting the features (temperature, humidity) and the target (soil moisture)
X = data[['Temperature (째C)', 'Humidity (%)']]
y = data['Soil Moisture (%)']

# Splitting the dataset into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter grid for the search: 3 * 4 * 3 * 3 = 108 combinations
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Random Forest Regressor model
rf_model = RandomForestRegressor(random_state=42)

# Performing grid search with 5-fold cross-validation to find the best hyperparameters
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Getting the best hyperparameters from the grid search
best_params = grid_search.best_params_
print(f'Best Hyperparameters: {best_params}')

# GridSearchCV (refit=True by default) has already retrained a clone of rf_model
# with the best hyperparameters on the whole training set, so reuse that fitted
# estimator instead of constructing and fitting an identical model a second time.
best_rf_model = grid_search.best_estimator_

# Predictions on the held-out test data using the best model
y_rf_pred = best_rf_model.predict(X_test)

# Evaluate the best model's performance using metrics.
# An R-squared below zero means the predictions are worse than always
# predicting the mean of y_test.
mse_rf = mean_squared_error(y_test, y_rf_pred)
r2_rf = r2_score(y_test, y_rf_pred)
print(f'Random Forest Mean Squared Error: {mse_rf}')
print(f'Random Forest R-squared: {r2_rf}')

irrigationThreshold = 40  # Soil moisture decision thresholds
drainageThreshold = 60

# Report one decision per prediction, numbering the data points from 1.
for i, predicted_moisture in enumerate(y_rf_pred):
    if predicted_moisture < irrigationThreshold:
        # Below the lower threshold
        print(f"Data Point {i+1}: Decision - Irrigate the soil")
        continue
    if predicted_moisture > drainageThreshold:
        # Above the upper threshold
        print(f"Data Point {i+1}: Decision - Drain the soil")
        continue
    # Neither threshold was crossed
    print(f"Data Point {i+1}: Decision - Soil moisture is optimal")

Best Hyperparameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 50}
Random Forest Mean Squared Error: 4.422253526439638
Random Forest R-squared: -0.4349272488397533
Data Point 1: Decision - Irrigate the soil
Data Point 2: Decision - Irrigate the soil
Data Point 3: Decision - Irrigate the soil
Data Point 4: Decision - Soil moisture is optimal
Data Point 5: Decision - Soil moisture is optimal
Data Point 6: Decision - Soil moisture is optimal
Data Point 7: Decision - Irrigate the soil
Data Point 8: Decision - Irrigate the soil
Data Point 9: Decision - Irrigate the soil
Data Point 10: Decision - Irrigate the soil
Data Point 11: Decision - Irrigate the soil
Data Point 12: Decision - Irrigate the soil
Data Point 13: Decision - Irrigate the soil
Data Point 14: Decision - Irrigate the soil
Data Point 15: Decision - Irrigate the soil
Data Point 16: Decision - Irrigate the soil
Data Point 17: Decision - Irrigate the soil
Data Point 18: Decision - Soil mois