In [15]:
import joblib
import numpy as np
import pandas as pd

# Load the trained models
# Each file is deserialized once, at import time, and bound to a module-level
# name that evaluate_row() later calls .predict() on. The paths are relative,
# so they resolve against the current working directory.
# NOTE(review): joblib.load unpickles arbitrary Python objects; only load
# model files that come from a trusted source.
# NOTE(review): the estimator types are presumed from the file names
# (random forest, SVM, logistic regression) -- confirm against the training code.
rf_model = joblib.load('random_forest_model.joblib')
svm_model = joblib.load('svm_model.joblib')
log_reg_model = joblib.load('logistic_regression_model.joblib')

# Function to calculate DRASTIC index
def calculate_drastric_index(depth_to_water, net_recharge, aquifer_media, soil_media, topography, vadose_zone, hydraulic_conductivity):
    """Return the weighted sum of the seven DRASTIC parameter ratings.

    Each rating is multiplied by its fixed weight (5, 4, 3, 2, 1, 5, 3 in
    parameter order) and the products are added left to right.
    """
    rating_weight_pairs = (
        (depth_to_water, 5),
        (net_recharge, 4),
        (aquifer_media, 3),
        (soil_media, 2),
        (topography, 1),
        (vadose_zone, 5),
        (hydraulic_conductivity, 3),
    )
    (first_rating, first_weight), *remaining_pairs = rating_weight_pairs

    # Accumulate explicitly (rather than with sum()) so the additions happen
    # in the same order as the terms are listed.
    total = first_rating * first_weight
    for rating, weight in remaining_pairs:
        total = total + rating * weight
    return total

# Function to suggest actions based on contamination category
def suggest_actions(contamination_category):
    """Return the advice text for a contamination category.

    Recognised categories are 'low', 'medium' and 'high' (matched exactly).
    Any other hashable value yields "No suggestions available.".
    """
    advice_by_level = {
        'low': "Contamination levels are low. Regular monitoring is recommended.",
        'medium': "Contamination levels are medium. Consider implementing management practices to reduce risks.",
        'high': "Contamination levels are high. Immediate action is required to mitigate contamination.",
    }
    try:
        return advice_by_level[contamination_category]
    except KeyError:
        # Unknown category: fall back to the generic message.
        return "No suggestions available."

# Encoding maps for Vadose Zone and Aquifer Media
# Labels are numbered 1, 2, 3, ... in the order listed; lookups are exact and
# case-sensitive, so 'Confined' and 'unconfined' must be spelled as shown.
vadose_zone_encoding = {
    label: code for code, label in enumerate(('soft', 'medium', 'hard'), start=1)
}
aquifer_media_encoding = {
    label: code for code, label in enumerate(('Confined', 'unconfined'), start=1)
}

def evaluate_row(row):
    """Predict the contamination category of one record with all three models.

    `row` is indexed by column name. The 'Vadose_Zone' and 'Aquifer_Media'
    values are stripped of surrounding whitespace and mapped to numeric codes
    through the module-level encoding dicts; the other five values are used
    as they are.

    Returns a 6-tuple:
    (rf_prediction, rf_suggestion, svm_prediction, svm_suggestion,
     log_reg_prediction, log_reg_suggestion).

    Raises KeyError if a categorical label has no entry in its encoding dict.
    """
    # Encode the two categorical fields first.
    vadose_code = vadose_zone_encoding[row['Vadose_Zone'].strip()]
    aquifer_code = aquifer_media_encoding[row['Aquifer_Media'].strip()]

    # Single-sample 2-D array; the feature order below is the one passed to
    # every model's predict().
    features = np.array([[
        row['Depth_to_Water'],
        row['Net_Recharge'],
        aquifer_code,
        row['Soil_Media'],  # Assuming Soil Media is already numerical
        row['Topography'],
        vadose_code,
        row['Hydraulic_Conductivity'],
    ]])

    # Run every model before building any suggestion text.
    labels = [
        model.predict(features)[0]
        for model in (rf_model, svm_model, log_reg_model)
    ]

    # Interleave each prediction with its suggestion.
    outcome = []
    for label in labels:
        outcome.extend((label, suggest_actions(label)))
    return tuple(outcome)

def process_csv(input_file, output_file):
    """Score every row of a CSV file with the three models and save the result.

    The input is read with pandas' default (comma) delimiter. Each row is
    passed to evaluate_row(), and the six prediction/suggestion columns are
    appended to the original columns before writing `output_file`, also
    comma-delimited and without the index.

    Parameters:
        input_file: path of the CSV file to read.
        output_file: path of the CSV file to write.

    Raises:
        ValueError: if any required column is absent; the message names the
            missing columns.
    """
    # Read the input CSV file (default comma delimiter)
    data = pd.read_csv(input_file)

    # Ensure all required columns are present
    required_columns = ['Depth_to_Water', 'Net_Recharge', 'Aquifer_Media', 'Soil_Media', 'Topography', 'Vadose_Zone', 'Hydraulic_Conductivity']
    missing_columns = [column for column in required_columns if column not in data.columns]
    if missing_columns:
        raise ValueError(
            f"Input file must contain the following columns: {required_columns}"
            f" (missing: {missing_columns})"
        )

    # Strip leading and trailing spaces from string columns
    for column in ['Aquifer_Media', 'Vadose_Zone']:
        data[column] = data[column].str.strip()

    print("Unique values in Aquifer_Media:", data['Aquifer_Media'].unique())
    print("Unique values in Vadose_Zone:", data['Vadose_Zone'].unique())

    result_columns = [
        'RF_Prediction', 'RF_Suggestion',
        'SVM_Prediction', 'SVM_Suggestion',
        'Log_Reg_Prediction', 'Log_Reg_Suggestion'
    ]

    # Apply the evaluation function to each row. On a frame with no rows,
    # DataFrame.apply(axis=1) returns a DataFrame rather than a Series of
    # tuples, so .tolist() would fail; a header-only file yields no results.
    if data.empty:
        result_rows = []
    else:
        result_rows = data.apply(evaluate_row, axis=1).tolist()

    # Convert results into a DataFrame that shares the input's index so the
    # column-wise concat lines the rows up one to one.
    results_df = pd.DataFrame(result_rows, columns=result_columns, index=data.index)

    # Combine the original data with the results
    output_data = pd.concat([data, results_df], axis=1)

    # Save the results to the output CSV file (default comma delimiter)
    output_data.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

# Main function to process CSV input
def main():
    """Prompt for the input and output CSV paths, then run the batch scoring."""
    print("Batch Groundwater Vulnerability Prediction")

    # Ask for the source path first, then the destination path.
    source_path = input("Enter the input CSV file path (e.g., input.csv): ")
    destination_path = input("Enter the output CSV file path (e.g., output.csv): ")

    # Hand both paths to the CSV pipeline, which writes the results file.
    process_csv(source_path, destination_path)


# Run the main function
if __name__ == "__main__":
    main()

Batch Groundwater Vulnerability Prediction


Enter the input CSV file path (e.g., input.csv):  ./table.csv
Enter the output CSV file path (e.g., output.csv):  output.csv


Unique values in Aquifer_Media: ['Confined' 'unconfined']
Unique values in Vadose_Zone: ['medium' 'soft' 'hard']
Results saved to output.csv


