In [6]:
import joblib
import numpy as np
import pandas as pd

# Load the trained models
rf_model = joblib.load('random_forest_model.joblib')
svm_model = joblib.load('svm_model.joblib')
log_reg_model = joblib.load('logistic_regression_model.joblib')

# Function to calculate DRASTIC index with the new model (includes Land_Use)
def calculate_drastric_index(depth_to_water, net_recharge, aquifer_media, soil_media, topography, vadose_zone, hydraulic_conductivity, land_use):
    drastic_index = (depth_to_water * 5 +
                     net_recharge * 4 +
                     aquifer_media * 3 +
                     soil_media * 2 +
                     topography * 1 +
                     vadose_zone * 5 +
                     hydraulic_conductivity * 3 +
                     land_use * 4)  # Weight for Land_Use
    return drastic_index

# Function to suggest actions based on contamination category
def suggest_actions(contamination_category):
    suggestions = {
        'low': "Contamination levels are low. Regular monitoring is recommended.",
        'medium': "Contamination levels are medium. Consider implementing management practices to reduce risks.",
        'high': "Contamination levels are high. Immediate action is required to mitigate contamination."
    }
    return suggestions.get(contamination_category, "No suggestions available.")

# Encoding maps for Vadose Zone, Aquifer Media, and Land Use
vadose_zone_encoding = {'Sandy': 1, 'Clay': 2, 'Silty': 3, 'Fractured':4, 'Gravel':5}
aquifer_media_encoding = {'Gravel': 1, 'Sandstone': 2, 'Limestone': 3, 'Shale': 4, 'Basalt': 5}
land_use_encoding = {'Urban': 1, 'Agricultural': 2, 'Forested': 3}

def process_csv(input_file, output_file):
    # Read the input CSV file
    data = pd.read_csv(input_file)

    # Ensure all required columns are present
    required_columns = [
        'Depth_to_Water', 'Net_Recharge', 'Aquifer_Media', 'Soil_Media',
        'Topography', 'Vadose_Zone', 'Hydraulic_Conductivity', 'Land_Use'
    ]
    if not all(column in data.columns for column in required_columns):
        raise ValueError(f"Input file must contain the following columns: {required_columns}")

    # Strip leading and trailing spaces from string columns
    for column in ['Aquifer_Media', 'Vadose_Zone', 'Land_Use']:
        data[column] = data[column].str.strip()

    # One-hot encode categorical columns to match the training data
    encoded_data = pd.get_dummies(data, columns=['Aquifer_Media', 'Vadose_Zone', 'Land_Use'], drop_first=True)

    # Ensure the input data has the same columns as the training data
    # Add missing columns with 0 values to align with the model
    model_features = rf_model.feature_names_in_  # Features used during training
    for column in model_features:
        if column not in encoded_data.columns:
            encoded_data[column] = 0

    # Reorder columns to match the training data
    encoded_data = encoded_data[model_features]

    # Apply the evaluation function row by row
    results = encoded_data.apply(evaluate_row, axis=1)

    # Convert results into a DataFrame
    results_df = pd.DataFrame(results.tolist(), columns=[
        'RF_Prediction', 'RF_Suggestion',
        'SVM_Prediction', 'SVM_Suggestion',
        'Log_Reg_Prediction', 'Log_Reg_Suggestion'
    ])

    # Combine the original data with the results
    output_data = pd.concat([data, results_df], axis=1)

    # Save the results to the output CSV file
    output_data.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

def evaluate_row(row):
    # Use the preprocessed input row directly for prediction
    input_data = np.array(row).reshape(1, -1)

    # Make predictions
    rf_prediction = rf_model.predict(input_data)[0]
    svm_prediction = svm_model.predict(input_data)[0]
    log_reg_prediction = log_reg_model.predict(input_data)[0]

    # Get suggestions based on predictions
    rf_suggestion = suggest_actions(rf_prediction)
    svm_suggestion = suggest_actions(svm_prediction)
    log_reg_suggestion = suggest_actions(log_reg_prediction)

    return rf_prediction, rf_suggestion, svm_prediction, svm_suggestion, log_reg_prediction, log_reg_suggestion

# Main function to process input
def main():
    print("Batch Groundwater Vulnerability Prediction with Updated DRASTIC Model")
    input_file = input("Enter the input CSV file path (e.g., input.csv): ")
    output_file = input("Enter the output CSV file path (e.g., output.csv): ")

    # Process the CSV and generate results
    process_csv(input_file, output_file)

# Run the main function
if __name__ == "__main__":
    main()

Batch Groundwater Vulnerability Prediction with Updated DRASTIC Model


Enter the input CSV file path (e.g., input.csv):  newtable.csv
Enter the output CSV file path (e.g., output.csv):  output2.csv


AttributeError: 'RandomForestClassifier' object has no attribute 'feature_names_in_'