In [21]:
# Import required Python libraries
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import roc_auc_score, confusion_matrix
from lightgbm import LGBMClassifier
import joblib

# Feature-set switch. "AllMetrics" keeps every predictor; "DemDerivedIndicators"
# additionally drops LULC, Habitat, GDP, NDVI and Distance from Road.
DEMorAll = "AllMetrics"  # Options: "DemDerivedIndicators" OR "AllMetrics"

# Column names applied to both CSV files. The training table carries one extra
# trailing 'target' column; the study-area table does not.
COLUMN_NAMES = ['FID', 'X', 'Y', 'Aspect', 'Elevation', 'Distance to lineament', 'Lineament density',
                'NDVI', 'Plan curvature', 'Profile curvature', 'Slope', 'Slope length', 'STI',
                'SPI', 'TPI', 'TWI', 'VRM', 'LULC', 'Habitat', 'GDP',
                'Distance from River', 'Distance from Road']

# Identifier/coordinate columns are never used as predictors; the reduced
# feature set removes five further columns on top of those.
columns_to_drop = ['FID', 'X', 'Y']
if DEMorAll != 'AllMetrics':
    columns_to_drop += ['LULC', 'Habitat', 'GDP', 'NDVI', 'Distance from Road']

# Load the labelled samples (read with GBK encoding) and give them readable headers.
# Assigning the column list raises if the file does not have exactly 23 columns.
data01 = pd.read_csv('1844Points.csv', encoding='gbk')
data01.columns = COLUMN_NAMES + ['target']

# Plain reassignment instead of inplace=True: same result, no hidden mutation
data01 = data01.drop(columns=columns_to_drop)

# Location of the persisted LightGBM model. os.path.join is used instead of a
# hard-coded backslash so the path resolves on every OS and no invalid escape
# sequence ('\l') appears in the literal.
model_path = os.path.join('Output', 'light_{}.model'.format(DEMorAll))

# Set a random seed for reproducibility of results
seed = 4

# Input: study-area points to score. Output: CSV written by the prediction cell.
filename_import = 'TheLocationOfTheStudyArea.csv'
filename_export = os.path.join('Output', 'TheLocation_light_{}.csv'.format(DEMorAll))

# Load the study-area table and apply the same headers (it has no 'target' column)
data_all = pd.read_csv(filename_import)
data_all.columns = list(COLUMN_NAMES)


In [22]:
# Feature matrix for the whole study area: the same columns are removed as for
# the training table, so both share one feature set.
X_all = data_all.drop(columns=columns_to_drop)
seed = 4  # Random seed shared by both per-class splits

# Column order of the training table, reused to label the rebuilt frames below
header = data01.columns.tolist()


def _split_80_20(features, labels):
    """Split one class into 80% training / 20% validation rows using `seed`."""
    return train_test_split(features, labels, test_size=0.2, random_state=seed)


def _with_target(features, labels):
    """Stack features and labels column-wise into a DataFrame labelled with `header`."""
    return pd.DataFrame(np.column_stack([features, labels]), columns=header)


# Partition the samples by class so that each class is split on its own
data_0 = data01.loc[data01['target'] == 0]
data_1 = data01.loc[data01['target'] == 1]

# Class 0: separate features from labels, split, then re-attach the label column
data_0_X = data_0.drop(columns=["target"])
data_0_Y = data_0['target']
train_0_X, valid_0_X, train_0_y, valid_0_y = _split_80_20(data_0_X, data_0_Y)
save_TrainDate_0 = _with_target(train_0_X, train_0_y)
save_ValidDate_0 = _with_target(valid_0_X, valid_0_y)

# Class 1: identical procedure
data_1_X = data_1.drop(columns=["target"])
data_1_Y = data_1['target']
train_1_X, valid_1_X, train_1_y, valid_1_y = _split_80_20(data_1_X, data_1_Y)
save_TrainDate_1 = _with_target(train_1_X, train_1_y)
save_ValidDate_1 = _with_target(valid_1_X, valid_1_y)

# Merge both classes and shuffle the rows (fixed seed) to remove the class ordering
train_date = pd.concat([save_TrainDate_0, save_TrainDate_1]).sample(frac=1, random_state=42)
valid_date = pd.concat([save_ValidDate_0, save_ValidDate_1]).sample(frac=1, random_state=42)

# Final training features / labels
train_X = train_date.drop(columns=["target"])
train_y = train_date['target']

# Final validation features / labels
valid_X = valid_date.drop(columns=["target"])
valid_y = valid_date['target']



In [23]:

# Load the previously trained LightGBM classifier from disk.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
light_model = joblib.load(model_path)

# Report the AUC recorded under the 'valid_1' evaluation set of the fitted model
best_score = light_model.best_score_['valid_1']['auc']
print("******* Best Score *******")
print(best_score)

# Feature column names of the study-area matrix
header = X_all.columns.tolist()

# Every column of data_all that is not a model feature (i.e. the dropped columns,
# which include FID and the X / Y coordinates)
latitude_and_longitude = data_all.drop(columns=header)

# reindex() silently inserts all-NaN columns for names it cannot find, which
# would yield meaningless predictions; fail loudly instead.
missing_features = [col for col in train_X.columns if col not in X_all.columns]
if missing_features:
    raise ValueError(
        "Study-area data is missing feature columns used for training: {}".format(missing_features)
    )

# Put the features in the same column order as the training matrix
X_all = X_all.reindex(columns=train_X.columns)

# Probability of the positive class (second column of predict_proba).
# For hard 0/1 labels, threshold this at 0.5 before building the frame.
positive_proba = light_model.predict_proba(X_all)[:, 1]

# Build the prediction frame on X_all's own index so the concat below pairs
# each probability with the row it was computed from.
Y_all = pd.DataFrame({'target': positive_proba}, index=X_all.index)

# Final output: coordinates plus predicted probability, nothing else
merge_XY = pd.concat([latitude_and_longitude[['X', 'Y']], Y_all], axis=1)

# Save the final output to a CSV file
merge_XY.to_csv(filename_export, index=False)

print('DOWN')  # Indicate completion of the process

******* Best Score *******
0.850781592403214
DOWN
