In [12]:
import os
import numpy as np
import rasterio
from autogluon.tabular import TabularPredictor
import pandas as pd
from tqdm import tqdm

In [13]:

class TIFPredictor:
    """Apply a saved AutoGluon tabular model to a set of single-band GeoTIFFs.

    Every model feature is read from ``<feature>.tif`` inside ``input_folder``.
    The raster is processed window by window: for each window, band 1 of every
    feature raster is read, flattened into one DataFrame column, passed to the
    model, and the reshaped predictions are written to the output raster.

    The output grid (profile and shape) is taken from the first feature's
    raster; the remaining rasters are read with the same windows, so they are
    presumed to be aligned with it (this is not checked here).
    """

    def __init__(self, model_path, model_name, input_folder, chunk_size=(1000, 1000)):
        """Load the predictor and store the prediction settings.

        Args:
            model_path: Directory handed to ``TabularPredictor.load``.
            model_name: Model name forwarded as ``model=`` to the predictor's
                ``predict`` call.
            input_folder: Folder containing one ``<feature>.tif`` per feature.
            chunk_size: ``(rows, cols)`` size of each processing window.
        """
        self.model = TabularPredictor.load(model_path)
        self.select_model = model_name
        self.input_folder = input_folder
        # Feature names double as the base names of the input rasters.
        self.feature_columns = self.model.feature_metadata_in.get_features()
        self.chunk_size = chunk_size

    def read_tif_info(self, file_path):
        """Return ``(profile, shape)`` of the raster at ``file_path``."""
        with rasterio.open(file_path) as src:
            return src.profile, src.shape

    def read_tif_chunk(self, file_path, row_start, row_end, col_start, col_end):
        """Read band 1 of ``file_path`` restricted to the given window.

        The window is passed to rasterio as
        ``((row_start, row_end), (col_start, col_end))``.
        """
        with rasterio.open(file_path) as src:
            return src.read(1, window=((row_start, row_end), (col_start, col_end)))

    def generate_chunks(self, shape):
        """Yield ``(row_start, row_end, col_start, col_end)`` windows covering ``shape``.

        Windows are produced row-major in steps of ``self.chunk_size``; the end
        indices are clipped to the raster bounds, so edge windows may be
        smaller than ``chunk_size``.
        """
        rows, cols = shape
        row_step, col_step = self.chunk_size[0], self.chunk_size[1]
        for row in range(0, rows, row_step):
            for col in range(0, cols, col_step):
                yield (row, min(row + row_step, rows),
                       col, min(col + col_step, cols))

    def predict(self, output_path):
        """Predict every window of the raster and write the result to ``output_path``.

        The output reuses the profile of the first feature raster, overridden
        to a single float32 band with LZW compression. Progress is reported
        with one tqdm tick per processed window.

        Args:
            output_path: Path of the raster file to create.
        """
        # Get metadata from the first TIF file
        first_tif = os.path.join(self.input_folder, f"{self.feature_columns[0]}.tif")
        profile, shape = self.read_tif_info(first_tif)

        # Prepare the output file
        profile.update(dtype=rasterio.float32, count=1, compress='lzw')
        total_chunks = sum(1 for _ in self.generate_chunks(shape))
        with rasterio.open(output_path, 'w', **profile) as dst:
            with tqdm(total=total_chunks, desc="处理进度") as pbar:
                for row_start, row_end, col_start, col_end in self.generate_chunks(shape):
                    window = ((row_start, row_end), (col_start, col_end))

                    # One flattened column per feature for this window.
                    chunk_data = {}
                    for feature in self.feature_columns:
                        tif_path = os.path.join(self.input_folder, f"{feature}.tif")
                        chunk = self.read_tif_chunk(tif_path, row_start, row_end, col_start, col_end)
                        chunk_data[feature] = chunk.flatten()

                    # Prepare input data for the model
                    X = pd.DataFrame(chunk_data)

                    # Make predictions
                    predictions = self.model.predict(X, model=self.select_model)

                    # Reshape predictions to match the chunk size
                    predictions = predictions.values.reshape((row_end - row_start, col_end - col_start))

                    # Write the predictions to the output file
                    dst.write(predictions.astype(rasterio.float32), 1, window=window)

                    # Advance once per window; this must stay inside the loop,
                    # otherwise the bar only moves after all work is done.
                    pbar.update(1)
        print(f"Predictions saved to {output_path}")



In [15]:
# Usage example
# NOTE(review): these are absolute, machine-specific Windows paths; adjust them
# (or derive them from a configurable base directory) before running elsewhere.
# Directory of the saved predictor; passed to TabularPredictor.load by TIFPredictor.
model_path = r'F:\cache_data\model_path\sb\soil_type\autogluon\autogluon_20240902'
model_name = 'XGBoost'  # e.g. 'WeightedEnsemble_L2'
# Folder expected to hold one "<feature>.tif" per model feature.
input_folder = r'F:\tif_features\county_feature\sb'
# Destination raster written by TIFPredictor.predict.
output_path = r'C:\Users\Runker\Desktop\test\CSC\prediction.tif'

In [18]:
# Build the predictor (this loads the saved model) and run the chunked
# prediction, writing the result raster to output_path.
predictor = TIFPredictor(model_path, model_name, input_folder)
predictor.predict(output_path)