In [1]:
from numpy import linalg as la
import numpy as np
import pandas as pa
import matplotlib.pyplot as plt
import gdal
import matplotlib as mat
import gnc 
from sklearn.decomposition import PCA

In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
import gdal
from sklearn.decomposition import PCA

# ---------------------------------------------------------------------------
# Input data
# ---------------------------------------------------------------------------

# PFT (Plant Functional Type) fraction rasters are stored as two .npy parts;
# the parts are stacked along the first axis to form a single array.
# allow_pickle=True lets np.load unpickle Python objects, so these files must
# come from a trusted source.
data = np.load('F:/liyu/Landuse_map/Output_PFT/WGS/PFT_fraction_12band_part1.npy', allow_pickle=True)
data_1 = np.load('F:/liyu/Landuse_map/Output_PFT/WGS/PFT_fraction_12band_part2.npy', allow_pickle=True)
data = np.concatenate((data, data_1))

# Location table: only the two raster-index columns are kept.  reset_index()
# (without drop) turns the previous row labels into an extra 'index' column.
df = pd.read_csv('F:/liyu/Location_index_WGS1.csv')[['lat_index', 'lon_index']].reset_index()

# Elevation raster: NaNs are replaced by 0 first, then every value <= 0 is
# masked, so missing and non-positive elevations are excluded together.
elevation_ds = gdal.Open('F:/liyu/elevation/altitude.tif')
ele = np.ma.masked_less_equal(np.nan_to_num(elevation_ds.ReadAsArray()), 0)
del elevation_ds  # release the GDAL handle once the pixels are in memory

# LST (Land Surface Temperature) raster: stored values are multiplied by 0.02
# and shifted by -273.15 (conversion to degrees Celsius).
lst_ds = gdal.Open('F:/liyu/surface_temperature/LST_WGS.tif')
LST_array = lst_ds.ReadAsArray() * 0.02 - 273.15
del lst_ds



# ---------------------------------------------------------------------------
# Local PCA regression of LST on PFT fractions, processed in batches
# ---------------------------------------------------------------------------
# For every location (lat_index, lon_index) an 11x11 window is cut from the
# elevation, PFT-fraction and LST rasters.  The 12 PFT bands of the window are
# reduced with PCA (n_components=0.99), LST is regressed on the principal
# components by ordinary least squares, and the fitted model is evaluated for
# four "pure" compositions (one band set to 1, all others 0).  The difference
# between each forest prediction and the open-land prediction is stored.
# Locations that are skipped for any reason keep the value 0.
#
# Reads `df`, `ele`, `data` and `LST_array` created by the loading code above
# and writes one CSV per batch.

BATCH_SIZE = 96058          # location rows written to each CSV part
N_BATCHES = 40              # parts are numbered 1..N_BATCHES
N_BANDS = 12                # PFT bands along the first axis of `data`
WIN = 11                    # side length of the local window
CENTER = 5                  # window-local row/column of the reference pixel
ELE_TOL = 100               # elevation tolerance, also used as std-dev cut-off
RESULT_COLS = ['LST_Needle', 'LST_Broad', 'LST_Mixed']
FOREST_BANDS = [5, 1, 4]    # band used for needle, broad, mixed (order of RESULT_COLS)
OPEN_LAND_BAND = 11         # band used as the open-land reference

# One-hot "pure cover" compositions: rows 0-2 are the forest types in the
# order of RESULT_COLS, the last row is open land.
pure_cover = np.zeros((len(FOREST_BANDS) + 1, N_BANDS))
pure_cover[np.arange(len(FOREST_BANDS) + 1), FOREST_BANDS + [OPEN_LAND_BAND]] = 1

# Keep as many principal components as needed to explain 99 % of the variance.
# The estimator is re-fitted for every location, so one instance is enough.
pca_analysis = PCA(n_components=0.99)

# Batches start at 1: for num == 0 the slice below would be df[-BATCH_SIZE:0],
# which is always empty and would only produce an empty "part0" file.
for num in range(1, N_BATCHES + 1):
    dft = (
        df.iloc[BATCH_SIZE * (num - 1):BATCH_SIZE * num][['lat_index', 'lon_index']]
        .reset_index(drop=True)
    )

    # Results are collected in a float array (one row per location) and
    # attached to the frame once the batch is finished.
    delta = np.zeros((len(dft), len(RESULT_COLS)))

    for a, (i, j) in enumerate(zip(dft['lat_index'].to_numpy(), dft['lon_index'].to_numpy())):
        i, j = int(i), int(j)

        # Window bounds: rows i-6 .. i+4 and columns j-6 .. j+4.
        # NOTE(review): the window-local centre [CENTER, CENTER] therefore maps
        # to raster pixel (i-1, j-1) — confirm that the indices are 1-based.
        r0, r1 = i - 6, i + 5
        c0, c1 = j - 6, j + 5

        # A negative start would wrap around and a window cut off by the
        # raster edge would not be 11x11; such locations are skipped.
        if r0 < 0 or c0 < 0:
            continue
        ele_local = ele[r0:r1, c0:c1]
        if ele_local.shape != (WIN, WIN):
            continue

        # Mask pixels whose elevation differs from the centre pixel by more
        # than ELE_TOL.
        ele_loc = np.ma.masked_outside(ele_local - ele_local[CENTER, CENTER], -ELE_TOL, ELE_TOL)

        # Skip windows whose elevation spread is too large.  A fully masked
        # window yields a masked std, which also evaluates as "not below".
        if not (np.std(ele_loc) < ELE_TOL):
            continue

        # Repeat the 2-D mask once per band.  The reps tuple (N_BANDS, 1, 1)
        # stacks whole copies along a new leading axis; a scalar reps would
        # tile along the last axis and scramble the mask when reshaped.
        band_mask = np.tile(np.ma.getmaskarray(ele_loc), (N_BANDS, 1, 1))
        local = np.ma.masked_array(data[:, r0:r1, c0:c1], mask=band_mask)

        # Response (LST) and predictors (PFT fractions), one row per pixel.
        Y_array = LST_array[r0:r1, c0:c1].reshape(WIN * WIN)
        X_array = local.reshape(N_BANDS, WIN * WIN).transpose()

        # Skip windows whose predictor matrix has rank 0 (all-zero matrix).
        if np.linalg.matrix_rank(X_array) == 0:
            continue

        # Centre the predictors and project them onto the principal components.
        # NOTE(review): scikit-learn presumably converts masked arrays to plain
        # ndarrays, so masked pixels may still enter the fit and only affect
        # the column means — confirm.
        x_mean = X_array.mean(axis=0)
        N_array = X_array - x_mean
        pca = pca_analysis.fit(N_array)
        Z_array = pca.transform(N_array)

        # Ordinary least squares on the component scores:
        # coefficient = (Z'Z)^-1 Z'Y.  A singular Z'Z leaves the zeros in place.
        try:
            coefficient = np.linalg.inv(Z_array.T.dot(Z_array)).dot(Z_array.T).dot(Y_array)
        except np.linalg.LinAlgError:
            continue

        # Predict LST for the four pure compositions and store each forest
        # prediction minus the open-land prediction.
        predicted = pca.transform(pure_cover - x_mean).dot(coefficient)
        delta[a] = predicted[:-1] - predicted[-1]

    for k, col in enumerate(RESULT_COLS):
        dft[col] = delta[:, k]

    # Save the processed batch to a CSV file
    dft.to_csv('F:/liyu/Location_Day_LST_part{0}.csv'.format(num))



In [None]:
import numpy as np
import pandas as pd
import gdal

# ---------------------------------------------------------------------------
# Assemble the per-location LST differences into a 3-band GeoTIFF
# ---------------------------------------------------------------------------

# Reference raster: only its geotransform and projection are used.
# gdal.Open returns None on failure unless GDAL exceptions are enabled, so
# fail here with a clear message.
d_n = gdal.Open('F:/liyu/Landuse_map/First_map/luc_needle.tif')
if d_n is None:
    raise FileNotFoundError('GDAL could not open the reference raster luc_needle.tif')

# Output array, one band per forest type; cells never written stay 0.
# NOTE(review): the 4478 x 7390 grid size is hard-coded — presumably it matches
# the reference raster; confirm.
LST = np.zeros((3, 4478, 7390))
BAND_COLUMNS = ['LST_Needle', 'LST_Broad', 'LST_Mixed']  # band order in the output

# NOTE(review): only parts 39 and 40 are read here, so all other locations stay
# 0 in the output — confirm that this range is intended.
for p in range(39, 41):
    # Load CSV file for the current part
    d = pd.read_csv('F:/liyu/Location_Day_LST_part{0}.csv'.format(p))

    # Scatter all rows of this part into the raster at once.  Indices are
    # truncated to integers; if a (row, col) pair occurs more than once, the
    # last occurrence wins, as it would in a row-by-row loop.
    rows = d['lat_index'].to_numpy().astype(np.intp)
    cols = d['lon_index'].to_numpy().astype(np.intp)
    for band, column in enumerate(BAND_COLUMNS):
        LST[band, rows, cols] = d[column].to_numpy()

# Georeferencing copied from the reference raster
im_geotrans = d_n.GetGeoTransform()
im_proj = d_n.GetProjection()

# Define the path for the output GeoTIFF file
path = 'F:/liyu/Results_TIF/Three_forest_delta_LST_WGS.tif'

# Raster dimensions follow the array shape: (bands, rows, columns)
im_bands, im_height, im_width = LST.shape

# Pixels are stored as 32-bit floats (the float64 array is cast on write)
datatype = gdal.GDT_Float32

driver = gdal.GetDriverByName("GTiff")

# Create returns None when the file cannot be created (e.g. missing folder).
dataset = driver.Create(path, im_width, im_height, im_bands, datatype)
if dataset is None:
    raise RuntimeError('GDAL could not create {0}'.format(path))

dataset.SetGeoTransform(im_geotrans)  # affine transformation parameters
dataset.SetProjection(im_proj)        # projection

# GDAL band numbers are 1-based
for band in range(im_bands):
    dataset.GetRasterBand(band + 1).WriteArray(LST[band])

# Flush pending writes, then drop the reference so GDAL closes the file
dataset.FlushCache()
dataset = None
