In [2]:
import pandas as pd
import geopandas as gpd
from scipy.stats import pearsonr

# Load the two label files and tag every row with the dataset it came from
all_DEMs = gpd.read_file('Plotting/labels/all_DEM_labels.geojson')
all_OP = gpd.read_file('Plotting/labels/all_OP_labels.geojson')
all_DEMs['dataset'] = 'DEM'
all_OP['dataset'] = 'OP'

# Stack both datasets; ignore_index gives a fresh 0..n-1 index so labels are unique
combined_data = pd.concat([all_DEMs, all_OP], ignore_index=True)

# Columns analysed below: slope is correlated against each shape metric
metric_columns = ['mean_slope_angle', 'circularity', 'solidity']
shape_metrics = ['circularity', 'solidity']

# Cast the metric columns to float, then drop rows missing any of them
for column in metric_columns:
    combined_data[column] = combined_data[column].astype(float)
combined_data = combined_data.dropna(subset=metric_columns)

# Regions and datasets to summarise
regions = ['herschel', 'peel', 'gydan', 'kolguev']
datasets = ['DEM', 'OP']

# Per-region results, keyed by region name
mean_values = {}
correlation_results = {}

# Lower-case the region labels once so matching is case-insensitive
region_labels = combined_data['region'].str.lower()

for region in regions:
    region_data = combined_data[region_labels == region.lower()]

    # Mean of each metric per dataset within this region
    mean_values[region] = region_data.groupby('dataset')[metric_columns].mean()

    # Pearson correlation (coefficient and p-value) of slope angle vs. each shape metric
    correlation_results[region] = {}
    for dataset in datasets:
        data = region_data[region_data['dataset'] == dataset]
        # pearsonr raises ValueError for fewer than two observations, so skip
        # subsets that are empty or hold a single row; their columns end up NaN
        if len(data) < 2:
            continue
        for metric in shape_metrics:
            corr, p_value = pearsonr(data['mean_slope_angle'], data[metric])
            correlation_results[region][f'{dataset}_{metric}_corr'] = corr
            correlation_results[region][f'{dataset}_{metric}_pvalue'] = p_value

# One row per (Region, dataset) with the metric means. The inner index level is
# already named 'dataset' by the groupby, so only the outer level needs a name.
mean_values_df = pd.concat(mean_values, names=['Region']).reset_index()

# One row per Region with all correlation columns
correlation_results_df = pd.DataFrame(correlation_results).T.rename_axis('Region').reset_index()

# Attach the region-level correlations to the mean rows; because the join key is
# Region only, each region's correlation values repeat on all of its dataset rows
results_df = pd.merge(mean_values_df, correlation_results_df, on='Region')

# Print the results
print("\nMean Values and Correlation Results between Mean Slope Angle and Circularity/Solidity:")
print(results_df)

results_df



Mean Values and Correlation Results between Mean Slope Angle and Circularity/Solidity:
     Region dataset  mean_slope_angle  circularity  solidity  \
0  herschel     DEM         11.682046     0.499676  0.857798   
1  herschel      OP         10.148018     0.443140  0.849234   
2      peel     DEM         12.117320     0.503377  0.849064   
3      peel      OP         12.033060     0.415352  0.839350   
4     gydan     DEM          8.816322     0.321831  0.724858   
5     gydan      OP          8.445313     0.460616  0.855738   
6   kolguev     DEM         12.857045     0.403594  0.784630   
7   kolguev      OP         13.253531     0.420129  0.863345   

   DEM_circularity_corr  DEM_circularity_pvalue  DEM_solidity_corr  \
0              0.035436                0.759638           0.040587   
1              0.035436                0.759638           0.040587   
2             -0.077794                0.194328          -0.093837   
3             -0.077794                0.194328        

Unnamed: 0,Region,dataset,mean_slope_angle,circularity,solidity,DEM_circularity_corr,DEM_circularity_pvalue,DEM_solidity_corr,DEM_solidity_pvalue,OP_circularity_corr,OP_circularity_pvalue,OP_solidity_corr,OP_solidity_pvalue
0,herschel,DEM,11.682046,0.499676,0.857798,0.035436,0.759638,0.040587,0.725985,0.037884,0.5376626,0.093105,0.129139
1,herschel,OP,10.148018,0.44314,0.849234,0.035436,0.759638,0.040587,0.725985,0.037884,0.5376626,0.093105,0.129139
2,peel,DEM,12.11732,0.503377,0.849064,-0.077794,0.194328,-0.093837,0.117202,0.180414,2.233165e-07,0.145794,3e-05
3,peel,OP,12.03306,0.415352,0.83935,-0.077794,0.194328,-0.093837,0.117202,0.180414,2.233165e-07,0.145794,3e-05
4,gydan,DEM,8.816322,0.321831,0.724858,-0.158596,0.214428,-0.147174,0.249714,0.058546,0.3221282,0.083651,0.156799
5,gydan,OP,8.445313,0.460616,0.855738,-0.158596,0.214428,-0.147174,0.249714,0.058546,0.3221282,0.083651,0.156799
6,kolguev,DEM,12.857045,0.403594,0.78463,-0.163085,0.248012,-0.170934,0.225667,-0.106763,0.1860977,0.059832,0.459582
7,kolguev,OP,13.253531,0.420129,0.863345,-0.163085,0.248012,-0.170934,0.225667,-0.106763,0.1860977,0.059832,0.459582


In [1]:
import pandas as pd
import geopandas as gpd
from scipy.stats import pearsonr

# Load the two label files and tag every row with the dataset it came from
common_DEMs = gpd.read_file('Plotting/labels/common_DEM_labels.geojson')
# NOTE(review): the variable is named common_OT but the file read is
# 'all_OT_labels.geojson' (the DEM side reads 'common_DEM_labels.geojson').
# Confirm whether a 'common' OT file was intended.
common_OT = gpd.read_file('Plotting/labels/all_OT_labels.geojson')
common_DEMs['dataset'] = 'DEM'
common_OT['dataset'] = 'OT'

# Stack both datasets; ignore_index gives a fresh 0..n-1 index so labels are unique
combined_data = pd.concat([common_DEMs, common_OT], ignore_index=True)

# Columns analysed below: slope is correlated against each shape metric
metric_columns = ['mean_slope_angle', 'circularity', 'solidity']
shape_metrics = ['circularity', 'solidity']

# Cast the metric columns to float, then drop rows missing any of them
for column in metric_columns:
    combined_data[column] = combined_data[column].astype(float)
combined_data = combined_data.dropna(subset=metric_columns)

# Regions and datasets to summarise
regions = ['herschel', 'peel', 'gydan', 'kolguev']
datasets = ['DEM', 'OT']

# Per-region results, keyed by region name
mean_values = {}
correlation_results = {}

# Lower-case the region labels once so matching is case-insensitive
region_labels = combined_data['region'].str.lower()

for region in regions:
    region_data = combined_data[region_labels == region.lower()]

    # Mean of each metric per dataset within this region
    mean_values[region] = region_data.groupby('dataset')[metric_columns].mean()

    # Pearson correlation (coefficient and p-value) of slope angle vs. each shape metric
    correlation_results[region] = {}
    for dataset in datasets:
        data = region_data[region_data['dataset'] == dataset]
        # pearsonr raises ValueError for fewer than two observations, so skip
        # subsets that are empty or hold a single row; their columns end up NaN
        if len(data) < 2:
            continue
        for metric in shape_metrics:
            corr, p_value = pearsonr(data['mean_slope_angle'], data[metric])
            correlation_results[region][f'{dataset}_{metric}_corr'] = corr
            correlation_results[region][f'{dataset}_{metric}_pvalue'] = p_value

# One row per (Region, dataset) with the metric means. The inner index level is
# already named 'dataset' by the groupby, so only the outer level needs a name.
mean_values_df = pd.concat(mean_values, names=['Region']).reset_index()

# One row per Region with all correlation columns
correlation_results_df = pd.DataFrame(correlation_results).T.rename_axis('Region').reset_index()

# Attach the region-level correlations to the mean rows; because the join key is
# Region only, each region's correlation values repeat on all of its dataset rows
results_df = pd.merge(mean_values_df, correlation_results_df, on='Region')

# Print the results
print("\nMean Values and Correlation Results between Mean Slope Angle and Circularity/Solidity:")
print(results_df)

results_df



Mean Values and Correlation Results between Mean Slope Angle and Circularity/Solidity:
     Region dataset  mean_slope_angle  circularity  solidity  \
0  herschel     DEM         11.400240     0.503643  0.854977   
1  herschel      OT         10.214310     0.628711  0.883769   
2      peel     DEM         11.617462     0.519352  0.861321   
3      peel      OT         11.687178     0.577973  0.847892   
4     gydan     DEM          8.864484     0.334930  0.717482   
5     gydan      OT         10.513289     0.657805  0.902501   
6   kolguev     DEM         13.849295     0.389942  0.777356   
7   kolguev      OT         15.217998     0.635514  0.922633   

   DEM_circularity_corr  DEM_circularity_pvalue  DEM_solidity_corr  \
0              0.098562                0.509820           0.065803   
1              0.098562                0.509820           0.065803   
2             -0.094251                0.396696          -0.090424   
3             -0.094251                0.396696        

Unnamed: 0,Region,dataset,mean_slope_angle,circularity,solidity,DEM_circularity_corr,DEM_circularity_pvalue,DEM_solidity_corr,DEM_solidity_pvalue,OT_circularity_corr,OT_circularity_pvalue,OT_solidity_corr,OT_solidity_pvalue
0,herschel,DEM,11.40024,0.503643,0.854977,0.098562,0.50982,0.065803,0.660333,0.091552,0.498208,0.203431,0.129074
1,herschel,OT,10.21431,0.628711,0.883769,0.098562,0.50982,0.065803,0.660333,0.091552,0.498208,0.203431,0.129074
2,peel,DEM,11.617462,0.519352,0.861321,-0.094251,0.396696,-0.090424,0.41623,0.23966,0.0972,0.203144,0.16153
3,peel,OT,11.687178,0.577973,0.847892,-0.094251,0.396696,-0.090424,0.41623,0.23966,0.0972,0.203144,0.16153
4,gydan,DEM,8.864484,0.33493,0.717482,-0.253855,0.135182,-0.240409,0.157844,0.273339,0.243585,0.111426,0.640007
5,gydan,OT,10.513289,0.657805,0.902501,-0.253855,0.135182,-0.240409,0.157844,0.273339,0.243585,0.111426,0.640007
6,kolguev,DEM,13.849295,0.389942,0.777356,-0.342872,0.058989,-0.385148,0.032386,-0.274498,0.241512,-0.131141,0.58156
7,kolguev,OT,15.217998,0.635514,0.922633,-0.342872,0.058989,-0.385148,0.032386,-0.274498,0.241512,-0.131141,0.58156
