In [4]:
import pandas as pd

# Read the weighted Moho table from CSV
df_final = pd.read_csv("weighted_moho.csv")

# Boolean mask over rows: True wherever the (latitude, longitude) pair occurs
# more than once. keep=False flags every occurrence, not only the repeats.
duplicates = df_final.duplicated(subset=["latitude", "longitude"], keep=False)

# Select the flagged rows with all of their columns
duplicated_rows = df_final.loc[duplicates]

# Report how many rows share coordinates, then announce the sample below
print(
    f"Total number of duplicated rows: {duplicated_rows.shape[0]}",
    "\nSample duplicated rows (showing first 10):\n",
    sep="\n",
)
# Last expression of the cell, so the notebook renders it as a rich table
duplicated_rows.head(10)


Total number of duplicated rows: 5823

Sample duplicated rows (showing first 10):



Unnamed: 0,latitude,longitude,rf_moho,spline_moho,err_moho,weighted_moho,lat_bin,lon_bin,Moho,nearest_rf_spline_moho
0,36.43,94.87,49.5,62.995836,13.495836,49.5,36.0,94.0,,
1,36.43,94.87,48.0,62.995836,14.995836,48.0,36.0,94.0,,
6,40.5,95.8,48.8,51.333665,2.533665,48.8,40.0,95.0,,
7,40.5,95.8,49.5,51.333665,1.833665,49.5,40.0,95.0,,
8,32.97,94.14,75.0,70.150259,4.849741,75.0,32.0,94.0,,
9,32.97,94.14,74.3,70.150259,4.149741,74.3,32.0,94.0,,
11,29.37,90.18,80.0,62.73252,17.26748,80.0,29.0,90.0,,
12,28.93,89.74,76.0,60.955688,15.044312,76.0,28.0,89.0,,
13,28.73,89.66,76.0,60.11968,15.88032,76.0,28.0,89.0,,
14,28.49,89.66,62.0,59.08321,2.91679,62.0,28.0,89.0,,


In [14]:
import pandas as pd
from itertools import combinations

# Load the data
# NOTE(review): absolute, machine-specific path — consider moving it to a
# configurable data directory so the notebook runs on other machines.
path_to_data_file = r"D:\Amitava\Projects\Spline_Moho\Global_moho_compilation\Global_crust.csv"
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk. The recorded output shows a warning raised from
# this call — presumably the mixed-type DtypeWarning; TODO confirm it is gone.
data_eq_raw = pd.read_csv(
    path_to_data_file,
    comment='#',
    usecols=["Lat", "Long", "Moho_km"],
    low_memory=False,
)

# Clean and filter the data: values that cannot be parsed as numbers become
# NaN (errors="coerce") and those rows are then dropped.
data_eq_raw["Moho_km"] = pd.to_numeric(data_eq_raw["Moho_km"], errors="coerce")
data_eq_all = data_eq_raw.dropna(subset=["Moho_km"]).reset_index(drop=True)

# Every row whose (Lat, Long) pair occurs more than once (keep=False keeps all
# occurrences, not only the repeats)
dupes = data_eq_all[data_eq_all.duplicated(subset=["Lat", "Long"], keep=False)]

# For each duplicated location, record every unordered pair of Moho_km values
# together with their absolute difference. Iterating over the plain value list
# avoids building a Series per row with iterrows().
pairwise_results = [
    {
        "Lat": lat,
        "Long": lon,
        "Moho_km_1": moho1,
        "Moho_km_2": moho2,
        "Diff": abs(moho1 - moho2),
    }
    for (lat, lon), group in dupes.groupby(["Lat", "Long"])
    for moho1, moho2 in combinations(group["Moho_km"].tolist(), 2)
]

# Convert to DataFrame. The columns are given explicitly so that an empty
# result (no duplicated locations) still has a "Diff" column to sort on
# instead of raising KeyError.
pairwise_df = pd.DataFrame(
    pairwise_results,
    columns=["Lat", "Long", "Moho_km_1", "Moho_km_2", "Diff"],
)

# Sort by Diff, largest disagreement first
pairwise_df_sorted = pairwise_df.sort_values(by="Diff", ascending=False).reset_index(drop=True)

# Display the 500 largest differences (pandas abbreviates the printed frame)
print(pairwise_df_sorted.head(500))


  data_eq_raw = pd.read_csv(path_to_data_file, comment='#', usecols=["Lat", "Long", "Moho_km"])


        Lat     Long  Moho_km_1  Moho_km_2  Diff
0    29.700   91.150       88.0       26.0  62.0
1    29.700   91.150       80.0       26.0  54.0
2    29.700   91.150       79.9       26.0  53.9
3    28.730   89.660       76.0       25.0  51.0
4    34.450  -98.240       25.0       70.0  45.0
..      ...      ...        ...        ...   ...
495   7.970   39.130       27.0       38.2  11.2
496  13.020   77.570       46.0       34.9  11.1
497  50.005    4.595       25.0       36.1  11.1
498  -7.960   31.630       37.9       49.0  11.1
499 -42.420  173.540       34.5       23.4  11.1

[500 rows x 5 columns]


In [4]:
import xarray as xr

# Read the weighted Moho NetCDF file into an xarray Dataset
ds = xr.open_dataset("weighted_moho.nc")

# Print the dataset summary, then the 'weighted_moho' variable on its own line
print(ds, ds['weighted_moho'], sep="\n")

# Flatten to a pandas DataFrame and move the index into an ordinary column
df = ds.to_dataframe()
df = df.reset_index()
print(df)

<xarray.Dataset> Size: 3MB
Dimensions:        (index: 92346)
Coordinates:
  * index          (index) int64 739kB 0 1 2 3 4 ... 92342 92343 92344 92345
Data variables:
    latitude       (index) float64 739kB ...
    longitude      (index) float64 739kB ...
    weighted_moho  (index) float64 739kB ...
<xarray.DataArray 'weighted_moho' (index: 92346)> Size: 739kB
[92346 values with dtype=float64]
Coordinates:
  * index    (index) int64 739kB 0 1 2 3 4 5 ... 92341 92342 92343 92344 92345
       index  latitude  longitude  weighted_moho
0          0     36.43      94.87      49.500000
1          1     36.43      94.87      48.000000
2          2     36.81      92.95      49.000000
3          3     37.02      91.74      57.000000
4          4     32.25      91.70      74.000000
...      ...       ...        ...            ...
92341  92341    -89.50     175.50      35.096589
92342  92342    -89.50     176.50      35.094330
92343  92343    -89.50     177.50      35.091988
92344  92344    -89.