In [2]:
import pandas as pd

# File path
path_to_data_file = r"D:\Amitava\Projects\Spline_Moho\Global_moho_compilation\Global_crust_raw.csv"

# Read full data
data_eq_raw = pd.read_csv(path_to_data_file, comment='#')

# Clean Moho_km
data_eq_raw["Moho_km"] = pd.to_numeric(data_eq_raw["Moho_km"], errors="coerce")
data_eq_all = data_eq_raw.dropna(subset=["Moho_km"]).reset_index(drop=True)

# Filter coordinates
data_eq_ind = data_eq_all[
    (data_eq_all["Long"] >= 65) & (data_eq_all["Long"] <= 110) &
    (data_eq_all["Lat"] >= 5) & (data_eq_all["Lat"] <= 45)
].reset_index(drop=True)

# Remove duplicate (Lat, Long, Moho_km)
data_eq_raw2 = data_eq_all.drop_duplicates(subset=["Lat", "Long", "Moho_km"], keep="first").reset_index(drop=True)

# Group by (Lat, Long)
grouped = data_eq_raw2.groupby(["Lat", "Long"])

filtered_rows = []
ambiguous_rows = []

for (lat, long), group in grouped:
    if len(group) == 1:
        filtered_rows.append(group.iloc[0])
    else:
        # Separate those with and without Avg_Vp/Vs
        with_vpvs = group.dropna(subset=["Avg_Vp/Vs"])
        without_vpvs = group[group["Avg_Vp/Vs"].isna()]

        if len(with_vpvs) == 1:
            filtered_rows.append(with_vpvs.iloc[0])
        elif len(with_vpvs) > 1:
            # Check for 'Hk' in comments among those with Vp/Vs
            hk_with_vpvs = with_vpvs[with_vpvs["Comments (Method - other notes etc.)"].str.contains("Hk", case=False, na=False)]
            if len(hk_with_vpvs) == 1:
                filtered_rows.append(hk_with_vpvs.iloc[0])
            else:
                ambiguous_rows.append(group)
        else:
            # No Vp/Vs available → check for 'Hk' among all
            hk_all = group[group["Comments (Method - other notes etc.)"].str.contains("Hk", case=False, na=False)]
            if len(hk_all) == 1:
                filtered_rows.append(hk_all.iloc[0])
            else:
                ambiguous_rows.append(group)

# Final cleaned dataframe
data_eq = pd.DataFrame(filtered_rows).reset_index(drop=True)

# Save cleaned data to CSV
data_eq.to_csv(r"D:\Amitava\Projects\Spline_Moho\Global_moho_compilation\Global_crust.csv", index=False)

# Ambiguous rows to CSV
if ambiguous_rows:
    ambiguous_df = pd.concat(ambiguous_rows).reset_index(drop=True)
    ambiguous_path = r"D:\Amitava\Projects\Spline_Moho\Global_moho_compilation\ambiguous_entries.csv"
    ambiguous_df.to_csv(ambiguous_path, index=False)
    print(f"Ambiguous entries saved to: {ambiguous_path} ({ambiguous_df.shape[0]} rows)")

print(f"Final cleaned data_eq rows: {data_eq.shape[0]}")

data_eq

  data_eq_raw = pd.read_csv(path_to_data_file, comment='#')


Ambiguous entries saved to: D:\Amitava\Projects\Spline_Moho\Global_moho_compilation\ambiguous_entries.csv (4243 rows)
Final cleaned data_eq rows: 28778


Unnamed: 0,Net,Station,Lat,Long,Moho_km,err_moho,Avg_Vp/Vs,Poisson’s_ratio,Elevation,Avg_Vp(km/s),...,Compilation,Comments (Method - other notes etc.),Sub-methods,Comments2,H2 (other methods),err,Vsn2,κ2,Unnamed: 25,Unnamed: 26
0,,,-90.000000,0.000000,38.0,,1.83,,,,...,Stephenson 2024,Hk,,,,,,,,
1,,,-90.000000,140.000000,26.0,,1.63,,,,...,Stephenson 2024,Hk,,,,,,,,
2,,,-89.930000,144.360000,40.0,,1.73,,,,...,Stephenson 2024,Hk,,,,,,,,
3,,,-89.930000,145.000000,39.0,,1.76,,,,...,Stephenson 2024,Hk,,,,,,,,
4,,,-87.420000,-149.430000,35.0,,1.83,,,,...,Stephenson 2024,Hk,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28773,,,80.170000,18.730000,31.8,,-,,,,...,Stephenson 2024,Refraction,P_reversed,,,,,,,
28774,,,80.768692,47.376292,31.0,,-,,,,...,Stephenson 2024,RF_other,forward_model,,,,,,,
28775,,,81.600000,-16.660000,32.0,,1.8,,,,...,Stephenson 2024,Hk,,,,,,,,
28776,,,82.500000,-62.350000,25.0,,1.81,,,,...,Stephenson 2024,Hk,,,,,,,,


In [6]:
# Split based on presence of 'Hk' in Comments
data_eq_hk = data_eq[data_eq["Comments (Method - other notes etc.)"].str.contains("Hk", case=False, na=False)].reset_index(drop=True)
data_eq_rest = data_eq[~data_eq["Comments (Method - other notes etc.)"].str.contains("Hk", case=False, na=False)].reset_index(drop=True)

# Optionally save to CSV
data_eq_hk.to_csv(r"D:\Amitava\Projects\Spline_Moho\Global_moho_compilation\Global_crust_Hk.csv", index=False)
data_eq_rest.to_csv(r"D:\Amitava\Projects\Spline_Moho\Global_moho_compilation\Global_crust_rest.csv", index=False)


In [7]:
data_eq_hk

Unnamed: 0,Net,Station,Lat,Long,Moho_km,err_moho,Avg_Vp/Vs,Poisson’s_ratio,Elevation,Avg_Vp(km/s),...,Compilation,Comments (Method - other notes etc.),Sub-methods,Comments2,H2 (other methods),err,Vsn2,κ2,Unnamed: 25,Unnamed: 26
0,,,-90.00,0.00,38.0,,1.83,,,,...,Stephenson 2024,Hk,,,,,,,,
1,,,-90.00,140.00,26.0,,1.63,,,,...,Stephenson 2024,Hk,,,,,,,,
2,,,-89.93,144.36,40.0,,1.73,,,,...,Stephenson 2024,Hk,,,,,,,,
3,,,-89.93,145.00,39.0,,1.76,,,,...,Stephenson 2024,Hk,,,,,,,,
4,,,-87.42,-149.43,35.0,,1.83,,,,...,Stephenson 2024,Hk,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20879,,,78.92,11.94,27.0,,1.75,,,,...,Stephenson 2024,Hk,,,,,,,,
20880,,,78.93,11.94,28.0,,1.75,,,,...,Stephenson 2024,Hk,-,,,,,,,
20881,,,80.05,-86.42,27.0,,2.1,,,,...,Stephenson 2024,Hk,,,,,,,,
20882,,,81.60,-16.66,32.0,,1.8,,,,...,Stephenson 2024,Hk,,,,,,,,


In [8]:
data_eq_rest

Unnamed: 0,Net,Station,Lat,Long,Moho_km,err_moho,Avg_Vp/Vs,Poisson’s_ratio,Elevation,Avg_Vp(km/s),...,Compilation,Comments (Method - other notes etc.),Sub-methods,Comments2,H2 (other methods),err,Vsn2,κ2,Unnamed: 25,Unnamed: 26
0,,,-82.120000,-115.800000,33.75,,-,,,,...,Stephenson 2024,Refraction,P_reversed,,,,,,,
1,,,-81.950000,-113.530000,31.50,,-,,,,...,Stephenson 2024,Refraction,P_unreversed,,,,,,,
2,,,-81.370000,-107.280000,32.50,,-,,,,...,Stephenson 2024,Refraction,P_unreversed,,,,,,,
3,,,-76.500000,15.000000,39.82,,-,,,,...,Stephenson 2024,Refraction,P_reversed,,,,,,,
4,,,-75.850000,161.400000,41.00,,-,,,,...,Stephenson 2024,Reflection,-,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7889,,,80.053300,-86.415800,34.00,,-,,,,...,Stephenson 2024,RF_other,RF_inversion,,,,,,,
7890,,SQH,80.080000,32.500000,70.20,,1.76,0.262,,6.47,...,Singh 2015,,,,,,,,,
7891,,,80.170000,18.730000,31.80,,-,,,,...,Stephenson 2024,Refraction,P_reversed,,,,,,,
7892,,,80.768692,47.376292,31.00,,-,,,,...,Stephenson 2024,RF_other,forward_model,,,,,,,
