Standardize
* naming of files
* metadata of .nc files, make sure this is detailed

In [1]:
import os

# Define the directory containing your .nc files
directory = '/content/'


file_mapping = {
    "Rayleigh_Phase_10mHz.5m_EASE9262.68m.nc": "rayleigh_phase",
    "IGRF_inc_grid.nc": "igrf_inc",
    "interpolated_bouguer_from_csv.nc": "interpolated_bouguer",
    "interpolated_MF7_from_csv.nc": "interpolated_mf7",
    "SC_CRUST_VP_MS_CRUST1s.5m_EASE9262.68m.nc": "sc_crust_vp",
    "WGM2012_Freeair_ponc.5m_EASE9262.68m.nc": "wgm2012_freeair_ponc",
    "GL_TOT_SED_THICK_M_GLOBSED_Straume.5m_EASE9262.68m.nc": "gl_tot_sed_thick",
    "Love_Phase_10mHz.5m_EASE9262.68m.nc": "love_phase",
    "Rayleigh_Group_10mHz.5m_EASE9262.68m.nc": "rayleigh_group",
    "IGRF_dec_grid.nc": "igrf_dec",
    "SC_CRUST_AGE_MA_Seton2020.5m_EASE9262.68m.nc": "sc_crust_age",
    "SC_CRUST_VS_MS_CRUST1s.5m_EASE9262.68m.nc": "sc_crust_vs",
    "CM_CURIE_POINT_DEPTH_KM_Li2017.5m_EASE9262.68m.nc": "cm_curie_point_depth",
    "GL_ELEVATION_M_ASL_SRTM15+V2.5m_EASE9262.68m.nc": "gl_elevation",
    "SL_VGG_EOT.5m_EASE9262.68m.nc": "sl_vgg_eot",
    "SC_CRUST_DEN_KGM3_CRUST1s.5m_EASE9262.68m.nc": "sc_crust_den",
    "interpolated_EMM_from_csv.nc": "interpolated_emm",
    "Love_Group_10mHz.5m_EASE9262.68m.nc": "love_group",
    'sed_thick_filled.nc': 'gl_tot_sed_thick'

}



# Function to rename files
def rename_files(directory):
    for filename in os.listdir(directory):
        if filename.endswith('.nc'):
            # Example of extracting parts from the filename or manually defining them
            try:
              variable_name = file_mapping[filename]
            except KeyError:
              print(f"No mapping found for {filename}")
              continue
            resolution = '100km^2'

            # Create a new filename
            if 'interpolated' in variable_name:
              new_filename = f"interpolated_{variable_name}_{resolution}.nc"
            elif variable_name == 'gl_tot_sed_thick':
              new_filename = f"sed_thick_filled_{resolution}.nc"
            else:
              new_filename = f"{variable_name}_{resolution}.nc"

            # Construct full file paths
            old_file = os.path.join(directory, filename)
            new_file = os.path.join(directory, new_filename)

            # Rename the file
            os.rename(old_file, new_file)
            print(f"Renamed {old_file} to {new_file}")

rename_files(directory)


Renamed /content/IGRF_inc_grid.nc to /content/igrf_inc_100km^2.nc
Renamed /content/interpolated_EMM_from_csv.nc to /content/interpolated_interpolated_emm_100km^2.nc
Renamed /content/interpolated_bouguer_from_csv.nc to /content/interpolated_interpolated_bouguer_100km^2.nc
Renamed /content/SL_VGG_EOT.5m_EASE9262.68m.nc to /content/sl_vgg_eot_100km^2.nc
Renamed /content/SC_CRUST_AGE_MA_Seton2020.5m_EASE9262.68m.nc to /content/sc_crust_age_100km^2.nc
Renamed /content/GL_TOT_SED_THICK_M_GLOBSED_Straume.5m_EASE9262.68m.nc to /content/sed_thick_filled_100km^2.nc
Renamed /content/GL_ELEVATION_M_ASL_SRTM15+V2.5m_EASE9262.68m.nc to /content/gl_elevation_100km^2.nc
Renamed /content/SC_CRUST_VP_MS_CRUST1s.5m_EASE9262.68m.nc to /content/sc_crust_vp_100km^2.nc
Renamed /content/Rayleigh_Group_10mHz.5m_EASE9262.68m.nc to /content/rayleigh_group_100km^2.nc
Renamed /content/Rayleigh_Phase_10mHz.5m_EASE9262.68m.nc to /content/rayleigh_phase_100km^2.nc
Renamed /content/sed_thick_filled.nc to /content/sed_

In [2]:
import xarray as xr


# Function to add metadata and rename variables uniformly
def add_metadata(filepath, new_variable_name, resolution, date):
    # Open the NetCDF file
    try:
        ds = xr.open_dataset(filepath)
    except:
        print(f"Failed to open {filepath}")
        return None

    # Add global attributes
    ds.attrs['resolution'] = resolution
    ds.attrs['source'] = 'MagNav Magnetic Anomaly Prediction Project'
    ds.attrs['date'] = date

    # Rename all variables to the new name
    new_vars = {var_name: new_variable_name for var_name in ds.data_vars}
    ds = ds.rename_vars(new_vars)

    # Save the modified dataset
    ds.to_netcdf(filepath)
    ds.close()
    print(f"Metadata added and all variables renamed to '{new_variable_name}' in {filepath}")

# Example usage
directory = '/content/'

for filename in os.listdir(directory):
    if filename.endswith('.nc'):
        add_metadata(os.path.join(directory, filename), 'z', '100km^2', '2024-07-15')



Metadata added and all variables renamed to 'z' in /content/sc_crust_vp_100km^2.nc
Metadata added and all variables renamed to 'z' in /content/love_group_100km^2.nc
Metadata added and all variables renamed to 'z' in /content/interpolated_interpolated_emm_100km^2.nc
Metadata added and all variables renamed to 'z' in /content/gl_elevation_100km^2.nc
Metadata added and all variables renamed to 'z' in /content/sc_crust_den_100km^2.nc
Metadata added and all variables renamed to 'z' in /content/igrf_inc_100km^2.nc
Metadata added and all variables renamed to 'z' in /content/sl_vgg_eot_100km^2.nc
Metadata added and all variables renamed to 'z' in /content/love_phase_100km^2.nc
Metadata added and all variables renamed to 'z' in /content/wgm2012_freeair_ponc_100km^2.nc
Metadata added and all variables renamed to 'z' in /content/interpolated_interpolated_bouguer_100km^2.nc
Metadata added and all variables renamed to 'z' in /content/cm_curie_point_depth_100km^2.nc
Metadata added and all variables 

In [None]:
import shutil
import os

# Set the path to the folder you want to zip
folder_path = '/content/'

# Set the path and name of the zip file you want to create
zip_file_path = '/content/magnetic_anomaly_prediction_processed_data.zip'

# Create a zip file of the folder
shutil.make_archive(zip_file_path[:-4], 'zip', folder_path)



In [None]:
# Optionally, you can move the zip file to a different directory
shutil.move(zip_file_path, '/content/drive/MyDrive/MagneticAnomalyRegression/datasets/processed')  # Example: Moving to Google Drive