# Filter .nc files by concentration (copy files whose CA/CB/CC values exceed 70)

In [None]:
import csv
import os
import shutil

import netCDF4
import pandas as pd


In [None]:

# Root folder holding the .nc files; both output folders live inside it.
input_folder = r'E:\Sea Ice Classification\AI4ArcticASIP Sea ice Dataset version2'
# Destination for the generated CSV files.
output_csv_dir = os.path.join(input_folder, '70csv_output')
# Destination for copies of the .nc files that pass the filter.
selected_nc_dir = os.path.join(input_folder, 'selected_nc_files')

# Make sure both output folders exist (no error if they are already there).
for _folder in (output_csv_dir, selected_nc_dir):
    os.makedirs(_folder, exist_ok=True)


In [None]:

# Iterate through the .nc files directly inside input_folder. For each file
# that has both 'polygon_icechart' and 'polygon_codes' variables:
#   1. dump polygon_codes to '<name>_PolygonCodes.csv',
#   2. split each ';'-separated code into named columns and save the result
#      as '<name>_separated_columns.csv',
#   3. copy the .nc file to selected_nc_dir when any CA/CB/CC value is > 70.
# A failure on one file is reported and does not stop the remaining files.

# Column names assigned to the ';'-separated fields of each code entry.
code_columns = ['id', 'CT', 'CA', 'SA', 'FA', 'CB', 'SB', 'FB', 'CC', 'SC', 'FC', 'CN', 'CD', 'CF', 'POLY_TYPE']

for filename in os.listdir(input_folder):
    if not filename.endswith('.nc'):
        continue

    nc_file = os.path.join(input_folder, filename)
    file_base = os.path.splitext(filename)[0]

    try:
        # The context manager closes the dataset even if reading raises, so
        # a bad file no longer leaks an open handle.
        with netCDF4.Dataset(nc_file) as ncf:
            # Skip files without polygon data.
            if 'polygon_icechart' not in ncf.variables or 'polygon_codes' not in ncf.variables:
                continue
            # [:] reads the values into memory, so they stay usable after
            # the dataset is closed.
            polygon_codes = ncf.variables['polygon_codes'][:]

        # Save polygon_codes to CSV. UTF-8 is pinned so the file matches the
        # default encoding pd.read_csv uses when reading it back below.
        out_csv_filename = os.path.join(output_csv_dir, f'{file_base}_PolygonCodes.csv')
        with open(out_csv_filename, mode='w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['Index', 'Code'])
            writer.writerows(enumerate(polygon_codes))

        print(f"Saved CSV for {file_base}")

        # Split each code string on ';' into one column per field.
        # Assigning the names raises if the number of fields is not 15; that
        # is reported by the handler below.
        df = pd.read_csv(out_csv_filename)
        df_split = df['Code'].str.split(';', expand=True)
        df_split.columns = code_columns
        final_df = pd.concat([df['Index'], df_split], axis=1)
        final_csv_filename = os.path.join(output_csv_dir, f'{file_base}_separated_columns.csv')
        final_df.to_csv(final_csv_filename, index=False)
        print(f"Saved separated columns CSV for {file_base}")

        # Non-numeric entries are coerced to NaN, which max() skips; if every
        # value is NaN the comparison is False and the file is not copied.
        concentrations = final_df[['CA', 'CB', 'CC']].apply(pd.to_numeric, errors='coerce')
        if concentrations.max().max() > 70:
            shutil.copy(nc_file, os.path.join(selected_nc_dir, filename))
            print(f"File {filename} contains values > 70 in CA/CB/CC and was copied.")

    except Exception as e:
        # Best-effort batch: report the failure and move on to the next file.
        print(f"Error processing {filename}: {e}")

print("Processing complete.")
