In [1]:
import os

In [2]:
def clean_file(file_path):
    """Strip leading metadata from a CSV export and save it under its scientific name.

    The file is scanned for the first line starting with ``Genus,Species``;
    everything before that header is discarded. The genus and species are read
    from the first non-blank row after the header and used to build the output
    name ``<Genus>_<species>.csv`` in the same directory as the input.

    The input file is not deleted. If the output path equals the input path,
    the input is overwritten with its cleaned content.

    Args:
        file_path: Path to the CSV file to clean.

    Returns:
        The path of the cleaned file that was written, or ``None`` when the
        file was skipped (no header found, or no usable genus/species row).
    """
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Find the line where the table starts (line with "Genus,Species,...")
    table_start_idx = next(
        (i for i, line in enumerate(lines) if line.startswith("Genus,Species")),
        None,
    )
    if table_start_idx is None:
        print(f"Could not find data table in file: {file_path}")
        return None

    # Use the first non-blank row after the header; a header with nothing
    # usable beneath it must be skipped rather than crash the whole batch.
    first_data_row = next(
        (line for line in lines[table_start_idx + 1:] if line.strip()),
        None,
    )
    fields = first_data_row.split(',') if first_data_row is not None else []
    genus = fields[0].strip() if len(fields) >= 2 else ""
    species = fields[1].strip() if len(fields) >= 2 else ""
    if not genus or not species:
        print(f"Could not find species data in file: {file_path}")
        return None

    scientific_name = f"{genus}_{species}"

    # Keep the header and everything after it, exactly as read.
    cleaned_lines = lines[table_start_idx:]

    # The cleaned copy lives next to the input, named after the species.
    new_file_path = os.path.join(os.path.dirname(file_path), f"{scientific_name}.csv")

    with open(new_file_path, 'w') as cleaned_file:
        cleaned_file.writelines(cleaned_lines)

    print(f"Processed and saved: {new_file_path}")
    return new_file_path


In [3]:
def process_folder(folder_path):
    """Run ``clean_file`` on every ``.csv`` file directly inside a folder.

    Entries are visited in sorted name order so repeated runs process files in
    the same sequence. Only regular files are processed; a sub-directory whose
    name happens to end in ``.csv`` is skipped. Sub-directories are not
    descended into.

    Args:
        folder_path: Directory containing the CSV files to clean.
    """
    # os.listdir gives no ordering guarantee; sort for a reproducible run.
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        if filename.endswith(".csv") and os.path.isfile(file_path):
            clean_file(file_path)

In [4]:
# Folder containing the CSV files. Set the AQUAMAPS_RAW_DIR environment
# variable to point somewhere else; when it is unset, the original
# machine-specific location below is used.
folder_path = os.environ.get(
    "AQUAMAPS_RAW_DIR",
    "/Users/kayladerman/github/mids-w210-capstone-browning-derman-solomon/AquaMaps Data/Raw",
)

# Process all files in the folder
process_folder(folder_path)

Processed and saved: /Users/kayladerman/github/mids-w210-capstone-browning-derman-solomon/AquaMaps Data/Raw/Mytella_charruana.csv
Processed and saved: /Users/kayladerman/github/mids-w210-capstone-browning-derman-solomon/AquaMaps Data/Raw/Selene_peruviana.csv
Processed and saved: /Users/kayladerman/github/mids-w210-capstone-browning-derman-solomon/AquaMaps Data/Raw/Sphyrna_lewini.csv
Processed and saved: /Users/kayladerman/github/mids-w210-capstone-browning-derman-solomon/AquaMaps Data/Raw/Lutjanus_argentiventris.csv
Processed and saved: /Users/kayladerman/github/mids-w210-capstone-browning-derman-solomon/AquaMaps Data/Raw/Mugil_cephalus.csv
Processed and saved: /Users/kayladerman/github/mids-w210-capstone-browning-derman-solomon/AquaMaps Data/Raw/Dosidicus_gigas.csv
Processed and saved: /Users/kayladerman/github/mids-w210-capstone-browning-derman-solomon/AquaMaps Data/Raw/Coryphaena_hippurus.csv
Processed and saved: /Users/kayladerman/github/mids-w210-capstone-browning-derman-solomon/A