In [None]:
import pandas as pd

# 1. Load the CSV file
# NOTE: Make sure the file path is correct for your environment.
# The path is written as a raw string (r"...") so that the Windows
# backslashes are taken literally. In a normal string literal, "\D", "\S",
# "\M", "\c" and "\m" are unrecognized escape sequences: Python currently
# keeps them as-is but emits a warning, and a future version will reject them.
df = pd.read_csv(r"D:\Documents\SARA\MIYARU\citizen science\miyaru cs apr25.csv")

# 2. Check the existing column names (printed before they are replaced)
print("Old column names:")
print(df.columns)

In [None]:
# 3. Replace every header with a short name.
#    The assignment is positional: the i-th entry becomes the name of the
#    i-th column, so the list must contain exactly one name per column
#    (44 names here) in the same order as the loaded file.
df.columns = [
    "Submission ID", "Last updated", "Submission started", "Status",
    "Current step", "Name", "Affiliation", "Email", "Updates_required",
    "Species", "Number", "Depth_range", "Size", "Behaviours", "Injuries",
    "Mating_scars", "Notes", "Media",
    "Atoll", "Island", "Site_name", "Coords", "Habitat",
    "Activity", "Encounter_date", "Start_time", "Duration",
    "Surface_temperature", "Bottom_temperature", "Visibility",
    "Tide", "Current_strength", "Current_direction", "Water_movement",
    "N_people", "N_Boats",
    "Bait", "Bait_type", "Bait_composition", "Bait_amount",
    "Notes_2", "Errors", "Url", "Network ID",
]

# 4. Verify the columns have been renamed
#    (blank line, heading, then the new Index on the following line)
print("\nNew column names:", df.columns, sep="\n")


In [None]:
# Show the frame for a visual check of the renamed headers.
# NOTE(review): the frame carries the 'Name' and 'Email' columns, so clear
# this cell's output before sharing the notebook.
df

In [None]:

###############################################################################
# 1. DICTIONARIES OF ABBREVIATIONS
###############################################################################
# Species label -> short code used as the first part of Unique_ID.
# Keys are looked up verbatim with Series.map on the "Species" column, so a
# label only resolves if it matches a key character for character.
# Both zebra shark labels deliberately share the code "Ze".
# NOTE(review): some scientific names look misspelled ("Carcharinus",
# "Odantaspis", capitalised "Cuvier"). They are left untouched because the
# keys have to match the text in the data -- confirm against the source
# form before correcting them.
species_to_abbr = dict(
    [
        ("Blacktip Reef Shark (Carcharhinus melanopterus)", "BT"),
        ("Whitetip Reef Shark (Triaenodon obesus)", "WT"),
        ("Grey Reef Shark (Carcharinus amblyrhynchos)", "Gr"),
        ("Silvertip Shark (Carcharhinus albimarginatus)", "ST"),
        ("Nurse Shark (Nebrius ferrugineus)", "Nu"),
        ("Zebra Shark (Stegostoma fasciatum)", "Ze"),
        ("Leopard/Zebra Shark (Stegostoma fasciatum)", "Ze"),
        ("Spinner Shark (Carcharhinus brevipinna)", "Sp"),
        ("Tiger Shark (Galeocerdo Cuvier)", "Ti"),
        ("Bull Shark (Carcharhinus leucas)", "Bu"),
        ("Lemon Shark (Negaprion acutidens)", "Le"),
        ("Great Hammerhead Shark (Sphyrna mokarran)", "GH"),
        ("Scalloped Hammerhead Shark (Sphyrna lewini)", "SH"),
        ("Pelagic Thresher Shark (Alopias pelagicus)", "PT"),
        ("Bigeye Thresher Shark (Alopias superciliosus)", "BET"),
        ("Oceanic Whitetip Shark (Carcharhinus longimanus)", "OW"),
        ("Shortfin Mako Shark (Isurus oxyrinchus)", "SM"),
        ("Smalltooth Sand Tiger Shark (Odantaspis ferox)", "SS"),
        ("Giant Guitarfish (Rhynchobatus djiddensis)", "GG"),
        ("Bowmouth Guitarfish (Rhina ancylostoma)", "BG"),
    ]
)

# Atoll label -> short code used as the second part of Unique_ID.
# Keys are looked up verbatim with Series.map on the "Atoll" column.
# NOTE(review): "Lhavhiyani" may be a misspelling of "Lhaviyani"; it is kept
# as-is because the key has to match the text in the data -- confirm against
# the source form before changing it.
atoll_to_abbr = {
    atoll_label: atoll_code
    for atoll_label, atoll_code in (
        ("Haa Alif", "HA"),
        ("Haa Dhaal", "HDh"),
        ("Shaviyani", "Sh"),
        ("Noonu", "N"),
        ("Lhavhiyani", "Lh"),
        ("Raa", "R"),
        ("Baa", "B"),
        ("Kaafu", "K"),
        ("Alif Alif", "AA"),
        ("Alif Dhaal", "ADh"),
        ("Vaavu", "V"),
        ("Dhaalu", "Dh"),
        ("Meemu", "M"),
        ("Thaa", "T"),
        ("Laamu", "L"),
        ("Gaaf Alif", "GA"),
        ("Gaaf Dhaal", "GDh"),
        ("Gnaviyani (Fuvahmulah)", "Gn"),
        ("Seenu (Addu)", "S"),
    )
}

##########################################################################
# 2) Convert Encounter_date to datetime and create yyyymmdd column
##########################################################################
# errors="coerce" turns unparseable values into NaT instead of raising;
# those rows are removed by the dropna in step 4.
df["Encounter_date"] = pd.to_datetime(df["Encounter_date"], errors="coerce")
df["Date_yyyymmdd"] = df["Encounter_date"].dt.strftime("%Y%m%d")

##########################################################################
# 3) Map species and atoll columns to abbreviations
##########################################################################
# A label that is not a key of the lookup dictionary maps to NaN.
df["Species_Abbr"] = df["Species"].map(species_to_abbr)
df["Atoll_Abbr"] = df["Atoll"].map(atoll_to_abbr)

##########################################################################
# 4) Build the grouping-based counter
#    For each unique (Species_Abbr, Atoll_Abbr, Date_yyyymmdd) -- i.e. for
#    each distinct Unique_ID prefix -- assign an incrementing counter:
#    1, 2, 3...
##########################################################################
# Drop any rows lacking Species, Atoll, or Encounter_date
df.dropna(subset=["Species", "Atoll", "Encounter_date"], inplace=True)

# After the drop, a NaN abbreviation can only mean that the label is missing
# from the lookup dictionary. Report those labels instead of silently
# writing "nan" into the ID.
unmapped = df.loc[
    df["Species_Abbr"].isna() | df["Atoll_Abbr"].isna(), ["Species", "Atoll"]
].drop_duplicates()
if not unmapped.empty:
    print(
        "WARNING: rows with a Species or Atoll label that has no abbreviation "
        "(their Unique_ID will contain 'nan'):"
    )
    print(unmapped.to_string(index=False))

# Count within exactly the columns that make up the ID. Grouping on the raw
# Species / Atoll / Encounter_date values instead lets several groups share
# one ID prefix (e.g. the two zebra shark labels both abbreviate to "Ze", or
# two timestamps fall on the same day), each restarting at 1 and producing
# duplicate IDs.
# dropna=False keeps rows with a NaN abbreviation in a group of their own, so
# every row receives an integer counter and the int cast cannot fail on NaN.
df["n"] = (
    df.groupby(["Species_Abbr", "Atoll_Abbr", "Date_yyyymmdd"], dropna=False)
    .cumcount()
    + 1
).astype(int)

##########################################################################
# 5) Construct the Unique_ID column
#    Format: <Species_Abbr>_<Atoll_Abbr>_<yyyymmdd>_<n>
##########################################################################
df["Unique_ID"] = (
    df["Species_Abbr"].astype(str)
    + "_"
    + df["Atoll_Abbr"].astype(str)
    + "_"
    + df["Date_yyyymmdd"].astype(str)
    + "_"
    + df["n"].astype(str)
)

# The counter is computed per ID prefix, so every ID must be distinct.
assert df["Unique_ID"].is_unique, "Unique_ID contains duplicates"

# (Optional) See how it looks
df

In [None]:
import pandas as pd

# 1. Read the CSV at the path below into its own frame, df2
# NOTE: Make sure the file path is correct for your environment
df2 = pd.read_csv(
    filepath_or_buffer="/Volumes/AugustMV25B/miyaru cs apr23-apr24.csv",
)

# 2. Check the existing column names (heading, then the Index on the next line)
print("Old column names:", df2.columns, sep="\n")


In [None]:
# Show df2 for a visual check of what was loaded.
df2