In [102]:
import pandas as pd
import numpy as np
import json
from datetime import datetime
import sys
import csv
import xarray as xr
sys.path.append('../functions')
 
from latlng2pentad import latlng2pentad
from pentad2latlng import pentad2latlng

A card is defined by three elements: pentad_code, user_id, and date of the first day of the 5 days.

From a card id {pentad}_{observer}_{date}, it is possible to find all checklists that belong to it.

The aim here is to build a list of valid cards which will then be used to find the checklist_id which belongs to this card and then finally compute the card info from the list of checklists.

Pentad: we need to assign for each checklist its pentad and check that the distance traveled is within the boundary of the pentad.
User_id is quite straightforward to build.
Date: much more challenging. See below for details.

## Set up the Import Options and import the data


In [103]:
cntr = "KE"
file = "../data/eBird/chk_{cntr}_relAug-2022/ebd_{cntr}_relAug-2022.txt"
file = "../data/eBird/ebd_AFR_relApr-2024/ebd_AFR_relApr-2024.txt.gz"

In [104]:
ebd = pd.read_csv(file, 
                   delimiter="\t",
                   usecols=["SAMPLING EVENT IDENTIFIER", "SCIENTIFIC NAME", "CATEGORY", "LATITUDE", "LONGITUDE", "OBSERVATION DATE", "TIME OBSERVATIONS STARTED", "PROTOCOL TYPE", "DURATION MINUTES", "EFFORT DISTANCE KM", "ALL SPECIES REPORTED", "OBSERVER ID"],
                   parse_dates=["OBSERVATION DATE"])

In [105]:
# Create OBSERVATIONDATETIME by combining date and time
tmp = ebd['TIME OBSERVATIONS STARTED'].fillna("00:00:00")
ebd['OBSERVATION DATETIME'] = pd.to_datetime(ebd['OBSERVATION DATE'].dt.strftime('%Y-%m-%d') + " " + tmp, format='%Y-%m-%d %H:%M:%S')

# Sort by date: Important to have for filtering duplicate card-adu and needed for sequence
ebd.sort_values(by="OBSERVATION DATE", inplace=True)

# Keep only species category
# ebd0[["COMMONNAME", "SCIENTIFIC NAME", "CATEGORY"]].drop_duplicates().to_csv("species_list_ebird.csv", index=False)

# Keep some spuh which can be matched to an ADU
# spuh_keep = pd.read_csv("data/spuh_keep.csv", dtype=str)
# ebd0 = ebd0[(~ebd0["CATEGORY"].isin(["spuh", "slash"])) | ebd0["SCIENTIFIC NAME"].isin(spuh_keep["Clements--scientific_name"])]

In [106]:
# Read species_match data
species_match = pd.read_excel("../data/World list - working v1.xlsx")
species_match.rename(columns={"Sci (eBird)": "SCIENTIFIC NAME"}, inplace=True)
species_match = species_match[~species_match["ADU"].isna() & ~species_match["SCIENTIFIC NAME"].isna()][["ADU", "SCIENTIFIC NAME"]]

# Check that all entries are matching
unmatched_entries = ebd[~ebd["SCIENTIFIC NAME"].isin(species_match["SCIENTIFIC NAME"])]
unmatched_species = unmatched_entries.groupby("SCIENTIFIC NAME")["SCIENTIFIC NAME"].count().sort_values(ascending=0)
print(unmatched_species)

# Export species list
unmatched_species.to_csv("../data/unmatched_species.csv")

SCIENTIFIC NAME
Spilopelia senegalensis    192372
Microcarbo africanus        97306
Icthyophaga vocifer         85518
Zosterops virens            66501
Crinifer concolor           65714
                            ...  
Crotophaga ani                  1
Platycercus eximius             1
Pittidae sp.                    1
Pionus menstruus                1
Phaenicophaeus tristis          1
Name: SCIENTIFIC NAME, Length: 1073, dtype: int64


## Build checklist level dataframe


In [107]:
chk = ebd[["SAMPLING EVENT IDENTIFIER", "LATITUDE", "LONGITUDE", "OBSERVATION DATE", "OBSERVATION DATETIME", "PROTOCOL TYPE", "DURATION MINUTES", "EFFORT DISTANCE KM", "ALL SPECIES REPORTED", "OBSERVER ID"]].drop_duplicates()

In [108]:
# For some shared checklist some variable are different for the same sampling event.
# chk[chk["SAMPLING EVENT IDENTIFIER"].duplicated(keep=False)].sort_values(by="SAMPLING EVENT IDENTIFIER")
# ebd0[ebd0["SAMPLING EVENT IDENTIFIER"] == "S97700871"]
chk = chk.drop_duplicates("SAMPLING EVENT IDENTIFIER") 
len(chk)

1190311

In [109]:
# Sort by date
chk.sort_values(by="OBSERVATION DATE", inplace=True)

# Filter protocol
chk["KEEP PROTOCOL"] = chk["PROTOCOL TYPE"].isin(["Historical", "Incidental", "Stationary", "Traveling"])

# Pentad
# Assign to pentad and check if distance remains inside
chk["PENTAD"] = latlng2pentad(chk["LATITUDE"], chk["LONGITUDE"])

# Search center of pentad
lat, lon = pentad2latlng(chk["PENTAD"])
dist = (5 / 60 / 2) * 1.2  # allow for a 20% overlap

effort_distance_deg = (180/np.pi) *  chk["EFFORT DISTANCE KM"] / 6371
chk["KEEP PENTAD"] = ~((effort_distance_deg + np.maximum(np.abs(lat - chk["LATITUDE"]), np.abs(lon - chk["LONGITUDE"]))) > dist)

# Also filter historical checklists which have no distance
chk.loc[(chk["PROTOCOL TYPE"] == "Historical") & chk["EFFORT DISTANCE KM"].isna(), "KEEP PENTAD"] = False

## Find valid full protocol card

Cards are considered to be full protocol if the sum of durations of the underlying checklists exceed 2 hours over the next rolling 5 days.
In this section, we first indentify which checklists can create a valid full card.


In [110]:
# Find the index of all checklists which contribute to the 2hr rule. Note that we will still use "non-valid" checklists later as their species still contribute to the card.
valid_id = (chk["KEEP PENTAD"] & chk["KEEP PROTOCOL"] & (chk["DURATION MINUTES"] > 0) & chk["ALL SPECIES REPORTED"])

# Filter for valid checklist and create in a smaller table
check = chk.loc[valid_id, ["PENTAD", "OBSERVER ID", "OBSERVATION DATE", "DURATION MINUTES"]]

# Combine checklists made by the same observer, pentad, and day. This is an intermediate step which enables us to grid the 5days windows more easily
checkday = check.groupby(["PENTAD", "OBSERVER ID", "OBSERVATION DATE"]).agg({"DURATION MINUTES": "sum"}).reset_index()

# Sort the checklist by id and date
checkday.sort_values(by=["OBSERVATION DATE"], inplace=True)

# Create additional columns
checkday["pentad_observer"] = checkday["PENTAD"] + "_" + checkday["OBSERVER ID"]
checkday["pentad_observer_date"] = checkday["PENTAD"] + "_" + checkday["OBSERVER ID"].str[3:] + "_" + checkday["OBSERVATION DATE"].dt.strftime("%Y%m%d")

In [111]:
# Do a first filter to eliminate all pentad_observer witout sufficient total duration time. (Aim is to just reduce the computation later)
pentad_observer_duration = checkday.groupby(['pentad_observer'])['DURATION MINUTES'].sum()
pentad_observer_duration_index = pentad_observer_duration[pentad_observer_duration >= 2*60].index
checkday_long = checkday[checkday["pentad_observer"].isin(pentad_observer_duration_index)]

# Second filter for reducing the test case
# pentad_observer_unique = checkday_long["pentad_observer"].unique()
# pentad_observer_unique = pentad_observer_unique[0:1000]
# checkday_long = checkday_long[checkday_long["pentad_observer"].isin(pentad_observer_unique)]

In [112]:
def checkday_pentad_observer(df):
    df["CARD"] = ""
    # Build a matrix of distance between all checklists to check if they are close to each other
    di = np.abs(df[ "OBSERVATION DATE"].values[:, None] - df["OBSERVATION DATE"].values) < pd.Timedelta(days=5)
    # create duration array to make computation slightly faster
    duration = df['DURATION MINUTES'].to_numpy()
    # Initie the card array with empty string
    # card = np.array(['' for x in range(len(df))], dtype='object')
    u = 1
    # Loop trough the list of checklists
    while u <= len(df):
        # Find all neighbord
        nb_neighbor = np.sum(di[u-1, (u-1):])
        neigh = u + np.arange(0, nb_neighbor) - 1
        dur = duration[neigh].sum()
        # Check that total duration is more than 2hours, if so add card code (pentad_observer_date) to card array
        if dur >= (2*60):
            df.iloc[neigh, df.columns.get_loc("CARD")] = df.iloc[u-1, df.columns.get_loc("pentad_observer_date")]
        u += nb_neighbor
    return df

In [113]:
# Apply the function defined above for each pentad-observer at the same time (makes operation much faster)
checkday_long_card = checkday_long.groupby("pentad_observer").apply(checkday_pentad_observer, include_groups=False).reset_index()

In [114]:
# Create the card DataFrame
card = checkday_long_card[checkday_long_card["CARD"] == checkday_long_card["pentad_observer_date"]][["PENTAD", "OBSERVER ID", "OBSERVATION DATE", "CARD"]]

# Sort by card
card.sort_values(by='CARD', inplace=True)

In [115]:
card

Unnamed: 0,PENTAD,OBSERVER ID,OBSERVATION DATE,CARD
4,0000_0920,obsr431479,2024-02-18,0000_0920_r431479_20240218
5,0000_1135,obsr344577,1997-07-22,0000_1135_r344577_19970722
6,0000_1140,obsr107968,1996-07-08,0000_1140_r107968_19960708
7,0000_1140,obsr167174,1996-07-08,0000_1140_r167174_19960708
8,0000_2930,obsr3868323,2023-05-13,0000_2930_r3868323_20230513
...,...,...,...,...
142838,3715c1010,obsr536040,2022-05-21,3715c1010_r536040_20220521
142839,3720c0950,obsr1896095,2020-03-02,3720c0950_r1896095_20200302
142840,3720c0950,obsr490904,2020-03-02,3720c0950_r490904_20200302
142841,3720c0950,obsr536040,2020-03-02,3720c0950_r536040_20200302


## Create Card

We take back `chk` where all checklists (i.e., including the incidentals, stationary, etc...) and find if they contribute to an existing full card.


In [116]:
# Filter for checklist with pentad and observer present in card
chk_keep = chk[ (chk['KEEP PENTAD']) & ((chk['PENTAD']+chk['OBSERVER ID']).isin((card['PENTAD']+card['OBSERVER ID'])))]

In [117]:
# Combine all possible card for all checklists with the same observer and pentad. 
chk_card = pd.merge(
    chk_keep, # .loc[:,['OBSERVER ID', 'PENTAD', "SAMPLING EVENT IDENTIFIER", "OBSERVATION DATE"]], 
    card, 
    on=['OBSERVER ID', 'PENTAD'], suffixes=('_chk', '_card'), how="left")

# Filter for date within the 5 days
duration = (chk_card['OBSERVATION DATE_chk'] - chk_card['OBSERVATION DATE_card']).dt.days
chk_card = chk_card[(duration>=0) & (duration<5)]

In [118]:
card_chk = chk_card.groupby("CARD").agg({
    'SAMPLING EVENT IDENTIFIER': list,
    'OBSERVATION DATETIME': ['min', 'max'], 
    'DURATION MINUTES': 'sum', 
    'EFFORT DISTANCE KM': 'sum'}).reset_index()
card_chk.columns = ['_'.join(col).strip('_') for col in card_chk.columns.values]

# merge with card
card_chk = pd.merge(
    card_chk,
    card,
    on="CARD",
    how="inner"
)
card_chk

Unnamed: 0,CARD,SAMPLING EVENT IDENTIFIER_list,OBSERVATION DATETIME_min,OBSERVATION DATETIME_max,DURATION MINUTES_sum,EFFORT DISTANCE KM_sum,PENTAD,OBSERVER ID,OBSERVATION DATE
0,0000_0920_r431479_20240218,"[S162608678, S162609512, S162852215, S16283088...",2024-02-18 12:20:00,2024-02-19 19:17:00,133.0,1.500,0000_0920,obsr431479,2024-02-18
1,0000_1135_r344577_19970722,[S13388886],1997-07-22 11:00:00,1997-07-22 11:00:00,120.0,1.000,0000_1135,obsr344577,1997-07-22
2,0000_1140_r107968_19960708,[S7529043],1996-07-08 16:30:00,1996-07-08 16:30:00,180.0,1.609,0000_1140,obsr107968,1996-07-08
3,0000_1140_r167174_19960708,[S7529228],1996-07-08 16:30:00,1996-07-08 16:30:00,180.0,1.609,0000_1140,obsr167174,1996-07-08
4,0000_2930_r3868323_20230513,[S137486656],2023-05-13 06:30:00,2023-05-13 06:30:00,180.0,2.000,0000_2930,obsr3868323,2023-05-13
...,...,...,...,...,...,...,...,...,...
52296,3715c1010_r536040_20220521,[S110892947],2022-05-21 09:30:00,2022-05-21 09:30:00,270.0,0.000,3715c1010,obsr536040,2022-05-21
52297,3720c0950_r1896095_20200302,[S146633506],2020-03-02 07:30:00,2020-03-02 07:30:00,1200.0,0.000,3720c0950,obsr1896095,2020-03-02
52298,3720c0950_r490904_20200302,[S65389557],2020-03-02 07:30:00,2020-03-02 07:30:00,1200.0,0.000,3720c0950,obsr490904,2020-03-02
52299,3720c0950_r536040_20200302,[S65387334],2020-03-02 07:30:00,2020-03-02 07:30:00,1200.0,0.000,3720c0950,obsr536040,2020-03-02


## Get species level information


In [119]:
# Filter the full dataset to get only the checklist  valid...
ebd_f = ebd.loc[ebd["SAMPLING EVENT IDENTIFIER"].isin(chk_card["SAMPLING EVENT IDENTIFIER"]),["SAMPLING EVENT IDENTIFIER", "SCIENTIFIC NAME", "OBSERVATION DATETIME", "LATITUDE", "LONGITUDE", "EFFORT DISTANCE KM"]]

# Add card No
ebd_f = pd.merge(
    ebd_f, 
    chk_card.loc[:,["SAMPLING EVENT IDENTIFIER", "CARD"]], 
    on="SAMPLING EVENT IDENTIFIER", 
    how="left")

# Keep a unique list of card-species (remove duplicate species in the same card)
ebd_f.sort_values(by="OBSERVATION DATETIME", inplace=True) # SHould have been done already above, but necessary for keep="first"
ebd_f_u = ebd_f.drop_duplicates(subset=["CARD", "SCIENTIFIC NAME"], keep='first')

In [120]:
# Add ADU number
ebd_f_u_sp = pd.merge(ebd_f_u, species_match, how="left")

# Set un-macthed species to undefined
ebd_f_u_sp['ADU'] = ebd_f_u_sp['ADU'].fillna(0)

# Compute the sequence of records based on datetime entry
ebd_f_u_sp["SEQ"] = ebd_f_u_sp.groupby("CARD")["OBSERVATION DATETIME"].rank(method="min").astype(int) # dense t

ebd_f_u_sp['EFFORT DISTANCE KM'] = ebd_f_u_sp['EFFORT DISTANCE KM'].fillna('')

ebd_f_u_sp['OBSERVATION DATETIME'] = ebd_f_u_sp['OBSERVATION DATETIME'].dt.strftime('%Y-%m-%dT%H:%M:%SZ')
ebd_f_u_sp

  ebd_f_u_sp['ADU'] = ebd_f_u_sp['ADU'].fillna(0)


Unnamed: 0,SAMPLING EVENT IDENTIFIER,SCIENTIFIC NAME,OBSERVATION DATETIME,LATITUDE,LONGITUDE,EFFORT DISTANCE KM,CARD,ADU,SEQ
0,S80173131,Pternistis swainsonii,1961-01-06T07:00:00Z,-16.755902,30.238781,,1645_3010_r2136318_19610106,185,1
1,S80172444,Bubulcus ibis,1961-01-12T07:00:00Z,-16.755902,30.238781,,1645_3010_r2136318_19610112,61,1
2,S80172483,Ciconia abdimii,1961-01-12T07:00:00Z,-16.755902,30.238781,,1645_3010_r2136318_19610112,78,1
3,S80172263,Egretta garzetta,1961-01-12T07:30:00Z,-16.755902,30.238781,,1645_3010_r2136318_19610112,59,3
4,S80173421,Struthio camelus,1961-01-17T07:00:00Z,-16.755902,30.238781,,1645_3010_r2136318_19610117,1,1
...,...,...,...,...,...,...,...,...,...
1849762,S171161220,Bubulcus ibis,2024-04-30T20:00:00Z,0.208632,32.297788,3.74,0015c3215_r3121048_20240430,61,1
1849763,S172072861,Cossypha niveicapilla,2024-04-30T20:00:00Z,0.208632,32.297788,3.74,0015c3215_r3392193_20240430,2574,1
1849764,S171161220,Nicator chloris,2024-04-30T20:00:00Z,0.208632,32.297788,3.74,0015c3215_r3121048_20240430,2996,1
1849765,S172072861,Halcyon malimbica,2024-04-30T20:00:00Z,0.208632,32.297788,3.74,0015c3215_r3392193_20240430,1131,1


In [121]:
# Extract the species list per card as a cell for vectorized computation
card_sp = ebd_f_u_sp.groupby("CARD")[["ADU", "SEQ", "LATITUDE", "LONGITUDE", "OBSERVATION DATETIME", "EFFORT DISTANCE KM"]].agg(list).reset_index()
card_sp

Unnamed: 0,CARD,ADU,SEQ,LATITUDE,LONGITUDE,OBSERVATION DATETIME,EFFORT DISTANCE KM
0,0000_0920_r431479_20240218,"[784, 3852, 394, 0, 503, 872, 0, 172, 3700, 35...","[1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...","[-0.0392179, -0.0392179, -0.0392179, -0.039217...","[9.340437, 9.340437, 9.340437, 9.340437, 9.340...","[2024-02-18T12:20:00Z, 2024-02-18T12:20:00Z, 2...","[, , , , 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1...."
1,0000_1135_r344577_19970722,"[283, 306, 3888, 394, 246]","[1, 1, 1, 1, 1]","[-0.0686645, -0.0686645, -0.0686645, -0.068664...","[11.6070557, 11.6070557, 11.6070557, 11.607055...","[1997-07-22T11:00:00Z, 1997-07-22T11:00:00Z, 1...","[1.0, 1.0, 1.0, 1.0, 1.0]"
2,0000_1140_r107968_19960708,"[352, 2798, 754, 57, 399, 11491, 872, 113, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[-0.0700264, -0.0700264, -0.0700264, -0.070026...","[11.7131367, 11.7131367, 11.7131367, 11.713136...","[1996-07-08T16:30:00Z, 1996-07-08T16:30:00Z, 1...","[1.609, 1.609, 1.609, 1.609, 1.609, 1.609, 1.6..."
3,0000_1140_r167174_19960708,"[399, 754, 872, 1628, 11491, 362, 352, 1148, 2...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[-0.0700264, -0.0700264, -0.0700264, -0.070026...","[11.7131367, 11.7131367, 11.7131367, 11.713136...","[1996-07-08T16:30:00Z, 1996-07-08T16:30:00Z, 1...","[1.609, 1.609, 1.609, 1.609, 1.609, 1.609, 1.6..."
4,0000_2930_r3868323_20230513,"[1152, 56, 112, 84, 401, 736, 0, 54, 2428, 159...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[-0.0459642, -0.0459642, -0.0459642, -0.045964...","[29.5133887, 29.5133887, 29.5133887, 29.513388...","[2023-05-13T06:30:00Z, 2023-05-13T06:30:00Z, 2...","[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, ..."
...,...,...,...,...,...,...,...
52296,3715c1010_r536040_20220521,"[379, 2162, 0, 80, 378, 166, 976, 154, 0, 132,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[37.1811082, 37.1811082, 37.1811082, 37.181108...","[10.2289643, 10.2289643, 10.2289643, 10.228964...","[2022-05-21T09:30:00Z, 2022-05-21T09:30:00Z, 2...","[, , , , , , , , , , , , , , , ]"
52297,3720c0950_r1896095_20200302,"[7901, 47, 933]","[1, 1, 1]","[37.2771433, 37.2771433, 37.2771433]","[9.8745954, 9.8745954, 9.8745954]","[2020-03-02T07:30:00Z, 2020-03-02T07:30:00Z, 2...","[, , ]"
52298,3720c0950_r490904_20200302,"[7901, 47, 933]","[1, 1, 1]","[37.2771433, 37.2771433, 37.2771433]","[9.8745954, 9.8745954, 9.8745954]","[2020-03-02T07:30:00Z, 2020-03-02T07:30:00Z, 2...","[, , ]"
52299,3720c0950_r536040_20200302,"[7901, 933, 47]","[1, 1, 1]","[37.2771433, 37.2771433, 37.2771433]","[9.8745954, 9.8745954, 9.8745954]","[2020-03-02T07:30:00Z, 2020-03-02T07:30:00Z, 2...","[, , ]"


## Export


In [123]:
card_exp = pd.merge(
    card_chk,
    card_sp,
    on="CARD",
)

card_exp["Protocol"] = "F"
card_exp["ObserverEmail"] = "kenyabirdmap@naturekenya.org"
card_exp['ObserverNo'] = '22829'

card_exp['Hour1'] = ""
card_exp['Hour2'] = ""
card_exp['Hour3'] = ""
card_exp['Hour4'] = ""
card_exp['Hour5'] = ""
card_exp['Hour6'] = ""
card_exp['Hour7'] = ""
card_exp['Hour8'] = ""
card_exp['Hour9'] = ""
card_exp['Hour10'] = ""
card_exp['InclNight'] = "0"
card_exp['AllHabitats'] = "0"

card_exp['TotalHours'] = round(card_exp['DURATION MINUTES_sum']/60,2)
card_exp['TotalDistance'] = round(card_exp['EFFORT DISTANCE KM_sum'],2)
card_exp["TotalSpp"] = card_exp["ADU"].apply(lambda x: len(x))
card_exp['StartDate'] = card_exp['OBSERVATION DATETIME_min'].dt.date.apply(str)
card_exp['EndDate'] = card_exp['OBSERVATION DATETIME_max'].dt.date.apply(str)
card_exp['StartTime'] = card_exp['OBSERVATION DATETIME_min'].dt.strftime('%H:%M')

def create_records(ADU, SEQ, LATITUDE, LONGITUDE, OBSERVATION_DATETIME, EFFORT_DISTANCE_KM, CARD):
    return [{
        'Sequence': SEQ, 
        "Latitude": LATITUDE,
        "Longitude": LONGITUDE,
        "Altitude": "",
        "CardNo": CARD,
        "Spp": ADU,
        "Accuracy": EFFORT_DISTANCE_KM*1000,
        "SightingTime": OBSERVATION_DATETIME
        } for ADU, SEQ, LATITUDE, LONGITUDE, OBSERVATION_DATETIME, EFFORT_DISTANCE_KM in zip(ADU, SEQ, LATITUDE, LONGITUDE, OBSERVATION_DATETIME, EFFORT_DISTANCE_KM)]

# Apply the function to combine 'scores' and 'weights' into nested objects
card_exp['records'] = card_exp.apply(lambda row: create_records(row['ADU'], row['SEQ'], row['LATITUDE'], row['LONGITUDE'], row['OBSERVATION DATETIME'], row['EFFORT DISTANCE KM'], row['CARD']), axis=1)


card_exp = card_exp.rename(columns={
        'CARD':'CardNo',
        'PENTAD': 'Pentad',
        'SAMPLING EVENT IDENTIFIER': 'Checklists',
        'OBSERVER ID': 'ObserverNoEbird',
        'SAMPLING EVENT IDENTIFIER_list': 'Checklists',
    })

card_exp = card_exp.reindex(columns=['Protocol', 'ObserverEmail', 'CardNo', 'StartDate', 'EndDate', 'StartTime', 'Pentad', 'ObserverNo', 'TotalHours', 'Hour1', 'Hour2', 'Hour3', 'Hour4', 'Hour5', 'Hour6', 'Hour7', 'Hour8', 'Hour9', 'Hour10','TotalSpp' ,'InclNight', 'AllHabitats', 'Checklists', 'TotalDistance', 'ObserverNoEbird','records'])

card_exp

Unnamed: 0,Protocol,ObserverEmail,CardNo,StartDate,EndDate,StartTime,Pentad,ObserverNo,TotalHours,Hour1,...,Hour8,Hour9,Hour10,TotalSpp,InclNight,AllHabitats,Checklists,TotalDistance,ObserverNoEbird,records
0,F,kenyabirdmap@naturekenya.org,0000_0920_r431479_20240218,2024-02-18,2024-02-19,12:20,0000_0920,22829,2.22,,...,,,,26,0,0,"[S162608678, S162609512, S162852215, S16283088...",1.50,obsr431479,"[{'Sequence': 1, 'Latitude': -0.0392179, 'Long..."
1,F,kenyabirdmap@naturekenya.org,0000_1135_r344577_19970722,1997-07-22,1997-07-22,11:00,0000_1135,22829,2.00,,...,,,,5,0,0,[S13388886],1.00,obsr344577,"[{'Sequence': 1, 'Latitude': -0.0686645, 'Long..."
2,F,kenyabirdmap@naturekenya.org,0000_1140_r107968_19960708,1996-07-08,1996-07-08,16:30,0000_1140,22829,3.00,,...,,,,15,0,0,[S7529043],1.61,obsr107968,"[{'Sequence': 1, 'Latitude': -0.0700264, 'Long..."
3,F,kenyabirdmap@naturekenya.org,0000_1140_r167174_19960708,1996-07-08,1996-07-08,16:30,0000_1140,22829,3.00,,...,,,,15,0,0,[S7529228],1.61,obsr167174,"[{'Sequence': 1, 'Latitude': -0.0700264, 'Long..."
4,F,kenyabirdmap@naturekenya.org,0000_2930_r3868323_20230513,2023-05-13,2023-05-13,06:30,0000_2930,22829,3.00,,...,,,,22,0,0,[S137486656],2.00,obsr3868323,"[{'Sequence': 1, 'Latitude': -0.0459642, 'Long..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52296,F,kenyabirdmap@naturekenya.org,3715c1010_r536040_20220521,2022-05-21,2022-05-21,09:30,3715c1010,22829,4.50,,...,,,,16,0,0,[S110892947],0.00,obsr536040,"[{'Sequence': 1, 'Latitude': 37.1811082, 'Long..."
52297,F,kenyabirdmap@naturekenya.org,3720c0950_r1896095_20200302,2020-03-02,2020-03-02,07:30,3720c0950,22829,20.00,,...,,,,3,0,0,[S146633506],0.00,obsr1896095,"[{'Sequence': 1, 'Latitude': 37.2771433, 'Long..."
52298,F,kenyabirdmap@naturekenya.org,3720c0950_r490904_20200302,2020-03-02,2020-03-02,07:30,3720c0950,22829,20.00,,...,,,,3,0,0,[S65389557],0.00,obsr490904,"[{'Sequence': 1, 'Latitude': 37.2771433, 'Long..."
52299,F,kenyabirdmap@naturekenya.org,3720c0950_r536040_20200302,2020-03-02,2020-03-02,07:30,3720c0950,22829,20.00,,...,,,,3,0,0,[S65387334],0.00,obsr536040,"[{'Sequence': 1, 'Latitude': 37.2771433, 'Long..."


In [124]:
json_data = card_exp.to_json(orient='records', indent=2)
with open(f"../export/AFR_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json", 'w') as f:
    f.write(json_data)