In [35]:
import pandas as pd
import requests

import re
pd.set_option('display.max_columns', None)


In [2]:
marineweb = "https://api.obis.org/v3/occurrence?scientificname=Neoturris%20pileata&size=100"
resmarineweb = requests.get(marineweb)


In [3]:
marineweb = "https://api.obis.org/v3/occurrence?scientificname=Neoturris%20pileata"
resmarineweb= requests.get(marineweb)

In [4]:
area = "https://api.obis.org/v3/area"
resarea= requests.get(area)

In [5]:
resarea.status_code

200

In [5]:
areadata = resarea.json()

In [10]:
dfarea = pd.DataFrame(areadata['results'])

In [14]:
areadata['results'][1].keys()

dict_keys(['id', 'name', 'type'])

In [11]:
dfarea.shape

(798, 3)

In [12]:
dfarea.head()

Unnamed: 0,id,name,type
0,10001,2les des Sept Frbres et Godorya (Seven Brother...,ebsa
1,1,ABNJ,abnj
2,6,ABNJ: Arctic Ocean,abnj
3,114,ABNJ: Indian Ocean,abnj
4,176,ABNJ: North Atlantic,abnj


In [6]:
especies = ["Neoturris%20pileata", "Megaptera%20novaeangliae","Phycocaris%20simulans", "Cheilinus%20undulatus", "Tursiops%20truncatus","Halichoerus%20grypus"]
dfs = []
columnas_por_especie = {}

In [25]:
for especie in especies:
    marineweb = f"https://api.obis.org/v3/occurrence?scientificname={especie}&size=100"
    
    try:
        resmarineweb = requests.get(marineweb)
        resmarineweb.raise_for_status()
        
        marinedata = resmarineweb.json()
        
        if marinedata.get("results"):
            # Definir las columnas deseadas
            columnas_deseadas = [
                "occurrenceID", "decimalLatitude", "decimalLongitude", "vernacularName", "scientificName", "basisOfRecord", "class", "subclass", 
                "family", "genus", "kingdom", "institutionCode", "species", "sex", "date_start", "date_end", 
                "date_year", "depth", "minimumDepthInMeters", "maximumDepthInMeters", 
                "organismQuantity", "organismQuantityType", "sampleSizeUnit", 
                "sampleSizeValue", "node_id", "absence", "flags", 
                "bathymetry", "shoredistance", "sst", "sss"
            ]
            
            # Obtener solo las columnas que existen en los datos
            df_temp = pd.DataFrame(marinedata["results"])
            columnas_existentes = list(set(columnas_deseadas).intersection(df_temp.columns))

            # Guardar información de columnas (sin imprimir todavía)
            columnas_por_especie[especie] = {
                'columnas_presentes': columnas_existentes,
                'columnas_faltantes': list(set(columnas_deseadas) - set(columnas_existentes))
            }
            
            # Crear DataFrame solo con las columnas disponibles
            df_especie = df_temp[columnas_existentes]
            
            dfs.append(df_especie)
            print(f"Datos obtenidos para {especie}: {len(df_especie)} ocurrencias")
        else:
            print(f"No se encontraron resultados para {especie}")
            
    except requests.exceptions.RequestException as e:
        print(f"Error al obtener datos para {especie}: {e}")
    except KeyError as e:
        print(f"Error en la estructura de datos para {especie}: {e}")

# Imprimir resultados de columnas faltantes SOLO UNA VEZ al final
print("\n--- RESUMEN DE COLUMNAS FALTANTES POR ESPECIE ---")
for especie, info in columnas_por_especie.items():
    if info['columnas_faltantes']:
        print(f"{especie}: Faltan {len(info['columnas_faltantes'])} columnas")
        print(f"   Faltantes: {info['columnas_faltantes']}")

Datos obtenidos para Neoturris%20pileata: 100 ocurrencias
Datos obtenidos para Megaptera%20novaeangliae: 100 ocurrencias
Datos obtenidos para Phycocaris%20simulans: 19 ocurrencias
Datos obtenidos para Cheilinus%20undulatus: 100 ocurrencias
Datos obtenidos para Tursiops%20truncatus: 100 ocurrencias
Datos obtenidos para Halichoerus%20grypus: 100 ocurrencias

--- RESUMEN DE COLUMNAS FALTANTES POR ESPECIE ---
Neoturris%20pileata: Faltan 1 columnas
   Faltantes: ['vernacularName']
Phycocaris%20simulans: Faltan 7 columnas
   Faltantes: ['minimumDepthInMeters', 'sampleSizeValue', 'vernacularName', 'sampleSizeUnit', 'maximumDepthInMeters', 'sex', 'depth']
Cheilinus%20undulatus: Faltan 2 columnas
   Faltantes: ['sex', 'subclass']
Tursiops%20truncatus: Faltan 3 columnas
   Faltantes: ['sampleSizeValue', 'sex', 'sampleSizeUnit']
Halichoerus%20grypus: Faltan 4 columnas
   Faltantes: ['sampleSizeValue', 'organismQuantityType', 'organismQuantity', 'sampleSizeUnit']


In [14]:
marinedata["results"][0].keys()

dict_keys(['associatedReferences', 'basisOfRecord', 'bibliographicCitation', 'catalogNumber', 'class', 'classid', 'collectionCode', 'coordinatePrecision', 'coordinateUncertaintyInMeters', 'datasetID', 'datasetName', 'dateIdentified', 'date_end', 'date_mid', 'date_start', 'date_year', 'decimalLatitude', 'decimalLongitude', 'eventDate', 'eventTime', 'family', 'familyid', 'footprintWKT', 'genus', 'genusid', 'geodeticDatum', 'georeferencedDate', 'identificationRemarks', 'individualCount', 'infraorder', 'infraorderid', 'infraphylum', 'infraphylumid', 'institutionCode', 'kingdom', 'kingdomid', 'license', 'marine', 'megaclass', 'megaclassid', 'modified', 'nomenclaturalCode', 'occurrenceID', 'occurrenceRemarks', 'occurrenceStatus', 'order', 'orderid', 'organismID', 'organismRemarks', 'ownerInstitutionCode', 'phylum', 'phylumid', 'recordNumber', 'scientificName', 'scientificNameAuthorship', 'scientificNameID', 'sex', 'species', 'speciesid', 'specificEpithet', 'subclass', 'subclassid', 'suborder

In [27]:
epiturras = pd.concat(dfs, ignore_index=True)

In [19]:
epiturras["institutionCode"].value_counts()

institutionCode
CEBC                                                                                    79
Happywhale.com                                                                          70
SeaWatchFoundation                                                                      31
USDOC/NOAA/NMFS/PIFSC/ESD                                                               24
AIMS                                                                                    23
CRED                                                                                    21
MAGNT                                                                                    9
Observatoire Océanologique de Villefranche sur Mer                                       9
SPC                                                                                      8
CMLRE                                                                                    5
NCL                                                                       

In [32]:
epiturras.sample(10)

Unnamed: 0,node_id,sst,basisOfRecord,date_start,kingdom,class,absence,bathymetry,depth,organismQuantity,sss,minimumDepthInMeters,subclass,date_end,flags,sampleSizeValue,organismQuantityType,species,shoredistance,family,genus,date_year,sampleSizeUnit,occurrenceID,maximumDepthInMeters,scientificName,sex,institutionCode,vernacularName,decimalLatitude,decimalLongitude
1031,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],12.48,MachineObservation,1340582000000.0,Animalia,Mammalia,False,-2.4,,,34.11,,Theria,1340582000000.0,[NO_DEPTH],,,Halichoerus grypus,11876,Phocidae,Halichoerus,2012.0,,2026_33632,,Halichoerus grypus,,,,,
2464,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],9.99,HumanObservation,1459210000000.0,Animalia,Mammalia,False,-1.8,0.0,,34.86,0.0,Theria,1459210000000.0,[],,,Tursiops truncatus,686,Delphinidae,Tursiops,2016.0,,SWF_13_108748_1,0.0,Tursiops truncatus,,SeaWatchFoundation,Bottlenose dolphin,58.28605,-3.285733
265,[2a57cd59-6799-4579-955e-27c9af97aea4],26.34,HumanObservation,1259194000000.0,Animalia,Teleostei,False,7.01,5.0,1.0,35.0,0.0,,1259194000000.0,[],,individuals,Cheilinus undulatus,24174,Labridae,Cheilinus,2009.0,,urn:catalog:AIMS:Fish_LTMP:6c3f2630-db2e-11de-...,10.0,,,,,,
1632,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],12.86,MaterialSample,1443571000000.0,Animalia,Hydrozoa,False,51.0,10.0,0.0,34.79,10.0,Hydroidolina,1443571000000.0,[],50058.0,DNA sequence reads,Neoturris pileata,6836,Pandeidae,Neoturris,2015.0,DNA reads,758931,10.0,Neoturris pileata,,,,50.25,
2486,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],23.28,HumanObservation,1559520000000.0,Animalia,Mammalia,False,2.0,,,30.71,,Theria,1559520000000.0,[NO_DEPTH],,,Tursiops truncatus,2566,Delphinidae,Tursiops,2019.0,,2166_1412,,Tursiops truncatus,,TMMSN,Common Bottlenose Dolphin,29.39151,-94.81288
2519,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],12.95,MachineObservation,1375488000000.0,Animalia,Mammalia,False,-8.0,,,35.15,,Theria,1375488000000.0,[NO_DEPTH],,,Halichoerus grypus,50,Phocidae,Halichoerus,2013.0,,2028_118115,,Halichoerus grypus,Male,CEBC,Atlantic gray seal,48.54559,-4.74952
1430,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],11.98,HumanObservation,745459200000.0,Animalia,Mammalia,False,17.2,0.0,,34.07,0.0,Theria,745459200000.0,[],,,Tursiops truncatus,905,Delphinidae,Tursiops,1993.0,,SWF_13_159962_1,0.0,Tursiops truncatus,,SeaWatchFoundation,Bottlenose dolphin,,
1950,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],23.28,HumanObservation,1467936000000.0,Animalia,Mammalia,False,5.0,,,30.6,,Theria,1467936000000.0,[NO_DEPTH],,,Tursiops truncatus,2495,Delphinidae,Tursiops,2016.0,,2166_1790,,Tursiops truncatus,,TMMSN,,29.40018,
2563,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],12.65,MachineObservation,1344298000000.0,Animalia,Mammalia,False,51.0,,,34.05,,Theria,1344298000000.0,[NO_DEPTH],,,Halichoerus grypus,13083,Phocidae,Halichoerus,2012.0,,2026_8993,,Halichoerus grypus,Male,CEBC,Atlantic gray seal,50.61687,0.25869
165,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],26.01,HumanObservation,1080086000000.0,Animalia,Mammalia,False,62.0,,,34.33,,Theria,1080086000000.0,[NO_DEPTH],,,Megaptera novaeangliae,1956,Balaenopteridae,Megaptera,2004.0,,231910,,,,,,,


In [34]:
epiturras.shape

(519, 27)

In [35]:
epiturras.columns

Index(['date_end', 'minimumDepthInMeters', 'shoredistance', 'bathymetry',
       'occurrenceID', 'sst', 'sampleSizeValue', 'organismQuantity', 'kingdom',
       'sampleSizeUnit', 'scientificName', 'subclass', 'flags', 'class',
       'organismQuantityType', 'sss', 'node_id', 'maximumDepthInMeters',
       'date_year', 'depth', 'family', 'absence', 'genus', 'basisOfRecord',
       'date_start', 'dropped', 'species'],
      dtype='object')

In [None]:
#milisegundos desde el 1 de enero de 1970
#medida Unix

In [39]:
epiturras['date_start'].value_counts()

date_start
-1.688342e+12    5
-1.704154e+12    3
 1.270685e+12    2
 1.378512e+12    2
 1.320624e+12    2
                ..
 1.682381e+12    1
 1.630973e+12    1
 1.719619e+12    1
 2.996352e+11    1
 1.307146e+12    1
Name: count, Length: 489, dtype: int64

In [38]:
epiturras['date_year'].value_counts()

date_year
2012.0    49
2011.0    36
2013.0    35
2023.0    24
2015.0    24
2009.0    19
2016.0    19
2006.0    18
2018.0    18
2022.0    17
2004.0    17
2024.0    16
2014.0    16
2010.0    16
2017.0    14
2007.0    14
2008.0    14
2002.0    11
2021.0    11
2020.0    10
2001.0    10
2003.0     9
2005.0     9
1916.0     9
1993.0     7
1991.0     6
1998.0     5
1979.0     4
1982.0     4
1995.0     4
1992.0     4
2000.0     3
2019.0     3
1899.0     3
1997.0     3
1985.0     2
1996.0     2
1958.0     2
1959.0     2
1999.0     2
2025.0     2
1960.0     2
1990.0     2
1984.0     2
1986.0     2
1981.0     2
1908.0     1
1954.0     1
1987.0     1
1822.0     1
1911.0     1
1896.0     1
1930.0     1
1910.0     1
1901.0     1
1912.0     1
1994.0     1
Name: count, dtype: int64

In [36]:
epiturras['dropped'].value_counts()

dropped
False    519
Name: count, dtype: int64

In [37]:
epiturras.sample(5)

Unnamed: 0,date_end,minimumDepthInMeters,shoredistance,bathymetry,occurrenceID,sst,sampleSizeValue,organismQuantity,kingdom,sampleSizeUnit,scientificName,subclass,flags,class,organismQuantityType,sss,node_id,maximumDepthInMeters,date_year,depth,family,absence,genus,basisOfRecord,date_start,dropped,species
360,1659398000000.0,0.0,384,3.0,SWF_13_55082_3,10.19,,,Animalia,,Tursiops truncatus,Theria,[],Mammalia,,34.39,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],0.0,2022.0,0.0,Delphinidae,False,Tursiops,HumanObservation,1659398000000.0,False,Tursiops truncatus
452,744076800000.0,,4066,10.0,71_234,9.83,,,Animalia,,Halichoerus grypus,Theria,[NO_DEPTH],Mammalia,,33.78,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],,1993.0,,Phocidae,False,Halichoerus,MachineObservation,744076800000.0,False,Halichoerus grypus
354,1462925000000.0,0.0,2367,117.0,58691431-732b-4382-9b43-6553d4fc95a1,19.08,,1.0,Animalia,,Tursiops truncatus,Theria,[],Mammalia,individuals,36.24,[4bf79a01-65a9-4db6-b37b-18434f26ddfc],0.0,2016.0,0.0,Delphinidae,False,Tursiops,HumanObservation,1462925000000.0,False,Tursiops truncatus
433,1381882000000.0,,4174,78.4,2028_112699,12.94,,,Animalia,,Halichoerus grypus,Theria,[NO_DEPTH],Mammalia,,35.15,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],,2013.0,,Phocidae,False,Halichoerus,MachineObservation,1381882000000.0,False,Halichoerus grypus
139,1645574000000.0,,3800,50.0,291222,25.38,,,Animalia,,Megaptera novaeangliae,Theria,[NO_DEPTH],Mammalia,,34.99,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],,2022.0,,Balaenopteridae,False,Megaptera,HumanObservation,1645574000000.0,False,Megaptera novaeangliae


In [33]:
epiturras.to_csv("marinedata.csv")

In [None]:
#guardar el df a csv