In [None]:
import pandas as pd
import requests

import re
pd.set_option('display.max_columns', None)



In [17]:
marineweb = "https://api.obis.org/v3/occurrence?scientificname=Neoturris%20pileata&size=100"
resmarineweb = requests.get(marineweb)


In [3]:
marineweb = "https://api.obis.org/v3/occurrence?scientificname=Neoturris%20pileata"
resmarineweb= requests.get(marineweb)

In [2]:
area = "https://api.obis.org/v3/area"
resarea= requests.get(area)

In [4]:
resarea.status_code

200

In [5]:
areadata = resarea.json()

In [10]:
dfarea = pd.DataFrame(areadata['results'])

In [14]:
areadata['results'][1].keys()

dict_keys(['id', 'name', 'type'])

In [11]:
dfarea.shape

(798, 3)

In [12]:
dfarea.head()

Unnamed: 0,id,name,type
0,10001,2les des Sept Frbres et Godorya (Seven Brother...,ebsa
1,1,ABNJ,abnj
2,6,ABNJ: Arctic Ocean,abnj
3,114,ABNJ: Indian Ocean,abnj
4,176,ABNJ: North Atlantic,abnj


In [30]:
especies = ["Neoturris%20pileata", "Megaptera%20novaeangliae","Phycocaris%20simulans", "Cheilinus%20undulatus", "Tursiops%20truncatus","Halichoerus%20grypus"]
dfs = []
columnas_por_especie = {}

In [31]:
for especie in especies:
    marineweb = f"https://api.obis.org/v3/occurrence?scientificname={especie}&size=100"
    
    try:
        resmarineweb = requests.get(marineweb)
        resmarineweb.raise_for_status()
        
        marinedata = resmarineweb.json()
        
        if marinedata.get("results"):
            # Definir las columnas deseadas
            columnas_deseadas = [
                "occurrenceID", "scientificName", "basisOfRecord", "class", "subclass", 
                "family", "genus", "kingdom", "species", "date_start", "date_end", 
                "date_year", "depth", "minimumDepthInMeters", "maximumDepthInMeters", 
                "organismQuantity", "organismQuantityType", "sampleSizeUnit", 
                "sampleSizeValue", "node_id", "dropped", "absence", "flags", 
                "bathymetry", "shoredistance", "sst", "sss"
            ]
            
            # Obtener solo las columnas que existen en los datos
            df_temp = pd.DataFrame(marinedata["results"])
            columnas_existentes = list(set(columnas_deseadas).intersection(df_temp.columns))

            # Guardar información de columnas (sin imprimir todavía)
            columnas_por_especie[especie] = {
                'columnas_presentes': columnas_existentes,
                'columnas_faltantes': list(set(columnas_deseadas) - set(columnas_existentes))
            }
            
            # Crear DataFrame solo con las columnas disponibles
            df_especie = df_temp[columnas_existentes]
            
            dfs.append(df_especie)
            print(f"Datos obtenidos para {especie}: {len(df_especie)} ocurrencias")
        else:
            print(f"No se encontraron resultados para {especie}")
            
    except requests.exceptions.RequestException as e:
        print(f"Error al obtener datos para {especie}: {e}")
    except KeyError as e:
        print(f"Error en la estructura de datos para {especie}: {e}")

# Imprimir resultados de columnas faltantes SOLO UNA VEZ al final
print("\n--- RESUMEN DE COLUMNAS FALTANTES POR ESPECIE ---")
for especie, info in columnas_por_especie.items():
    if info['columnas_faltantes']:
        print(f"{especie}: Faltan {len(info['columnas_faltantes'])} columnas")
        print(f"   Faltantes: {info['columnas_faltantes']}")

Datos obtenidos para Neoturris%20pileata: 100 ocurrencias
Datos obtenidos para Megaptera%20novaeangliae: 100 ocurrencias
Datos obtenidos para Phycocaris%20simulans: 19 ocurrencias
Datos obtenidos para Cheilinus%20undulatus: 100 ocurrencias
Datos obtenidos para Tursiops%20truncatus: 100 ocurrencias
Datos obtenidos para Halichoerus%20grypus: 100 ocurrencias

--- RESUMEN DE COLUMNAS FALTANTES POR ESPECIE ---
Phycocaris%20simulans: Faltan 5 columnas
   Faltantes: ['minimumDepthInMeters', 'sampleSizeUnit', 'depth', 'maximumDepthInMeters', 'sampleSizeValue']
Cheilinus%20undulatus: Faltan 1 columnas
   Faltantes: ['subclass']
Tursiops%20truncatus: Faltan 2 columnas
   Faltantes: ['sampleSizeUnit', 'sampleSizeValue']
Halichoerus%20grypus: Faltan 4 columnas
   Faltantes: ['organismQuantity', 'sampleSizeUnit', 'sampleSizeValue', 'organismQuantityType']


In [32]:
epiturras = pd.concat(dfs, ignore_index=True)

In [33]:
epiturras.head()

Unnamed: 0,date_end,minimumDepthInMeters,shoredistance,bathymetry,occurrenceID,sst,sampleSizeValue,organismQuantity,kingdom,sampleSizeUnit,scientificName,subclass,flags,class,organismQuantityType,sss,node_id,maximumDepthInMeters,date_year,depth,family,absence,genus,basisOfRecord,date_start,dropped,species
0,1087171000000.0,10.0,6836,51.0,6911,12.86,68429,0,Animalia,DNA reads,Neoturris pileata,Hydroidolina,[],Hydrozoa,DNA sequence reads,34.79,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],10.0,2004.0,10.0,Pandeidae,False,Neoturris,MaterialSample,1087171000000.0,False,Neoturris pileata
1,1092010000000.0,10.0,6836,51.0,20764,12.86,14632,0,Animalia,DNA reads,Neoturris pileata,Hydroidolina,[],Hydrozoa,DNA sequence reads,34.79,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],10.0,2004.0,10.0,Pandeidae,False,Neoturris,MaterialSample,1092010000000.0,False,Neoturris pileata
2,1184026000000.0,10.0,6836,51.0,206790,12.86,12014,539,Animalia,DNA reads,Neoturris pileata,Hydroidolina,[],Hydrozoa,DNA sequence reads,34.79,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],10.0,2007.0,10.0,Pandeidae,False,Neoturris,MaterialSample,1184026000000.0,False,Neoturris pileata
3,1445472000000.0,10.0,6836,51.0,766847,12.86,46899,0,Animalia,DNA reads,Neoturris pileata,Hydroidolina,[],Hydrozoa,DNA sequence reads,34.79,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],10.0,2015.0,10.0,Pandeidae,False,Neoturris,MaterialSample,1445472000000.0,False,Neoturris pileata
4,1137370000000.0,10.0,6836,51.0,80134,12.86,58996,0,Animalia,DNA reads,Neoturris pileata,Hydroidolina,[],Hydrozoa,DNA sequence reads,34.79,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],10.0,2006.0,10.0,Pandeidae,False,Neoturris,MaterialSample,1137370000000.0,False,Neoturris pileata


In [34]:
epiturras.shape

(519, 27)

In [35]:
epiturras.columns

Index(['date_end', 'minimumDepthInMeters', 'shoredistance', 'bathymetry',
       'occurrenceID', 'sst', 'sampleSizeValue', 'organismQuantity', 'kingdom',
       'sampleSizeUnit', 'scientificName', 'subclass', 'flags', 'class',
       'organismQuantityType', 'sss', 'node_id', 'maximumDepthInMeters',
       'date_year', 'depth', 'family', 'absence', 'genus', 'basisOfRecord',
       'date_start', 'dropped', 'species'],
      dtype='object')

In [None]:
#milisegundos desde el 1 de enero de 1970
#medida Unix

In [39]:
epiturras['date_start'].value_counts()

date_start
-1.688342e+12    5
-1.704154e+12    3
 1.270685e+12    2
 1.378512e+12    2
 1.320624e+12    2
                ..
 1.682381e+12    1
 1.630973e+12    1
 1.719619e+12    1
 2.996352e+11    1
 1.307146e+12    1
Name: count, Length: 489, dtype: int64

In [38]:
epiturras['date_year'].value_counts()

date_year
2012.0    49
2011.0    36
2013.0    35
2023.0    24
2015.0    24
2009.0    19
2016.0    19
2006.0    18
2018.0    18
2022.0    17
2004.0    17
2024.0    16
2014.0    16
2010.0    16
2017.0    14
2007.0    14
2008.0    14
2002.0    11
2021.0    11
2020.0    10
2001.0    10
2003.0     9
2005.0     9
1916.0     9
1993.0     7
1991.0     6
1998.0     5
1979.0     4
1982.0     4
1995.0     4
1992.0     4
2000.0     3
2019.0     3
1899.0     3
1997.0     3
1985.0     2
1996.0     2
1958.0     2
1959.0     2
1999.0     2
2025.0     2
1960.0     2
1990.0     2
1984.0     2
1986.0     2
1981.0     2
1908.0     1
1954.0     1
1987.0     1
1822.0     1
1911.0     1
1896.0     1
1930.0     1
1910.0     1
1901.0     1
1912.0     1
1994.0     1
Name: count, dtype: int64

In [36]:
epiturras['dropped'].value_counts()

dropped
False    519
Name: count, dtype: int64

In [37]:
epiturras.sample(5)

Unnamed: 0,date_end,minimumDepthInMeters,shoredistance,bathymetry,occurrenceID,sst,sampleSizeValue,organismQuantity,kingdom,sampleSizeUnit,scientificName,subclass,flags,class,organismQuantityType,sss,node_id,maximumDepthInMeters,date_year,depth,family,absence,genus,basisOfRecord,date_start,dropped,species
360,1659398000000.0,0.0,384,3.0,SWF_13_55082_3,10.19,,,Animalia,,Tursiops truncatus,Theria,[],Mammalia,,34.39,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],0.0,2022.0,0.0,Delphinidae,False,Tursiops,HumanObservation,1659398000000.0,False,Tursiops truncatus
452,744076800000.0,,4066,10.0,71_234,9.83,,,Animalia,,Halichoerus grypus,Theria,[NO_DEPTH],Mammalia,,33.78,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],,1993.0,,Phocidae,False,Halichoerus,MachineObservation,744076800000.0,False,Halichoerus grypus
354,1462925000000.0,0.0,2367,117.0,58691431-732b-4382-9b43-6553d4fc95a1,19.08,,1.0,Animalia,,Tursiops truncatus,Theria,[],Mammalia,individuals,36.24,[4bf79a01-65a9-4db6-b37b-18434f26ddfc],0.0,2016.0,0.0,Delphinidae,False,Tursiops,HumanObservation,1462925000000.0,False,Tursiops truncatus
433,1381882000000.0,,4174,78.4,2028_112699,12.94,,,Animalia,,Halichoerus grypus,Theria,[NO_DEPTH],Mammalia,,35.15,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],,2013.0,,Phocidae,False,Halichoerus,MachineObservation,1381882000000.0,False,Halichoerus grypus
139,1645574000000.0,,3800,50.0,291222,25.38,,,Animalia,,Megaptera novaeangliae,Theria,[NO_DEPTH],Mammalia,,34.99,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],,2022.0,,Balaenopteridae,False,Megaptera,HumanObservation,1645574000000.0,False,Megaptera novaeangliae


In [None]:
#guardar el df a csv