In [24]:
import pandas as pd
import requests

import re
pd.set_option('display.max_columns', None)


In [25]:
marineweb = "https://api.obis.org/v3/occurrence?scientificname=Neoturris%20pileata&size=100"
resmarineweb = requests.get(marineweb)


In [26]:
marineweb = "https://api.obis.org/v3/occurrence?scientificname=Neoturris%20pileata"
resmarineweb= requests.get(marineweb)

In [27]:
area = "https://api.obis.org/v3/area"
resarea= requests.get(area)

In [28]:
resarea.status_code

200

In [29]:
areadata = resarea.json()

In [30]:
dfarea = pd.DataFrame(areadata['results'])

In [31]:
areadata['results'][1].keys()

dict_keys(['id', 'name', 'type'])

In [32]:
dfarea.shape

(798, 3)

In [33]:
dfarea.head()

Unnamed: 0,id,name,type
0,10001,2les des Sept Frbres et Godorya (Seven Brother...,ebsa
1,1,ABNJ,abnj
2,6,ABNJ: Arctic Ocean,abnj
3,114,ABNJ: Indian Ocean,abnj
4,176,ABNJ: North Atlantic,abnj


In [34]:
especies = ["Neoturris%20pileata", "Megaptera%20novaeangliae","Phycocaris%20simulans", "Cheilinus%20undulatus", "Tursiops%20truncatus","Halichoerus%20grypus"]
dfs = []
columnas_por_especie = {}

In [None]:
for especie in especies:
    marineweb = f"https://api.obis.org/v3/occurrence?scientificname={especie}&size=100"
    
    try:
        resmarineweb = requests.get(marineweb)
        resmarineweb.raise_for_status()
        
        marinedata = resmarineweb.json()
        
        if marinedata.get("results"):
            # Definir las columnas deseadas
            columnas_deseadas = [
                "occurrenceID", "decimalLatitude", "decimalLongitude", "vernacularName", "scientificName", "basisOfRecord", "class", "subclass", 
                "family", "genus", "kingdom", "institutionCode", "species", "sex", "date_start", "date_end", 
                "date_year", "depth", "minimumDepthInMeters", "maximumDepthInMeters", 
                "organismQuantity", "organismQuantityType", "sampleSizeUnit", 
                "sampleSizeValue", "node_id", "absence", "flags", 
                "bathymetry", "shoredistance", "sst", "sss", "waterBody"
            ]
            
            # Obtener solo las columnas que existen en los datos
            df_temp = pd.DataFrame(marinedata["results"])
            columnas_existentes = list(set(columnas_deseadas).intersection(df_temp.columns))

            # Guardar información de columnas
            columnas_por_especie[especie] = {
                'columnas_presentes': columnas_existentes,
                'columnas_faltantes': list(set(columnas_deseadas) - set(columnas_existentes))
            }
            
            # Crear DataFrame solo con las columnas disponibles
            df_especie = df_temp[columnas_existentes]
            
            dfs.append(df_especie)
            print(f"Datos obtenidos para {especie}: {len(df_especie)} ocurrencias")
        else:
            print(f"No se encontraron resultados para {especie}")
            
    except requests.exceptions.RequestException as e:
        print(f"Error al obtener datos para {especie}: {e}")
    except KeyError as e:
        print(f"Error en la estructura de datos para {especie}: {e}")

# Imprimir resultados de columnas faltantes
print("\n--- RESUMEN DE COLUMNAS FALTANTES POR ESPECIE ---")
for especie, info in columnas_por_especie.items():
    if info['columnas_faltantes']:
        print(f"{especie}: Faltan {len(info['columnas_faltantes'])} columnas")
        print(f"   Faltantes: {info['columnas_faltantes']}")

Datos obtenidos para Neoturris%20pileata: 100 ocurrencias
Datos obtenidos para Megaptera%20novaeangliae: 100 ocurrencias
Datos obtenidos para Phycocaris%20simulans: 19 ocurrencias
Datos obtenidos para Cheilinus%20undulatus: 100 ocurrencias
Datos obtenidos para Tursiops%20truncatus: 100 ocurrencias
Datos obtenidos para Halichoerus%20grypus: 100 ocurrencias

--- RESUMEN DE COLUMNAS FALTANTES POR ESPECIE ---
Neoturris%20pileata: Faltan 2 columnas
   Faltantes: ['waterBody', 'vernacularName']
Phycocaris%20simulans: Faltan 8 columnas
   Faltantes: ['maximumDepthInMeters', 'sex', 'sampleSizeUnit', 'waterBody', 'depth', 'vernacularName', 'minimumDepthInMeters', 'sampleSizeValue']
Cheilinus%20undulatus: Faltan 2 columnas
   Faltantes: ['subclass', 'sex']
Tursiops%20truncatus: Faltan 3 columnas
   Faltantes: ['sex', 'sampleSizeUnit', 'sampleSizeValue']
Halichoerus%20grypus: Faltan 4 columnas
   Faltantes: ['organismQuantity', 'sampleSizeValue', 'organismQuantityType', 'sampleSizeUnit']


In [36]:
marinedata["results"][0].keys()

dict_keys(['associatedReferences', 'basisOfRecord', 'bibliographicCitation', 'catalogNumber', 'class', 'classid', 'collectionCode', 'coordinatePrecision', 'coordinateUncertaintyInMeters', 'datasetID', 'datasetName', 'dateIdentified', 'date_end', 'date_mid', 'date_start', 'date_year', 'decimalLatitude', 'decimalLongitude', 'eventDate', 'eventTime', 'family', 'familyid', 'footprintWKT', 'genus', 'genusid', 'geodeticDatum', 'georeferencedDate', 'identificationRemarks', 'individualCount', 'infraorder', 'infraorderid', 'infraphylum', 'infraphylumid', 'institutionCode', 'kingdom', 'kingdomid', 'license', 'marine', 'megaclass', 'megaclassid', 'modified', 'nomenclaturalCode', 'occurrenceID', 'occurrenceRemarks', 'occurrenceStatus', 'order', 'orderid', 'organismID', 'organismRemarks', 'ownerInstitutionCode', 'phylum', 'phylumid', 'recordNumber', 'scientificName', 'scientificNameAuthorship', 'scientificNameID', 'sex', 'species', 'speciesid', 'specificEpithet', 'subclass', 'subclassid', 'suborder

In [37]:
epiturras = pd.concat(dfs, ignore_index=True)

In [38]:
epiturras["institutionCode"].value_counts()

institutionCode
CEBC                                                                                    79
Happywhale.com                                                                          70
SeaWatchFoundation                                                                      31
USDOC/NOAA/NMFS/PIFSC/ESD                                                               24
AIMS                                                                                    23
CRED                                                                                    21
MAGNT                                                                                    9
Observatoire Océanologique de Villefranche sur Mer                                       9
SPC                                                                                      8
CMLRE                                                                                    5
Diveboard                                                                 

In [39]:
epiturras.sample(10)

Unnamed: 0,date_year,sampleSizeUnit,class,scientificName,depth,institutionCode,shoredistance,decimalLatitude,flags,decimalLongitude,maximumDepthInMeters,absence,date_start,minimumDepthInMeters,subclass,basisOfRecord,sss,sex,organismQuantity,sst,family,kingdom,occurrenceID,bathymetry,node_id,date_end,organismQuantityType,species,sampleSizeValue,genus,waterBody,vernacularName
438,2011.0,,Mammalia,Halichoerus grypus,,CEBC,28508,55.68971,[NO_DEPTH],-7.18027,,False,1315008000000.0,,Theria,MachineObservation,35.09,Male,,11.18,Phocidae,Animalia,2028_78485,54.4,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],1315008000000.0,,Halichoerus grypus,,Halichoerus,"Molene Archipelago,Iroise Sea,English Channel,...",Atlantic gray seal
223,1960.0,,Teleostei,Cheilinus undulatus,0.915,CAS,494,9.574444,[],138.174722,1.83,False,-314496000000.0,0.0,,PreservedSpecimen,33.95,,,29.15,Labridae,Animalia,urn:catalog:CAS:ICH:229387,-9.0,[b7c47783-a020-4173-b390-7b57c4fa1426],-314496000000.0,,Cheilinus undulatus,,Cheilinus,,
389,2010.0,,Mammalia,Tursiops truncatus,0.0,,517,57.720833,[],-3.270333,0.0,False,1273795000000.0,0.0,Theria,HumanObservation,34.36,,,10.05,Delphinidae,Animalia,SWF_13_93918_1,2.4,[4bf79a01-65a9-4db6-b37b-18434f26ddfc],1273795000000.0,,Tursiops truncatus,,Tursiops,,
244,2007.0,,Teleostei,Cheilinus undulatus,,SPC,5438,10.406817,[NO_DEPTH],169.913683,,False,1187741000000.0,,,HumanObservation,34.36,,,28.34,Labridae,Animalia,A765B15C-9E7B-442D-8679-0CC71D231284_R_984A4E5...,2.0,[6c17c09e-5cc2-4d5a-8463-e866731d35a1],1187741000000.0,,Cheilinus undulatus,,Cheilinus,Pacific Ocean,
171,2020.0,,Mammalia,Megaptera novaeangliae,,Happywhale.com,13123,36.639157,[NO_DEPTH],-122.106802,,False,1583453000000.0,,Theria,HumanObservation,32.9,Female,,12.83,Balaenopteridae,Animalia,105889,1733.0,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],1583453000000.0,,Megaptera novaeangliae,,Megaptera,North Pacific Ocean,Humpback whale
150,2023.0,,Mammalia,Megaptera novaeangliae,,Happywhale.com,1533,65.653831,[NO_DEPTH],-21.556549,,False,1696810000000.0,,Theria,HumanObservation,34.57,,,4.95,Balaenopteridae,Animalia,472066,6.4,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],1696810000000.0,,Megaptera novaeangliae,,Megaptera,Greenland Sea,Humpback whale
68,1912.0,,Hydrozoa,Neoturris pileata,0.0,Observatoire Océanologique de Villefranche sur...,356,43.692899,[],7.316554,0.0,False,-1822522000000.0,0.0,Hydroidolina,HumanObservation,38.11,,,18.48,Pandeidae,Animalia,OBSVLFR_HistoricalPointB_Surf-18530,56.0,[1ad35eb9-c615-4733-864a-b585aebcfb70],-1820016000000.0,,Neoturris pileata,,Neoturris,,
409,1991.0,,Mammalia,Tursiops truncatus,0.0,,189,57.574722,[],-4.0875,0.0,False,675820800000.0,0.0,Theria,HumanObservation,34.0,,,9.75,Delphinidae,Animalia,SWF_13_293305_1,19.6,[4bf79a01-65a9-4db6-b37b-18434f26ddfc],675820800000.0,,Tursiops truncatus,,Tursiops,,
60,2007.0,DNA reads,Hydrozoa,Neoturris pileata,10.0,,6836,50.25,[],-4.217,10.0,False,1174867000000.0,10.0,Hydroidolina,MaterialSample,34.79,,0.0,12.86,Pandeidae,Animalia,183042,51.0,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],1174867000000.0,DNA sequence reads,Neoturris pileata,108501.0,Neoturris,,
189,,,Mammalia,Megaptera novaeangliae,,,6021,44.32017,[NO_DEPTH],-66.40767,,False,,,Theria,HumanObservation,30.86,,,8.75,Balaenopteridae,Animalia,215168,21.0,[7dfb2d90-9317-434d-8d4e-64adf324579a],,,Megaptera novaeangliae,,Megaptera,,WHALE-HUMPBACK


In [40]:
epiturras.shape

(519, 32)

In [41]:
epiturras.columns

Index(['date_year', 'sampleSizeUnit', 'class', 'scientificName', 'depth',
       'institutionCode', 'shoredistance', 'decimalLatitude', 'flags',
       'decimalLongitude', 'maximumDepthInMeters', 'absence', 'date_start',
       'minimumDepthInMeters', 'subclass', 'basisOfRecord', 'sss', 'sex',
       'organismQuantity', 'sst', 'family', 'kingdom', 'occurrenceID',
       'bathymetry', 'node_id', 'date_end', 'organismQuantityType', 'species',
       'sampleSizeValue', 'genus', 'waterBody', 'vernacularName'],
      dtype='object')

In [42]:
#milisegundos desde el 1 de enero de 1970
#medida Unix

In [43]:
epiturras['date_start'].value_counts()

date_start
-1.688342e+12    5
-1.704154e+12    3
 1.270685e+12    2
 1.378512e+12    2
 1.320624e+12    2
                ..
 1.682381e+12    1
 1.630973e+12    1
 1.719619e+12    1
 2.996352e+11    1
 1.307146e+12    1
Name: count, Length: 489, dtype: int64

In [44]:
epiturras['date_year'].value_counts()

date_year
2012.0    49
2011.0    36
2013.0    35
2023.0    24
2015.0    24
2009.0    19
2016.0    19
2006.0    18
2018.0    18
2022.0    17
2004.0    17
2024.0    16
2014.0    16
2010.0    16
2017.0    14
2007.0    14
2008.0    14
2002.0    11
2021.0    11
2020.0    10
2001.0    10
2003.0     9
2005.0     9
1916.0     9
1993.0     7
1991.0     6
1998.0     5
1979.0     4
1982.0     4
1995.0     4
1992.0     4
2000.0     3
2019.0     3
1899.0     3
1997.0     3
1985.0     2
1996.0     2
1958.0     2
1959.0     2
1999.0     2
2025.0     2
1960.0     2
1990.0     2
1984.0     2
1986.0     2
1981.0     2
1908.0     1
1954.0     1
1987.0     1
1822.0     1
1911.0     1
1896.0     1
1930.0     1
1910.0     1
1901.0     1
1912.0     1
1994.0     1
Name: count, dtype: int64

In [None]:
epiturras['dropped'].value_counts()

In [None]:
epiturras.sample(5)

Unnamed: 0,date_end,minimumDepthInMeters,shoredistance,bathymetry,occurrenceID,sst,sampleSizeValue,organismQuantity,kingdom,sampleSizeUnit,scientificName,subclass,flags,class,organismQuantityType,sss,node_id,maximumDepthInMeters,date_year,depth,family,absence,genus,basisOfRecord,date_start,dropped,species
360,1659398000000.0,0.0,384,3.0,SWF_13_55082_3,10.19,,,Animalia,,Tursiops truncatus,Theria,[],Mammalia,,34.39,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],0.0,2022.0,0.0,Delphinidae,False,Tursiops,HumanObservation,1659398000000.0,False,Tursiops truncatus
452,744076800000.0,,4066,10.0,71_234,9.83,,,Animalia,,Halichoerus grypus,Theria,[NO_DEPTH],Mammalia,,33.78,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],,1993.0,,Phocidae,False,Halichoerus,MachineObservation,744076800000.0,False,Halichoerus grypus
354,1462925000000.0,0.0,2367,117.0,58691431-732b-4382-9b43-6553d4fc95a1,19.08,,1.0,Animalia,,Tursiops truncatus,Theria,[],Mammalia,individuals,36.24,[4bf79a01-65a9-4db6-b37b-18434f26ddfc],0.0,2016.0,0.0,Delphinidae,False,Tursiops,HumanObservation,1462925000000.0,False,Tursiops truncatus
433,1381882000000.0,,4174,78.4,2028_112699,12.94,,,Animalia,,Halichoerus grypus,Theria,[NO_DEPTH],Mammalia,,35.15,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],,2013.0,,Phocidae,False,Halichoerus,MachineObservation,1381882000000.0,False,Halichoerus grypus
139,1645574000000.0,,3800,50.0,291222,25.38,,,Animalia,,Megaptera novaeangliae,Theria,[NO_DEPTH],Mammalia,,34.99,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],,2022.0,,Balaenopteridae,False,Megaptera,HumanObservation,1645574000000.0,False,Megaptera novaeangliae


In [None]:
url = "https://api.obis.org/v3/node"
res = requests.get(url).json()

nodes_df = pd.DataFrame(res["results"])

In [61]:
epiturras['node_id'][0]

['f92d5d7f-47a6-4605-9fd0-a8538dfde3fd']

In [62]:
def limpiar_lista(dato):
    return dato[0]

In [63]:
epiturras['node_id'].apply(limpiar_lista)

0      f92d5d7f-47a6-4605-9fd0-a8538dfde3fd
1      f92d5d7f-47a6-4605-9fd0-a8538dfde3fd
2      f92d5d7f-47a6-4605-9fd0-a8538dfde3fd
3      f92d5d7f-47a6-4605-9fd0-a8538dfde3fd
4      f92d5d7f-47a6-4605-9fd0-a8538dfde3fd
                       ...                 
514    573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31
515    573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31
516    573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31
517    4bf79a01-65a9-4db6-b37b-18434f26ddfc
518    573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31
Name: node_id, Length: 519, dtype: object

In [64]:
epiturras['id_node'] = epiturras['node_id'].apply(limpiar_lista)

In [65]:
epiturras.head()

Unnamed: 0,date_year,sampleSizeUnit,class,scientificName,depth,institutionCode,shoredistance,decimalLatitude,flags,decimalLongitude,maximumDepthInMeters,absence,date_start,minimumDepthInMeters,subclass,basisOfRecord,sss,sex,organismQuantity,sst,family,kingdom,occurrenceID,bathymetry,node_id,date_end,organismQuantityType,species,sampleSizeValue,genus,waterBody,vernacularName,id_node
0,2004.0,DNA reads,Hydrozoa,Neoturris pileata,10.0,,6836,50.25,[],-4.217,10.0,False,1087171000000.0,10.0,Hydroidolina,MaterialSample,34.79,,0,12.86,Pandeidae,Animalia,6911,51.0,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],1087171000000.0,DNA sequence reads,Neoturris pileata,68429,Neoturris,,,f92d5d7f-47a6-4605-9fd0-a8538dfde3fd
1,2004.0,DNA reads,Hydrozoa,Neoturris pileata,10.0,,6836,50.25,[],-4.217,10.0,False,1092010000000.0,10.0,Hydroidolina,MaterialSample,34.79,,0,12.86,Pandeidae,Animalia,20764,51.0,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],1092010000000.0,DNA sequence reads,Neoturris pileata,14632,Neoturris,,,f92d5d7f-47a6-4605-9fd0-a8538dfde3fd
2,2007.0,DNA reads,Hydrozoa,Neoturris pileata,10.0,,6836,50.25,[],-4.217,10.0,False,1184026000000.0,10.0,Hydroidolina,MaterialSample,34.79,,539,12.86,Pandeidae,Animalia,206790,51.0,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],1184026000000.0,DNA sequence reads,Neoturris pileata,12014,Neoturris,,,f92d5d7f-47a6-4605-9fd0-a8538dfde3fd
3,2015.0,DNA reads,Hydrozoa,Neoturris pileata,10.0,,6836,50.25,[],-4.217,10.0,False,1445472000000.0,10.0,Hydroidolina,MaterialSample,34.79,,0,12.86,Pandeidae,Animalia,766847,51.0,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],1445472000000.0,DNA sequence reads,Neoturris pileata,46899,Neoturris,,,f92d5d7f-47a6-4605-9fd0-a8538dfde3fd
4,2006.0,DNA reads,Hydrozoa,Neoturris pileata,10.0,,6836,50.25,[],-4.217,10.0,False,1137370000000.0,10.0,Hydroidolina,MaterialSample,34.79,,0,12.86,Pandeidae,Animalia,80134,51.0,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],1137370000000.0,DNA sequence reads,Neoturris pileata,58996,Neoturris,,,f92d5d7f-47a6-4605-9fd0-a8538dfde3fd


In [54]:
nodes_df.columns

Index(['id', 'name', 'description', 'theme', 'url', 'type', 'lon', 'lat',
       'feeds', 'contacts'],
      dtype='object')

In [66]:
df_unido = epiturras.merge(nodes_df, how='left', left_on="id_node", right_on="id")

In [73]:
df_unido['description'].unique()

array(['The UK OBIS Node is hosted at the Marine Biological Association (MBA) in Plymouth, UK.  The MBA is one of the world’s longest-running societies dedicated to promoting research into our oceans and the life they support. Since 1884 we have been providing a unified, clear, independent voice on behalf of the marine biological community and currently have a growing membership in over 40 countries. We also run a leading marine biological research laboratory where many eminent scientists - including 7 Nobel prize winners - have carried out their research.',
       'MedOBIS, the Regional OBIS Node for the Mediterranean Sea, is hosted by the Institute of Marine Biology, Biotechnology and Aquaculture https://imbbc.hcmr.gr/ (IMBBC), Hellenic Centre for Marine Research, HCMR https://www.hcmr.gr/en/ (Ελληνικό Κέντρο Θαλασσίων Ερευνών, ΕΛ.ΚΕ.Θ.Ε.), Heraklion (Crete). Launched in 2003, it has already been operational in 2005 as a Tier 3 Node of EurOBIS and covered the Eastern Mediterranean an

In [69]:
df_unido.sample(4)

Unnamed: 0,date_year,sampleSizeUnit,class,scientificName,depth,institutionCode,shoredistance,decimalLatitude,flags,decimalLongitude,maximumDepthInMeters,absence,date_start,minimumDepthInMeters,subclass,basisOfRecord,sss,sex,organismQuantity,sst,family,kingdom,occurrenceID,bathymetry,node_id,date_end,organismQuantityType,species,sampleSizeValue,genus,waterBody,vernacularName,id_node,id,name,description,theme,url,type,lon,lat,feeds,contacts
92,2010.0,DNA reads,Hydrozoa,Neoturris pileata,10.0,,6836,50.25,[],-4.217,10.0,False,1286928000000.0,10.0,Hydroidolina,MaterialSample,34.79,,0.0,12.86,Pandeidae,Animalia,365110,51.0,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],1286928000000.0,DNA sequence reads,Neoturris pileata,20224.0,Neoturris,,,f92d5d7f-47a6-4605-9fd0-a8538dfde3fd,f92d5d7f-47a6-4605-9fd0-a8538dfde3fd,OBIS UK,The UK OBIS Node is hosted at the Marine Biolo...,,[https://www.mba.ac.uk],regional,-4.141326,50.364299,[{'id': 'd5d261ce-9955-4c5d-a10b-64bb1d88a3dc'...,"[{'givenname': 'Dan', 'surname': 'Lear', 'ocea..."
30,2018.0,DNA reads,Hydrozoa,Neoturris pileata,10.0,,6836,50.25,[],-4.217,10.0,False,1535501000000.0,10.0,Hydroidolina,MaterialSample,34.79,,0.0,12.86,Pandeidae,Animalia,956831,51.0,[f92d5d7f-47a6-4605-9fd0-a8538dfde3fd],1535501000000.0,DNA sequence reads,Neoturris pileata,1536.0,Neoturris,,,f92d5d7f-47a6-4605-9fd0-a8538dfde3fd,f92d5d7f-47a6-4605-9fd0-a8538dfde3fd,OBIS UK,The UK OBIS Node is hosted at the Marine Biolo...,,[https://www.mba.ac.uk],regional,-4.141326,50.364299,[{'id': 'd5d261ce-9955-4c5d-a10b-64bb1d88a3dc'...,"[{'givenname': 'Dan', 'surname': 'Lear', 'ocea..."
430,2012.0,,Mammalia,Halichoerus grypus,,CEBC,11838,51.00395,[NO_DEPTH],1.59107,,False,1343866000000.0,,Theria,MachineObservation,34.07,Male,,12.56,Phocidae,Animalia,2026_35112,41.0,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],1343866000000.0,,Halichoerus grypus,,Halichoerus,Baie de Somme,Atlantic gray seal,573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31,573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31,OBIS-SEAMAP,"OBIS-SEAMAP, Ocean Biodiversity Information Sy...",Marine Megavertebrates OBIS node,[http://seamap.env.duke.edu],thematic,-78.944105,36.004507,[{'id': '18954703-9b9d-4584-b46d-87846532c5ee'...,"[{'givenname': 'Ei', 'surname': 'Fujioka', 'oc..."
459,1991.0,,Mammalia,Halichoerus grypus,,"Sea Mammal Research Unit, University of St. An...",20576,55.821,[NO_DEPTH],-1.502,,False,686275200000.0,,Theria,MachineObservation,34.45,,,10.18,Phocidae,Animalia,71_8889,67.4,[573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31],686275200000.0,,Halichoerus grypus,,Halichoerus,,Atlantic gray seal,573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31,573654c1-4ce7-4ea2-b2f1-e4d42f8f9c31,OBIS-SEAMAP,"OBIS-SEAMAP, Ocean Biodiversity Information Sy...",Marine Megavertebrates OBIS node,[http://seamap.env.duke.edu],thematic,-78.944105,36.004507,[{'id': '18954703-9b9d-4584-b46d-87846532c5ee'...,"[{'givenname': 'Ei', 'surname': 'Fujioka', 'oc..."


In [68]:
df_unido.columns

Index(['date_year', 'sampleSizeUnit', 'class', 'scientificName', 'depth',
       'institutionCode', 'shoredistance', 'decimalLatitude', 'flags',
       'decimalLongitude', 'maximumDepthInMeters', 'absence', 'date_start',
       'minimumDepthInMeters', 'subclass', 'basisOfRecord', 'sss', 'sex',
       'organismQuantity', 'sst', 'family', 'kingdom', 'occurrenceID',
       'bathymetry', 'node_id', 'date_end', 'organismQuantityType', 'species',
       'sampleSizeValue', 'genus', 'waterBody', 'vernacularName', 'id_node',
       'id', 'name', 'description', 'theme', 'url', 'type', 'lon', 'lat',
       'feeds', 'contacts'],
      dtype='object')

In [74]:
df_sin_limpiar = df_unido.drop(columns=['decimalLatitude', 'decimalLongitude', 'contacts', 'feeds', 'id', 'description', 'theme', 'url', 'node_id', 'type' ])

In [75]:
df_sin_limpiar.columns

Index(['date_year', 'sampleSizeUnit', 'class', 'scientificName', 'depth',
       'institutionCode', 'shoredistance', 'flags', 'maximumDepthInMeters',
       'absence', 'date_start', 'minimumDepthInMeters', 'subclass',
       'basisOfRecord', 'sss', 'sex', 'organismQuantity', 'sst', 'family',
       'kingdom', 'occurrenceID', 'bathymetry', 'date_end',
       'organismQuantityType', 'species', 'sampleSizeValue', 'genus',
       'waterBody', 'vernacularName', 'id_node', 'name', 'lon', 'lat'],
      dtype='object')

In [76]:
df_sin_limpiar.shape

(519, 33)

In [None]:
df_sin_limpiar.to_csv("files/marinedata.csv")