# APIs a usar

## Open Library API

In [None]:
# https://openlibrary.org/

## OpenAlex API

In [None]:
# https://api.openalex.org/works

## Crossref

In [None]:
# https://api.crossref.org/works

### Extracción de información de la API Open Library

In [3]:
import os
# Create the raw-data folder up front; exist_ok=True keeps re-runs from failing.
os.makedirs("data/raw", exist_ok=True)

In [4]:
import requests
import json
import os

def descargar_openlibrary(query, timeout=30):
    """Download Open Library search results and save them as raw JSON.

    Sends a GET request to the Open Library search endpoint and writes the
    decoded JSON response to ``data/raw/openlibrary.json``. The ``data/raw``
    directory is created if it does not exist.

    Args:
        query: Free-text search string. It is sent through ``params`` so that
            spaces and reserved characters (``&``, ``#``, ``+``) are
            URL-encoded instead of corrupting the query string.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    os.makedirs("data/raw", exist_ok=True)

    url = "https://openlibrary.org/search.json"
    resp = requests.get(url, params={"q": query}, timeout=timeout)
    # Fail loudly on HTTP errors so an error payload is never saved as data.
    resp.raise_for_status()
    data = resp.json()

    with open("data/raw/openlibrary.json", "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    print("Archivo openlibrary.json guardado correctamente")

# Run the Open Library download for the search string "python libros".
descargar_openlibrary("python libros")

Archivo openlibrary.json guardado correctamente


### Conversión a CSV

In [5]:
import json
import pandas as pd
import os

# Make sure the output folder for processed files exists.
os.makedirs("data/processed", exist_ok=True)

# Load the raw Open Library search response written by the download step.
with open("data/raw/openlibrary.json", encoding="utf-8") as raw_file:
    data = json.load(raw_file)

# Flatten the first 100 search documents into one row per book.
# "autor" is the comma-joined author list, or None when the document
# carries no "author_name" key at all.
libros = [
    {
        "titulo": doc.get("title"),
        "autor": ", ".join(doc["author_name"]) if "author_name" in doc else None,
        "anio_publicacion": doc.get("first_publish_year"),
    }
    for doc in data["docs"][:100]
]

df = pd.DataFrame(libros)
df.to_csv("data/processed/openlibrary.csv", index=False, encoding="utf-8")

df.head()


Unnamed: 0,titulo,autor,anio_publicacion
0,Pythons' Autobiography by the Pythons,"Monty Python, Graham Chapman, John Cleese, Ter...",2003
1,El gran libro de Python,"Marco Buttu, EDIZIONI LSWR",2016
2,Aprende a programar con PYTHON,José Dimas Luján Castillo,2019
3,PYTHON. Curso práctico de formación,Juan Carlos Orós Cabello,2022
4,PYTHON 3 al descubierto 2º Edición,Arturo Fernández Montoro,2013


### Extracción de información de la API OpenAlex

In [8]:
import requests
import json
import os

def descargar_openalex(query, timeout=30):
    """Download OpenAlex works of type ``book`` matching a search string.

    Sends a GET request to the OpenAlex ``/works`` endpoint asking for up to
    50 results and writes the decoded JSON response to
    ``data/raw/openalex_books.json``. The ``data/raw`` directory is created
    if it does not exist.

    Args:
        query: Free-text search string, sent as the ``search`` parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    os.makedirs("data/raw", exist_ok=True)

    url = "https://api.openalex.org/works"
    params = {
        "filter": "type:book",
        "search": query,
        "per-page": 50
    }

    resp = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors so an error payload is never saved as data.
    resp.raise_for_status()
    data = resp.json()

    with open("data/raw/openalex_books.json", "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    print("Archivo openalex_books.json guardado correctamente")

# Run the OpenAlex download for the search string "data science".
descargar_openalex("data science")


Archivo openalex_books.json guardado correctamente


### Conversión a CSV

In [9]:
import pandas as pd

# Load the raw OpenAlex response written by the download step.
with open("data/raw/openalex_books.json", encoding="utf-8") as f:
    data = json.load(f)


def _editorial_openalex(item):
    """Return the publisher name of one OpenAlex work, or None if unknown.

    A top-level "publisher" key is honored first in case the payload has one.
    Otherwise the name is read from
    primary_location -> source -> host_organization_name, which is where the
    OpenAlex Work/Source object documentation places it.
    """
    if item.get("publisher"):
        return item["publisher"]
    # Any level of this chain may be absent or null in the API response.
    location = item.get("primary_location") or {}
    source = location.get("source") or {}
    return source.get("host_organization_name")


# One flat row per work returned by the API.
libros = []

for item in data["results"]:
    libros.append({
        "titulo": item.get("title"),
        "anio_publicacion": item.get("publication_year"),
        "editorial": _editorial_openalex(item),
        "tipo": item.get("type")
    })

df = pd.DataFrame(libros)
os.makedirs("data/processed", exist_ok=True)
df.to_csv("data/processed/openalex_books.csv", index=False, encoding="utf-8")

df.head()


Unnamed: 0,titulo,anio_publicacion,editorial,tipo
0,Data Reduction and Error Analysis for the Phys...,1993,,book
1,Spatial Data Science,2023,,book
2,Process Mining: Data Science in Action,2016,,book
3,High-Dimensional Probability: An Introduction ...,2018,,book
4,Data Science for Business: What You Need to Kn...,2013,,book


### Extracción de información de la API Crossref

In [12]:
import requests
import json
import os

def descargar_crossref(timeout=30):
    """Download Crossref works of type ``book`` and save them as raw JSON.

    Sends a GET request to the Crossref ``/works`` endpoint asking for 50
    rows and writes the decoded JSON response to
    ``data/raw/crossref_books.json``. The ``data/raw`` directory is created
    if it does not exist.

    Args:
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    os.makedirs("data/raw", exist_ok=True)

    url = "https://api.crossref.org/works"
    params = {
        "filter": "type:book",
        "rows": 50
    }

    resp = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors so an error payload is never saved as data.
    resp.raise_for_status()
    data = resp.json()

    with open("data/raw/crossref_books.json", "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    print("Archivo crossref_books.json guardado correctamente")

# Run the Crossref download.
descargar_crossref()


Archivo crossref_books.json guardado correctamente


### Conversión a CSV

In [13]:
import pandas as pd

# Load the raw Crossref response written by the download step.
with open("data/raw/crossref_books.json", encoding="utf-8") as f:
    data = json.load(f)


def _primero(valores, defecto=None):
    """Return the first element of ``valores``, or ``defecto`` if it is empty or None."""
    return valores[0] if valores else defecto


# One flat row per item in the Crossref message.
libros = []

for item in data["message"]["items"]:
    # "issued" may be missing or null; "date-parts" may be missing or empty.
    date_parts = (item.get("issued") or {}).get("date-parts")
    libros.append({
        # Missing and empty title lists both yield "" instead of raising IndexError.
        "titulo": _primero(item.get("title"), ""),
        # First component of the first date part is used as the year; None if unavailable.
        "anio": _primero(_primero(date_parts, [])),
        "editorial": item.get("publisher"),
        "doi": item.get("DOI")
    })

df = pd.DataFrame(libros)

# Ensure the output folder exists so this cell does not depend on an earlier
# conversion cell having created it.
os.makedirs("data/processed", exist_ok=True)
df.to_csv(
    "data/processed/crossref_books.csv",
    index=False,
    encoding="utf-8"
)

df.head()


Unnamed: 0,titulo,anio,editorial,doi
0,Quantum Microscopy of Biological Systems,2015,Springer International Publishing,10.1007/978-3-319-18938-3
1,Excelling in the Clinic,2022,Springer International Publishing,10.1007/978-3-030-99415-0
2,Process-Oriented Dynamic Capabilities,2014,Springer International Publishing,10.1007/978-3-319-03251-1
3,Democracy and Judicial Reforms in South-East E...,2014,Springer International Publishing,10.1007/978-3-319-04420-0
4,Dichter bij diabetes,2015,Bohn Stafleu van Loghum,10.1007/978-90-368-1053-1
