# 📘 Descarga de Dataset desde NASA
Este notebook descarga archivos `.zip` del dataset de turbinas desde el sitio web de la NASA.

In [None]:
import csv
import os
from urllib.parse import unquote, urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

In [None]:
# Repository page that lists the datasets.
url = 'https://www.nasa.gov/intelligent-systems-division/discovery-and-systems-health/pcoe/pcoe-data-set-repository'
# A timeout keeps the cell from blocking indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
print('📡 Estado de la respuesta:', response)
# Stop here on a 4xx/5xx answer instead of parsing an error page further down.
response.raise_for_status()

In [None]:
# Parse the body of the response with the 'html.parser' backend.
html = response.text
soup = BeautifulSoup(markup=html, features='html.parser')

In [None]:
# Output locations. Both files live inside output_dir, so their paths are
# derived from it rather than repeating the directory literal.
output_dir = 'data/raw'
page_path = os.path.join(output_dir, 'page.html')
csv_path = os.path.join(output_dir, 'links.csv')
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Write a pretty-printed copy of the parsed page to page_path.
with open(page_path, mode='w', encoding='utf-8') as page:
    pretty_html = soup.prettify()
    page.write(pretty_html)

In [None]:
# Collect (link text, absolute URL) pairs for the turbofan .zip archives.
# response.url is the address the page was actually served from, so it is the
# correct base for resolving relative hrefs.
base_url = response.url
tags = soup.find_all('a', href=True)
enlaces = []
for tag in tags:
    text = tag.get_text(strip=True)
    # Resolve relative hrefs so every stored link can be requested on its own.
    href = urljoin(base_url, tag['href'].strip())
    # Test the extension on the URL path, case-insensitively, so ".ZIP" or a
    # trailing query string/fragment does not hide an archive.
    path = urlsplit(href).path.lower()
    if path.endswith('.zip') and 'turbofan' in href.lower():
        enlaces.append((text, href))

In [None]:
# Write the header row followed by one (text, link) row per collected link.
with open(csv_path, mode='w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerows([['Texto', 'Enlace'], *enlaces])

print(f"🗄️ Se guardaron {len(enlaces)} enlaces en links.csv")

In [None]:
# Download every archive listed in the CSV into output_dir.
# A failed download is reported and skipped so the remaining links still run.
with open(csv_path, encoding='utf-8', newline='') as fr:
    reader = csv.reader(fr)
    next(reader, None)  # skip the header row; tolerates an empty file
    for row in reader:
        if len(row) < 2:
            continue  # blank or malformed row
        # Named `link`, not `url`, so the page URL defined in an earlier cell
        # is not overwritten by this loop.
        link = row[1]
        # Build the name from the URL path only (no query string) and drop any
        # directory component that percent-decoding could reintroduce.
        raw_name = urlsplit(link).path.split('/')[-1]
        filename = os.path.basename(unquote(raw_name).strip())
        if not filename:
            print(f"✖️ Error al descargar {link}: no se pudo determinar el nombre del archivo")
            continue
        file_path = os.path.join(output_dir, filename)
        # Stream into a temporary file first so an interrupted transfer never
        # leaves a truncated archive under the final name.
        part_path = file_path + '.part'
        print(f"⬇️ Descargando: {filename}")
        try:
            # The context manager releases the connection even on errors; the
            # timeout bounds the connect and each read, not the whole transfer.
            with requests.get(link, stream=True, timeout=30) as r:
                r.raise_for_status()
                with open(part_path, 'wb') as f_out:
                    for chunk in r.iter_content(8192):
                        f_out.write(chunk)
            os.replace(part_path, file_path)
            print(f"✅ Guardado en: {file_path}\n")
        except (requests.RequestException, OSError) as e:
            if os.path.exists(part_path):
                os.remove(part_path)
            print(f"✖️ Error al descargar {link}: {e}")