In [2]:
!pip install cdsapi

Collecting cdsapi
  Downloading cdsapi-0.7.5-py2.py3-none-any.whl.metadata (2.9 kB)
Collecting datapi (from cdsapi)
  Downloading datapi-0.1.1-py3-none-any.whl.metadata (17 kB)
Collecting multiurl>=0.3.2 (from datapi->cdsapi)
  Downloading multiurl-0.3.3.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading cdsapi-0.7.5-py2.py3-none-any.whl (12 kB)
Downloading datapi-0.1.1-py3-none-any.whl (26 kB)
Building wheels for collected packages: multiurl
  Building wheel for multiurl (setup.py) ... [?25l[?25hdone
  Created wheel for multiurl: filename=multiurl-0.3.3-py3-none-any.whl size=21230 sha256=ca0658df5feb8a4a3dfba1ec460b4df5f3c623fd41bcbc20399fb731ad6d120e
  Stored in directory: /root/.cache/pip/wheels/be/05/e0/65a6edb0a000498aeaefbadd80228bf5ed1bdbb82840ca1692
Successfully built multiurl
Installing collected packages: multiurl, datapi, cdsapi
Successfully installed cdsapi-0.7.5 datapi-0.1.1 multiurl-0.3.3


In [19]:
import os

from google.colab import userdata

# Read the CDS API token from Colab's secret store instead of hardcoding it.
apikey = userdata.get("CDS_TOKEN")
if not apikey:
    # Fail here rather than writing an unusable "key:" line that only
    # surfaces later as an authentication error during the download.
    raise ValueError("Colab secret 'CDS_TOKEN' is empty; cannot write ~/.cdsapirc.")

# Resolve the config file relative to the current user's home directory
# (this is /root on Colab) instead of hardcoding the absolute path.
cdsapirc_path = os.path.expanduser("~/.cdsapirc")
with open(cdsapirc_path, "w") as f:
    print("url: https://cds.climate.copernicus.eu/api", file=f)
    print(f"key: {apikey}", file=f)

# The file contains a credential, so make it readable by the owner only.
os.chmod(cdsapirc_path, 0o600)

In [20]:
import cdsapi

# CDS dataset identifier passed to the retrieve call below.
dataset = "reanalysis-era5-single-levels"

# Zero-padded selector values: every month, every day-of-month and every hour.
months = [f"{month:02d}" for month in range(1, 13)]
days = [f"{day:02d}" for day in range(1, 32)]
hours = [f"{hour:02d}:00" for hour in range(24)]

# Request: mean sea level pressure for all of 2020, delivered as NetCDF
# wrapped in a zip archive.
request = {
    "product_type": ["reanalysis"],
    "variable": ["mean_sea_level_pressure"],
    "year": ["2020"],
    "month": months,
    "day": days,
    "time": hours,
    "data_format": "netcdf",
    "download_format": "zip",
    # Bounding box; presumably [north, west, south, east] per the CDS
    # convention -- TODO confirm.
    "area": [65, -120, 0, 0],
}

# The client picks up its credentials from the cdsapi config file.
client = cdsapi.Client()
client.retrieve(dataset, request, "/content/data.zip")


2024-12-06 02:33:44,025 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
INFO:datapi.legacy_api_client:[2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-06 02:33:44,030 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
INFO:datapi.le

cf48bdf4dd229f28543766368b03ca7b.zip:   0%|          | 0.00/1.48G [00:00<?, ?B/s]

'/content/data.nc'

In [21]:
!unzip data.zip

Archive:  data.zip
  inflating: data_stream-oper_stepType-instant.nc  


In [21]:
import xarray as xr

# Name of the NetCDF file that the unzip step extracted from data.zip.
filename = 'data_stream-oper_stepType-instant.nc'

# Load the NetCDF dataset
ds = xr.open_dataset(filename)
# Bare last expression: renders the dataset summary as the cell output.
ds

In [22]:
# Remove the auxiliary `expver` and `number` variables when they are present.
# The membership test makes this safe to re-run after they are already gone.
for name in ("expver", "number"):
    if name in ds:
        ds = ds.drop_vars(name)
        print(f"Column '{name}' dropped successfully.")
    else:
        print(f"Column '{name}' not found in the dataset.")

# Render the time coordinate as text ("yy/mm/dd HH:MM:SS").
if "valid_time" in ds:
    try:
        # `.dt` is only available on datetime-like data. Once this cell has
        # run, `valid_time` holds strings and the accessor raises, so a
        # second run used to crash here.
        valid_time_text = ds["valid_time"].dt.strftime("%y/%m/%d %H:%M:%S")
    except (AttributeError, TypeError):
        print("Column 'valid_time' is not datetime-like (already converted?); skipping.")
    else:
        ds["valid_time"] = valid_time_text
        print(ds["valid_time"])
        print("Column 'valid_time' convert to string successfully.")
else:
    print("Column 'valid_time' not found in the dataset.")

# Display the cleaned dataset.
ds

Column 'expver' dropped successfully.
Column 'number' dropped successfully.
<xarray.DataArray 'valid_time' (valid_time: 8784)> Size: 70kB
array(['20/01/01 00:00:00', '20/01/01 01:00:00', '20/01/01 02:00:00', ...,
       '20/12/31 21:00:00', '20/12/31 22:00:00', '20/12/31 23:00:00'],
      dtype=object)
Coordinates:
  * valid_time  (valid_time) object 70kB '20/01/01 00:00:00' ... '20/12/31 23...
Column 'valid_time' convert to string successfully.


In [7]:
!mkdir /content/processed

In [24]:
import pandas as pd
import numpy as np

# Export the dataset to CSV in slices along valid_time, one file per slice,
# so that only one slice is expanded into a DataFrame at a time.
chunk_size = 100  # Adjust as needed
n_times = len(ds.valid_time)
for i in range(0, n_times, chunk_size):
    # A slice that runs past the end is clamped, so the final (shorter)
    # chunk needs no special handling.
    stop = i + chunk_size
    chunk = ds.isel(valid_time=slice(i, stop))

    # Flatten the chunk into a table and turn the index levels
    # (including valid_time) into ordinary columns.
    df = chunk.to_dataframe().reset_index()

    # Write the table out; the file number is the chunk's ordinal position.
    chunk_filename = f"/content/processed/chunk_{i//chunk_size}.csv"
    df.to_csv(chunk_filename, index=False)
    print(f"Chunk {i//chunk_size} saved to {chunk_filename}")

valid_time     object
latitude      float64
longitude     float64
msl           float32
dtype: object
Chunk 0 saved to /content/processed/chunk_0.csv
valid_time     object
latitude      float64
longitude     float64
msl           float32
dtype: object
Chunk 1 saved to /content/processed/chunk_1.csv
valid_time     object
latitude      float64
longitude     float64
msl           float32
dtype: object
Chunk 2 saved to /content/processed/chunk_2.csv
valid_time     object
latitude      float64
longitude     float64
msl           float32
dtype: object
Chunk 3 saved to /content/processed/chunk_3.csv
valid_time     object
latitude      float64
longitude     float64
msl           float32
dtype: object
Chunk 4 saved to /content/processed/chunk_4.csv
valid_time     object
latitude      float64
longitude     float64
msl           float32
dtype: object
Chunk 5 saved to /content/processed/chunk_5.csv
valid_time     object
latitude      float64
longitude     float64
msl           float32
dtype: objec

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-24-265c65138f12>", line 22, in <cell line: 9>
    df.to_csv(chunk_filename, index=False)
  File "/usr/local/lib/python3.10/dist-packages/pandas/util/_decorators.py", line 333, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/generic.py", line 3967, in to_csv
    return DataFrameRenderer(formatter).to_csv(
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/formats/format.py", line 1014, in to_csv
    csv_formatter.save()
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/formats/csvs.py", line 270, in save
    self._save()
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/formats/csvs.py", line 275, in _save
    self._save_body()
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/formats/cs

TypeError: object of type 'NoneType' has no len()

In [26]:
# Recursively bundle everything under /content/processed/ into processed.zip
# (created in the current working directory).
!zip -r processed.zip /content/processed/

  adding: content/processed/chunk_0.csv (deflated 87%)
