# Convert netCDF to CSV

First you need to download the file from https://crudata.uea.ac.uk/cru/data/temperature/

This is the link to the file directly: https://crudata.uea.ac.uk/cru/data/temperature/HadCRUT.4.6.0.0.median.nc .

## Imports

In [1]:
import netCDF4
import pandas as pd
import numpy as np

# Time conversion
import time
import datetime

## Read file

In [2]:
precip_nc_file = './HadCRUT.4.6.0.0.median.nc'

In [3]:
nc = netCDF4.Dataset(precip_nc_file, mode='r')

In [4]:
nc.variables.keys()

odict_keys(['latitude', 'longitude', 'time', 'temperature_anomaly', 'field_status'])

In [5]:
latitude = nc.variables['latitude'][:]
longitude = nc.variables['longitude'][:]
time_var = nc.variables['time']
dtime = netCDF4.num2date(time_var[:],time_var.units)
raw_temperatures = nc.variables['temperature_anomaly']
# status = nc.variables['field_status'][:]

In [6]:
temperatures = np.array(raw_temperatures)

In [7]:
print(temperatures.shape)
print(latitude.shape)
print(longitude.shape)
print(dtime.shape)

(2023, 36, 72)
(36,)
(72,)
(2023,)


In [8]:
dateIndex = range(0, len(dtime))
latIndex = range(0, len(latitude))
lonIndex = range(0, len(longitude))

In [9]:
index = pd.MultiIndex.from_product(
    [dateIndex, latIndex, lonIndex], 
    names = ["dateIndex", "latitudeIndex", "longitudeIndex"]
)

In [10]:
df = pd.DataFrame(data={'temperatures': temperatures.flatten(), 'index': index})

In [11]:
df['date'] = pd.Series(dtime[[x for x,_,_ in index]])
df['latitude'] = pd.Series(latitude[[x for _,x,_ in index]])
df['longitude'] = pd.Series(longitude[[x for _,_,x in index]])

In [12]:
df.sample(2)

Unnamed: 0,temperatures,index,date,latitude,longitude
2183960,-0.398518,"(842, 20, 56)",1920-03-16 12:00:00,12.5,102.5
771292,-0.796119,"(297, 20, 28)",1874-10-16 12:00:00,12.5,-37.5


In [13]:
del df['index']

In [14]:
df.index.names = ['index']

In [15]:
df['epoch'] = (pd.to_datetime(df['date'], unit='s') - datetime.datetime(1970,1,1))\
  .dt.total_seconds().astype(int)

In [16]:
df.sample(2)

Unnamed: 0_level_0,temperatures,date,latitude,longitude,epoch
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1213612,-1e+30,1889-01-16 12:00:00,-52.5,82.5,-2554718400
376164,-1e+30,1862-02-15 00:00:00,-67.5,2.5,-3404246400


In [17]:
df.dtypes

temperatures           float32
date            datetime64[ns]
latitude               float32
longitude              float32
epoch                    int64
dtype: object

In [18]:
del df['date']

In [19]:
df.sample(2)

Unnamed: 0_level_0,temperatures,latitude,longitude,epoch
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4785964,0.485476,-12.5,82.5,1068940800
3817681,-0.201272,67.5,-52.5,85449600


In [20]:
df.to_csv('./output.csv')