In [None]:
import os

import numpy as np
import pandas as pd
from scipy.io import netcdf, netcdf_file

In [None]:
# Directory prefix that the data file names are appended to.
DATA_FILE_DIR = './data/'

# Year window; each calendar date is sampled END_YEAR - START_YEAR times.
START_YEAR = 2011
END_YEAR = 2020
NUM_OF_YEARS = END_YEAR - START_YEAR

NUM_OF_MONTHS = 12

# Days per month keyed by month number (1-12). February is fixed at 28,
# so a Feb 29 entry is never generated from this table.
NUM_OF_DAYS = dict(enumerate((31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31), start=1))

A function that opens a netCDF file, reads the temperature variable, and returns the minimum temperature at each grid point

In [None]:
def get_min_tmp(filename):
    """Return the per-grid-point minimum of ``T_MIN_L103_Pd`` stored in a netCDF file.

    Parameters
    ----------
    filename : str
        Path to a netCDF file containing a ``T_MIN_L103_Pd`` variable. The
        variable's first axis indexes the records that are reduced over; the
        remaining axes form the grid.

    Returns
    -------
    numpy.ndarray
        float64 array shaped like a single record (``shape[1:]`` of the
        variable) holding the smallest value at each grid point across all
        records in the file.
    """
    # The context manager closes the file even if the variable lookup raises.
    with netcdf_file(filename, mmap=False) as file:
        tmp = file.variables['T_MIN_L103_Pd'].data.copy()

    # Reduce over the whole leading axis rather than a hard-coded four records;
    # cast to native float64 so the result type does not depend on the file.
    return tmp.min(axis=0).astype(np.float64)

Read 9 years of temperature data and take the per-date average and minimum across years  
e.g. for date '0417': avg = mean(20110417_min_tmp, 20120417_min_tmp, ..., 20190417_min_tmp)

In [None]:
# Per-calendar-date statistics of the daily minimum grids, keyed by 'MMDD'.
yearly_date_avg, yearly_date_min = {}, {}

for month in range(1, NUM_OF_MONTHS + 1):
    # Months before April are read from START_YEAR+1..END_YEAR; the remaining
    # months from START_YEAR..END_YEAR-1. Either way NUM_OF_YEARS files per date.
    if month < 4:
        years = range(START_YEAR + 1, END_YEAR + 1)
    else:
        years = range(START_YEAR, END_YEAR)

    for day in range(1, NUM_OF_DAYS[month] + 1):
        yearly_grids = []
        for year in years:
            filename = DATA_FILE_DIR + '{}{:02d}{:02d}.nc'.format(year, month, day)
            # Explicit check instead of `assert`, which is stripped under `python -O`.
            if not os.path.isfile(filename):
                raise FileNotFoundError("File Not Found: '{}'".format(filename))
            yearly_grids.append(get_min_tmp(filename))

        # Stack to (years, ...grid) so the grid shape comes from the data
        # instead of a hard-coded (151, 361).
        date_tmps = np.stack(yearly_grids).astype(np.float64, copy=False)

        date_key = '{:02d}{:02d}'.format(month, day)
        yearly_date_avg[date_key] = date_tmps.mean(axis=0)
        yearly_date_min[date_key] = date_tmps.min(axis=0)

First add location columns--lat and lon  
Use `int16` type to keep df size small

In [None]:
# Read the coordinate axes from a single data file
# (assumes every file shares this grid — TODO confirm).
filename = DATA_FILE_DIR + '20110401.nc'

# The context manager closes the file even if a coordinate variable is missing.
with netcdf_file(filename, mmap=False) as file:
    lat = file.variables['lat'].data.copy()
    lon = file.variables['lon'].data.copy()

In [None]:
# Pair every latitude with every longitude in row-major order (lat varies
# slowest, lon fastest), i.e. row i*len(lon)+j holds (lat[i], lon[j]).
# Sizes come from the coordinate arrays rather than hard-coded 151 / 361.
locations = np.column_stack((
    np.repeat(lat, len(lon)),
    np.tile(lon, len(lat)),
)).astype(np.float64)

# NOTE(review): int16 cannot represent fractional coordinates — confirm the
# axes are whole degrees before relying on these columns.
df = pd.DataFrame(locations, columns=['lat', 'lon'], dtype='int16')

Then add one column per date (`MMDD`) holding the multi-year average of the daily minimum temperature  
Use `float16` to keep df size small

In [None]:
# Build every date column first and attach them in one concat; inserting 365
# columns one at a time fragments the DataFrame's internal blocks.
# NOTE(review): the averaged grids are exported although the output file is
# named 'yearly_min.csv' and yearly_date_min is unused — confirm the intent.
date_columns = {}
for month in range(1, NUM_OF_MONTHS + 1):
    for day in range(1, NUM_OF_DAYS[month] + 1):
        date = '{:02d}{:02d}'.format(month, day)
        # Flatten in row-major order so rows line up with the lat/lon rows.
        some_date_tmp = yearly_date_avg[date].reshape(-1)
        # NOTE(review): float16 keeps only about 3 significant decimal digits,
        # so the 2-decimal rounding is not preserved for larger magnitudes.
        date_columns[date] = np.round(some_date_tmp, decimals=2).astype('float16')

# Drop date columns left over from a previous run so re-executing this cell
# does not duplicate them, then append the freshly built ones.
df = pd.concat(
    [
        df.drop(columns=list(date_columns), errors='ignore'),
        pd.DataFrame(date_columns, index=df.index),
    ],
    axis=1,
)

Save to a .csv file

In [None]:
# Export the table; the row index is omitted so only the DataFrame columns are written.
df.to_csv(path_or_buf='yearly_min.csv', index=False)