<a href="https://colab.research.google.com/github/1kaiser/test2022/blob/main/NetCFD_CSV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Convert NetCDF file(s) to CSV file(s)** 
[source link](https://gist.github.com/copernicusmarinegist/b57417225d0d4ea47c5d6200f9d8cac3)

### create directories

In [None]:
# Replace 'local_storage_directory', 'netcdf_dir' and 'csv_dir' by respectively
# the directory path to Copernicus Marine data, the directory path to netcdf files
# and the directory path to csv files
local_storage_directory = '/content/'
netcdf_dir = local_storage_directory + 'netcdf/'
!mkdir -p {netcdf_dir}
csv_dir = local_storage_directory + 'csv/'
!mkdir -p {csv_dir}
!python -m pip install xarray

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


### Conversion from **NetCDF** to **CSV**

In [None]:
import xarray as xr
import os

# Directory where the netcdf files (.nc) to convert are stored.
# Path should end with a 'slash (/)'
files_to_convert = local_storage_directory + 'netcdf/'

# Create one .csv file in `csv_dir` for each .nc file listed in `files_to_convert`.
# sorted() makes the processing order reproducible across runs.
for netcdf_file_name in sorted(os.listdir(files_to_convert)):
    # Skip anything that is not a NetCDF file (e.g. a '.ipynb_checkpoints'
    # directory); passing such entries to xarray would abort the whole loop.
    stem, extension = os.path.splitext(netcdf_file_name)
    if extension.lower() != '.nc':
        continue

    # The context manager closes the underlying file handle as soon as the
    # data has been materialised into a DataFrame.
    with xr.open_dataset(os.path.join(files_to_convert, netcdf_file_name)) as ds:
        df = ds.to_dataframe()

    # The output keeps the input's base name: 'foo.nc' -> 'foo.csv'
    df.to_csv(os.path.join(csv_dir, stem + '.csv'))
    print (netcdf_file_name + ' has been processed to .csv')
    print (df)

PBLH202104_V1.nc has been processed to .csv
         latitude  longitude    pblh
lt  ln                              
0   0       5.125     50.125 -9999.0
    1       5.125     50.375 -9999.0
    2       5.125     50.625 -9999.0
    3       5.125     50.875 -9999.0
    4       5.125     51.125 -9999.0
...           ...        ...     ...
139 235    39.875    108.875 -9999.0
    236    39.875    109.125 -9999.0
    237    39.875    109.375 -9999.0
    238    39.875    109.625 -9999.0
    239    39.875    109.875 -9999.0

[33600 rows x 3 columns]
_Clim_Pred_LRF_New_RF25_IMD0p252021.nc has been processed to .csv
                               RAINFALL
LONGITUDE LATITUDE TIME                
66.5      6.5      2021-01-01       NaN
                   2021-01-02       NaN
                   2021-01-03       NaN
                   2021-01-04       NaN
                   2021-01-05       NaN
...                                 ...
100.0     38.5     2021-12-27       NaN
                   2021

### Cleaning the data: drop rows whose `pblh` value is missing (-9999)

In [None]:
!rm -rf `find -type d -name .ipynb_checkpoints`
import pandas as pd
import os

for csv_file_name in os.listdir(csv_dir):
    csv_uncleaned_in = csv_file_name[:-4] + '.csv'
    csv_cleaned_out = csv_uncleaned_in[:-4] + '_cleaned.csv'
    data = pd.read_csv(csv_dir + csv_uncleaned_in)
    data = data[data.pblh > 0]
    print(data.dropna())
    data.dropna().to_csv(csv_dir + csv_cleaned_out, index = False)
    


        lt   ln  latitude  longitude         pblh
182      0  182     5.125     95.625  1787.625605
183      0  183     5.125     95.875  2002.014425
184      0  184     5.125     96.125  1813.101425
185      0  185     5.125     96.375  2140.641231
186      0  186     5.125     96.625  2240.990078
...    ...  ...       ...        ...          ...
31858  132  178    38.125     94.625   261.200955
32072  133  152    38.375     88.125   111.200955
32102  133  182    38.375     95.625   859.752433
32313  134  153    38.625     88.375   259.173411
32340  134  180    38.625     95.125    24.331306

[10338 rows x 5 columns]


# IMD rainfall data

In [None]:
!python -m pip install imdlib

In [None]:
#@title Default title text { vertical-output: true }
START_YEAR = 2000 #@param {type:"integer"}
END_YEAR =  2022#@param {type:"integer"}

import calendar
import os

import imdlib as imd
import numpy as np
import pandas as pd

# Download IMD gridded data with imdlib and convert it to one CSV file per year.
#
# - The imdlib python library must be installed, and an internet connection is
#   needed for downloading the data.
# - -9999 is the value written for "no data" in the saved csv files.
# - If the data is already downloaded: create a folder named rain/tmax/tmin
#   inside any folder, copy the yearly data files into the respective folder,
#   rename each yearly file to its year (1951.GRD, 1952.GRD, ...), comment out
#   the imd.get_data(...) line below and run the cell; it will then only
#   convert the binary .GRD data into csv files.
# - Each CSV row is one grid cell: X, Y, then one value per day of the year.
#   Every line (header included) ends with a trailing comma; this matches the
#   files this cell has always produced, so existing readers keep working.

file_directory = "/content/"
csv_dir = file_directory + 'csv/'
# os.makedirs instead of `!mkdir -p {csv_dir}`: not subject to shell word-splitting.
os.makedirs(csv_dir, exist_ok=True)

start_yr = START_YEAR # starting year to download/convert: 1901 onwards for rainfall, 1951 for tmax and tmin
end_yr = END_YEAR # ending year up to which data is downloaded/converted
variable = 'rain' # variable name (rain for rainfall, tmax or tmin for max or min temperature)
file_format = 'yearwise' # other option (None), which will assume default imd naming convention

# download IMD data: just change path as per your requirement
imd.get_data(variable, start_yr, end_yr, fn_format=file_format, file_dir=file_directory)
file_dir = file_directory # this path should be same as mentioned in previous line
# open the data downloaded and saved in the location mentioned in previous line
data = imd.open_data(variable, start_yr, end_yr, file_format, file_dir)

if variable == 'rain':
    grid_size = 0.25 # grid spacing in deg
    y_count = 129 # no of grids in y direction
    x_count = 135 # no of grids in x direction
    x = 66.5 # starting longitude taken from control file (.ctl)
    y = 6.5 # starting latitude taken from control file (.ctl)
elif variable in ('tmax', 'tmin'):
    grid_size = 1 # grid spacing in deg
    y_count = 31 # no of grids in y direction
    x_count = 31 # no of grids in x direction
    x = 67.5 # starting longitude taken from control file (.ctl)
    y = 7.5 # starting latitude taken from control file (.ctl)
else:
    # Fail here with a clear message rather than with a NameError on
    # `grid_size` further down.
    raise ValueError("variable must be 'rain', 'tmax' or 'tmin', got " + repr(variable))

# Indexed below as [day, x index, y index].
np_array = data.data
years_no = (end_yr - start_yr) + 1

# `day` is the running number of days consumed so far, so the current year
# occupies the slice [day - days, day) along the first axis of `np_array`.
day = 0
for yr in range(0, years_no):
    year = start_yr + yr
    days = 366 if calendar.isleap(year) else 365
    day = day + days

    # just change the path where you want to save csv file.
    # `with` guarantees the file is flushed and closed, even if a write fails.
    with open(csv_dir + str(year) + "_" + str(variable) + ".csv", 'w') as f:
        header = ["X", "Y"] + [str(d + 1) for d in range(0, days)]
        f.write(",".join(header) + ",\n")

        for j in range(0, y_count):
            for i in range(0, x_count):
                row = [str((i * grid_size) + x), str((j * grid_size) + y)]
                for k in range(day - days, day):
                    val = np_array[k, i, j]
                    # 99.9 (as stored in single precision) and -999 are
                    # presumably the source's missing-value markers -- confirm
                    # against the imdlib docs; both are written out as -9999.
                    if val == 99.9000015258789 or val == -999:
                        row.append(str(-9999))
                    else:
                        row.append(str(val))
                f.write(",".join(row) + ",\n")

    print("File for " + str(start_yr + yr) + "_" + str(variable) + " is saved")
print("CSV conversion successful !")

Downloading: rain for year 2000
Downloading: rain for year 2001


In [None]:
!rm -rf `find -type d -name .ipynb_checkpoints`
import pandas as pd
import os

for csv_file_name in os.listdir(csv_dir):
    csv_uncleaned_in = csv_file_name[:-4] + '.csv'
    csv_cleaned_out = csv_uncleaned_in[:-4] + '_cleaned.csv'
    data = pd.read_csv(csv_dir + csv_uncleaned_in)
    !rm -r {csv_dir + csv_uncleaned_in}
    data = data.drop(data[data['1'] < 0].index)
    print(data)
    data.to_csv(csv_dir + csv_cleaned_out, index = False)
    


           X      Y          1    2         3         4         5          6  \
987    77.00   8.25   2.326143  0.0  0.000000  1.322984  4.881387   1.753134   
988    77.25   8.25   3.664898  0.0  0.000000  0.000000  1.788203   0.000000   
989    77.50   8.25  18.975653  0.0  0.000000  2.973083  0.040350   0.059946   
990    77.75   8.25  18.582087  0.0  0.000000  2.183249  3.511944   3.103261   
1121   76.75   8.50   3.746353  0.0  0.000000  2.663133  8.128388   0.518859   
...      ...    ...        ...  ...       ...       ...       ...        ...   
16506  75.50  37.00   0.000000  0.0  1.367865  2.418013  4.591004  27.251225   
16637  74.50  37.25   0.000000  0.0  0.425546  2.268913  3.766507  19.281239   
16638  74.75  37.25   0.000000  0.0  0.639639  2.472864  4.178602  22.422531   
16639  75.00  37.25   0.000000  0.0  0.880022  2.544067  4.391771  24.844952   
16640  75.25  37.25   0.000000  0.0  1.097578  2.527547  4.458756  26.400146   

                  7    8  ...  357     