### Code for downloading the NY-focused index files provided by Mike
link: ftp://ftp1.esrl.noaa.gov/et6/sat/dlj/DEWS/NY_tseries/

In [38]:
import requests
import re
import ftplib
import numpy as np
import os
from tqdm.notebook import tqdm
import netCDF4
from netCDF4 import num2date
import xarray as xr
import pandas as pd

from ftplib import FTP

In [3]:
# FTP host, and the directory components (entered one at a time, in order)
# that lead to the folder holding the NY time-series files.
BASE_URL_NY = "ftp1.esrl.noaa.gov"
path_ny = ['et6', 'sat', 'dlj', 'DEWS', 'NY_tseries']

In [4]:
# Local folder (relative to the notebook's working directory) that receives the
# downloaded .nc files and, later, the converted .csv files.
save_path = '../../../../../../../Google Drive/Shared drives/Capstone Project/Data/Indices-Mike/'
save_path

'../../../../../../../Google Drive/Shared drives/Capstone Project/Data/Indices-Mike/'

In [5]:
# Connect to the FTP host (authentication happens in the next step).
ftp_ny = FTP(BASE_URL_NY)

In [6]:
# Log in without credentials; ftplib falls back to the anonymous user.
ftp_ny.login()

'230 Login successful.'

In [7]:
# Descend the remote directory tree one level at a time until the data folder is reached
for folder in path_ny:
    ftp_ny.cwd(folder)

In [8]:
# List the entries of the current remote directory; these names drive both the
# download loop and the .nc -> .csv conversion loop.
file_names = ftp_ny.nlst()
file_names

['eddi_03mn_NY_DEWS_2000-2020.nc',
 'eddi_05mn_NY_DEWS_2000-2020.nc',
 'eddi_06mn_NY_DEWS_2000-2020.nc',
 'pdsi_NY_DEWS_2000-2020.nc',
 'spei180d_NY_DEWS_2000-2020.nc',
 'spei90d_NY_DEWS_2000-2020.nc',
 'spi180d_NY_DEWS_2000-2020.nc',
 'spi90d_NY_DEWS_2000-2020.nc']

In [46]:
# Peek at the first entry of the remote listing.
file_names[0]

'eddi_03mn_NY_DEWS_2000-2020.nc'

In [47]:
# Download every listed file from the current remote directory into save_path.
# NOTE(review): the FTP connection is left open after this cell; call
# ftp_ny.quit() once no further transfers are needed.

# The destination folder is the same for every file, so create it once.
os.makedirs(save_path, exist_ok=True)

for file in file_names:
    file_savepath = os.path.join(save_path, file)
    # Write to a temporary name first so that an interrupted transfer never
    # leaves a truncated file under the final name.
    partial_savepath = file_savepath + '.part'
    try:
        with open(partial_savepath, 'wb') as f:
            ftp_ny.retrbinary('RETR ' + file, f.write)
        # Only a fully written file is moved into place.
        os.replace(partial_savepath, file_savepath)
    finally:
        # After a successful move the temporary file no longer exists; after a
        # failure this removes the incomplete download.
        if os.path.exists(partial_savepath):
            os.remove(partial_savepath)

### Converting the .nc files to .csv files
The downloaded files are stored in `save_path`, and their names are held in `file_names`.

In [34]:
# Checking the data: print the dataset summary of the first downloaded file, then
# the distinct values (and how many there are) of each variable of interest.
file_path = os.path.join(save_path, file_names[0])
print(f'file path: {file_path}')

# The context manager closes the NetCDF handle once the inspection is finished.
with netCDF4.Dataset(file_path) as nc_ds:
    print(nc_ds)
    # 'EDDI' is the index variable stored in this particular file (file_names[0]);
    # files for other indices expose a differently named variable.
    for var_name in ('State_Mask', 'County_Mask', 'Latitude', 'Longitude', 'EDDI'):
        # Compute the distinct values once and reuse them for both prints.
        distinct_values = np.unique(nc_ds.variables[var_name][:].data)
        print(distinct_values)
        print(len(distinct_values))

file path: ../../../../../../../Google Drive/Shared drives/Capstone Project/Data/Indices-Mike/eddi_03mn_NY_DEWS_2000-2020.nc
<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF3_CLASSIC data model, file format NETCDF3):
    DEWS_Region: NY
    Contact_Email: Darren.L.Jackson@noaa.gov
    Creation_Date: Mar 31, 2021
    dimensions(sizes): west_east(65), south_north(38), time(7671)
    variables(dimensions): int32 Days(time), float32 Longitude(south_north, west_east), float32 Latitude(south_north, west_east), int16 State_Mask(south_north, west_east), int16 County_Mask(south_north, west_east), int16 EDDI(time, south_north, west_east)
    groups: 
[0 1]
2
[  0   1   3   5   7   9  11  13  15  17  19  21  23  25  27  29  31  33
  35  37  39  41  43  45  47  49  51  53  55  57  59  61  63  65  67  69
  71  73  75  77  79  81  83  85  87  89  91  93  95  97  99 101 103 105
 107 109 111 113 115 117 119 121 123]
63
[40.438 40.563 40.688 40.813 40.938 41.063 41.188 41.313 41.438 41.563
 41.688

### Looping through all the files to convert to csv

In [101]:
# Read the lookup table that pairs each county FIPS code with its county name
county_lookup_path = '../../../../../../../Google Drive/Shared drives/Capstone Project/Data/TempPrecip/fips_county.csv'
df_county = pd.read_csv(county_lookup_path)
df_county.head()

Unnamed: 0,fips,county
0,36001,Albany
1,36003,Allegany
2,36005,Bronx
3,36007,Broome
4,36009,Cattaraugus


In [102]:
# Base date for the `Days` variable: the conversion loop treats Days == 1 as this date.
# NOTE(review): the origin is not stated in the file metadata shown above; confirm with the data provider.
start_date = '1-1-1979'

In [103]:
# Convert every downloaded NetCDF file into a flat CSV that keeps only NY grid
# cells and adds the county FIPS code, the county name and a calendar date.
# After the loop, `df` holds the frame built from the last file in file_names.
for file in file_names:
    print(f'file is: {file}')
    file_path = os.path.join(save_path, file)
    print(f'file path: {file_path}')

    # Open inside a context manager so the file handle is released per file
    # instead of staying open for the lifetime of the kernel.
    with xr.open_dataset(file_path) as ds:
        df = ds.to_dataframe()

    # Selecting only NY state, State_Mask = 1 for NY and 0 for others
    df = df[df.State_Mask != 0].reset_index()

    # County_Mask holds the county part of the FIPS code; adding 36000 prepends
    # the NY state prefix (e.g. 1 -> 36001). County_Mask is int16 and 36000 does
    # not fit in int16, so widen explicitly rather than rely on implicit
    # promotion, which raises OverflowError under NumPy 2.
    df['fips'] = df['County_Mask'].astype('int32') + 36000

    # Getting county names (left join keeps every grid cell even without a match)
    df = df.merge(df_county, how='left', on='fips')
    df['county'] = df['county'].str.lower()

    # Convert dates: Days is treated as a 1-based day count from start_date
    df['Date'] = pd.to_datetime(start_date) + pd.to_timedelta(df['Days'] - 1, unit='d')

    # Save the file as csv next to the source file, swapping the extension
    csv_path = os.path.join(save_path, os.path.splitext(file)[0] + '.csv')
    df.to_csv(path_or_buf=csv_path, index=False)
    print(f'Saved file path: {csv_path}')

file is: eddi_03mn_NY_DEWS_2000-2020.nc
file path: ../../../../../../../Google Drive/Shared drives/Capstone Project/Data/Indices-Mike/eddi_03mn_NY_DEWS_2000-2020.nc
Saved file path: ../../../../../../../Google Drive/Shared drives/Capstone Project/Data/Indices-Mike/eddi_03mn_NY_DEWS_2000-2020.csv
file is: eddi_05mn_NY_DEWS_2000-2020.nc
file path: ../../../../../../../Google Drive/Shared drives/Capstone Project/Data/Indices-Mike/eddi_05mn_NY_DEWS_2000-2020.nc
Saved file path: ../../../../../../../Google Drive/Shared drives/Capstone Project/Data/Indices-Mike/eddi_05mn_NY_DEWS_2000-2020.csv
file is: eddi_06mn_NY_DEWS_2000-2020.nc
file path: ../../../../../../../Google Drive/Shared drives/Capstone Project/Data/Indices-Mike/eddi_06mn_NY_DEWS_2000-2020.nc
Saved file path: ../../../../../../../Google Drive/Shared drives/Capstone Project/Data/Indices-Mike/eddi_06mn_NY_DEWS_2000-2020.csv
file is: pdsi_NY_DEWS_2000-2020.nc
file path: ../../../../../../../Google Drive/Shared drives/Capstone Projec

In [104]:
# Checking the last file (spi90d_NY_DEWS_2000-2020.nc) in the above loop:
# `df` still holds the frame built in the loop's final iteration.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1372035 entries, 0 to 1372034
Data columns (total 12 columns):
 #   Column       Non-Null Count    Dtype         
---  ------       --------------    -----         
 0   south_north  1372035 non-null  int64         
 1   time         1372035 non-null  int64         
 2   west_east    1372035 non-null  int64         
 3   Days         1372035 non-null  int32         
 4   Longitude    1372035 non-null  float32       
 5   Latitude     1372035 non-null  float32       
 6   State_Mask   1372035 non-null  int16         
 7   County_Mask  1372035 non-null  int16         
 8   SPI          1372035 non-null  float32       
 9   fips         1372035 non-null  int32         
 10  county       1372035 non-null  object        
 11  Date         1372035 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float32(3), int16(2), int32(2), int64(3), object(1)
memory usage: 94.2+ MB


In [105]:
# First rows of the frame left over from the last loop iteration.
df.head()

Unnamed: 0,south_north,time,west_east,Days,Longitude,Latitude,State_Mask,County_Mask,SPI,fips,county,Date
0,1,0,45,7675,-74.188004,40.563,1,85,-0.522,36085,richmond,2000-01-05
1,1,1,45,7680,-74.188004,40.563,1,85,-0.584,36085,richmond,2000-01-10
2,1,2,45,7685,-74.188004,40.563,1,85,-0.9,36085,richmond,2000-01-15
3,1,3,45,7690,-74.188004,40.563,1,85,-1.23,36085,richmond,2000-01-20
4,1,4,45,7695,-74.188004,40.563,1,85,-0.949,36085,richmond,2000-01-25


In [106]:
# Number of distinct values in each column.
df.nunique()

south_north      36
time           1533
west_east        62
Days           1533
Longitude        62
Latitude         36
State_Mask        1
County_Mask      62
SPI            3541
fips             62
county           62
Date           1533
dtype: int64

In [107]:
# Non-null value count of every other column, per county.
df.groupby('county').count()

Unnamed: 0_level_0,south_north,time,west_east,Days,Longitude,Latitude,State_Mask,County_Mask,SPI,fips,Date
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
albany,13797,13797,13797,13797,13797,13797,13797,13797,13797,13797,13797
allegany,24528,24528,24528,24528,24528,24528,24528,24528,24528,24528,24528
bronx,1533,1533,1533,1533,1533,1533,1533,1533,1533,1533,1533
broome,21462,21462,21462,21462,21462,21462,21462,21462,21462,21462,21462
cattaraugus,36792,36792,36792,36792,36792,36792,36792,36792,36792,36792,36792
...,...,...,...,...,...,...,...,...,...,...,...
washington,24528,24528,24528,24528,24528,24528,24528,24528,24528,24528,24528
wayne,15330,15330,15330,15330,15330,15330,15330,15330,15330,15330,15330
westchester,13797,13797,13797,13797,13797,13797,13797,13797,13797,13797,13797
wyoming,18396,18396,18396,18396,18396,18396,18396,18396,18396,18396,18396
