In [4]:
import datetime as dt
import glob
import gzip
import os
import shutil
import tempfile
from datetime import datetime, timedelta
from datetime import datetime, timezone
from pathlib import Path

import numpy as np
import pandas as pd
import xarray as xr
from suntime import Sun, SunTimeException


def _save_daylight_radiation(file_stem, label, year, date, sur, sus, path):
    '''
    Cut one daily radiation file down to the daylight window and save the result.

    input:
        - file_stem: <string>: file name up to the version wildcard,
                               e.g. 'sups_joy_pyrg00_l1_rlds_v'
        - label:     <string>: 'LW' or 'SW', only used in the log messages
        - year:      <string>: four digit year, used as sub-folder of the input and output
        - date:      <string>: file name ending, e.g. '20180102000000.nc'
        - sur, sus:  <naive datetime>: first and last time stamp to keep
        - path:      <string>: root folder of the output, e.g. 'savings/'

    Prints a message and returns without writing anything if the file is missing
    or if the time selection fails.
    '''
    # Some files are named ..._v00_... and some ..._v01_..., hence the wildcard.
    matches = glob.glob('/data/hatpro/jue/hdcp2/radiation_hdcp2/' + year + '/' + file_stem + '*_' + date)
    if not matches:
        print(date + ' no ' + label + '-file for this day found')
        return

    source = str(matches[0])
    if not Path(source).is_file():
        print('File not found')
        print(source)
        return

    # The context manager closes the file again; the data are read lazily,
    # so the selection has to be written out before the block ends.
    with xr.open_dataset(source) as ds:
        # For a few files the time selection fails (the original author suspected
        # broken datasets). Such a day is reported and skipped instead of saving
        # an undefined or stale selection.
        try:
            ds_light = ds.sel(time=slice(sur, sus))
        except Exception:
            print(source, ' something went wrong with selecting sunlight time')
            return

        # Create the folder of the year if it does not exist yet.
        out_dir = os.path.join(path, year)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
            print("created folder : ", out_dir)

        ds_light.to_netcdf(os.path.join(out_dir, file_stem + '_daylight' + date))


def select_daylight_data_juelich(startdate, enddate, path):
    '''
    Step one from the instructions.

    For every day between startdate and enddate, opens the daily files
    sups_joy_pyrg00 (LW data) and sups_joy_pyr00 (SW data) from
    /data/hatpro/jue/hdcp2/radiation_hdcp2/<year>/, keeps only the time stamps
    between sunrise + 1 hour and sunset - 1 hour, and saves each selection as
    its own netCDF file. A folder per year is created below path if needed,
    e.g.: 'path/2018/sups_joy_pyrg00_l1_rlds_v_daylight20180102000000.nc'

    LW and SW are handled independently: a missing or unreadable LW file does
    not prevent the SW file of the same day from being processed.

    input:
        - startdate: <string>: e.g.: '2020-01-01'
        - enddate:   <string>: e.g.: '2020-03-02'
        - path:      <string>: where to save the nc-files, e.g.: 'savings/'
    '''

    # Range of the dates
    all_dates = pd.date_range(start=startdate, end=enddate)

    # Coordinates of Juelich
    latitude = 50.908546
    longitude = 6.413536
    sun = Sun(latitude, longitude)

    radiation_files = (
        ('sups_joy_pyrg00_l1_rlds_v', 'LW'),
        ('sups_joy_pyr00_l1_rsds_v', 'SW'),
    )

    for day in all_dates:
        # Sunrise + 1 hour and sunset - 1 hour (timezone-aware, local time).
        sur_tzd = sun.get_local_sunrise_time(day) + timedelta(hours=1)
        sus_tzd = sun.get_local_sunset_time(day) - timedelta(hours=1)

        # The radiation files are said to be in UTC, so convert to UTC first and
        # only then drop the timezone. Dropping it directly would keep the local
        # wall-clock time and shift the window by the local UTC offset.
        sur = sur_tzd.astimezone(timezone.utc).replace(tzinfo=None)
        sus = sus_tzd.astimezone(timezone.utc).replace(tzinfo=None)

        # Day as string, to open the right file
        y = day.strftime('%Y')
        date = day.strftime('%Y%m%d') + '000000.nc'

        for file_stem, label in radiation_files:
            _save_daylight_radiation(file_stem, label, y, date, sur, sus, path)

def select_daylight_solarpanel_data(startdate, enddate, path):
    '''
    Step two from the instructions.

    For every day between startdate and enddate, unzips the daily file
    /data/obs/site/jue/pvm/l1/<year>/<month>/details<YYYYMMDD>.nc.gz, keeps only
    the time stamps (dimensions time10s and time60s) between sunrise + 1 hour
    and sunset - 1 hour, and saves the selection as a netCDF file. A folder per
    year is created below path if needed, e.g.: 'path/2016/details20161005.nc'

    The unzipped copies live in a temporary folder that is removed when the
    function ends, also when it ends with an error.

    input:
        - startdate: <string>: e.g.: '2020-01-01'  (original note: only data in
                               the years 2016, 2017, 2020, 2021, 2022)
        - enddate:   <string>: e.g.: '2020-03-02'
        - path:      <string>: where to save the nc-files, e.g.: 'savings1/'
    '''

    # Range of the dates
    all_dates = pd.date_range(start=startdate, end=enddate)

    # Coordinates of Juelich
    latitude = 50.908546
    longitude = 6.413536
    sun = Sun(latitude, longitude)

    with tempfile.TemporaryDirectory() as scratch_dir:
        for day in all_dates:
            # Sunrise + 1 hour and sunset - 1 hour (timezone-aware, local time).
            sur_tzd = sun.get_local_sunrise_time(day) + timedelta(hours=1)
            sus_tzd = sun.get_local_sunset_time(day) - timedelta(hours=1)

            # NOTE(review): assumes the time axes of the pvm files are in UTC, as the
            # original comment says for the radiation data - confirm. Convert to UTC
            # before dropping the timezone so the window is not shifted by the local offset.
            sur = sur_tzd.astimezone(timezone.utc).replace(tzinfo=None)
            sus = sus_tzd.astimezone(timezone.utc).replace(tzinfo=None)

            # Day as string, to open the right file
            y = day.strftime('%Y')
            m = day.strftime('%m')
            date = day.strftime('%Y%m%d')

            fileName = '/data/obs/site/jue/pvm/l1/' + y + '/' + m + '/details' + date + '.nc.gz'
            if not Path(fileName).is_file():
                print('File not found: ', fileName)
                continue

            # Unzip into the scratch folder, one file name per day so that a dataset
            # which is still open can never be overwritten.
            unzipped = os.path.join(scratch_dir, 'details' + date + '.nc')
            try:
                with gzip.open(fileName, 'rb') as f_in, open(unzipped, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)
            except (OSError, EOFError) as err:
                print(date, ' File not found: probably something went wrong with unzipping them')
                print(err)
                continue

            # The data are read lazily, so select and save while the file is open.
            with xr.open_dataset(unzipped) as ds:
                ds_light = ds.sel(time10s=slice(sur, sus), time60s=slice(sur, sus))

                # Create the folder of the year if it does not exist yet.
                out_dir = os.path.join(path, y)
                if not os.path.isdir(out_dir):
                    os.makedirs(out_dir)
                    print("created folder : ", out_dir)

                save = os.path.join(out_dir, 'details' + date + '.nc')
                try:
                    ds_light.to_netcdf(save)
                except Exception as err:
                    # Keep going with the next day, but show why this one failed.
                    print(date, 'saving didnt work', err)

            # Free the scratch space of this day right away.
            os.remove(unzipped)




In [5]:
# Step one: daylight subsets of the LW/SW radiation files for 2011-01-01 to 2022-06-01,
# written below 'savings/'. The recorded output below ends in a KeyboardInterrupt.
select_daylight_data_juelich('2011-01-01', '2022-06-01', 'savings/')
  

# Step two: daylight subsets of the solar panel files for 2016-10-01 to 2017-10-30,
# written below 'savings1/'.
select_daylight_solarpanel_data('2016-10-01', '2017-10-30', 'savings1/')


20110101000000.nc no LW-file for this day found
20110102000000.nc no LW-file for this day found
20110103000000.nc no LW-file for this day found
20110104000000.nc no LW-file for this day found
20110105000000.nc no LW-file for this day found
20110106000000.nc no LW-file for this day found
20110107000000.nc no LW-file for this day found
20110108000000.nc no LW-file for this day found
20110109000000.nc no LW-file for this day found
20110110000000.nc no LW-file for this day found
20110111000000.nc no LW-file for this day found
20110112000000.nc no LW-file for this day found
20110113000000.nc no LW-file for this day found
20110114000000.nc no LW-file for this day found
20110115000000.nc no LW-file for this day found
20110116000000.nc no LW-file for this day found
20110117000000.nc no LW-file for this day found
20110118000000.nc no LW-file for this day found
20110119000000.nc no LW-file for this day found
20110120000000.nc no LW-file for this day found
20110121000000.nc no LW-file for this da

20110727000000.nc no LW-file for this day found
20110728000000.nc no LW-file for this day found
20110729000000.nc no LW-file for this day found
20110730000000.nc no LW-file for this day found
20110731000000.nc no LW-file for this day found
20110801000000.nc no LW-file for this day found
20110802000000.nc no LW-file for this day found
20110803000000.nc no LW-file for this day found
20110804000000.nc no LW-file for this day found
20110805000000.nc no LW-file for this day found
20110806000000.nc no LW-file for this day found
20110807000000.nc no LW-file for this day found
20110808000000.nc no LW-file for this day found
20110809000000.nc no LW-file for this day found
20110810000000.nc no LW-file for this day found
20110811000000.nc no LW-file for this day found
20110812000000.nc no LW-file for this day found
20110813000000.nc no LW-file for this day found
20110814000000.nc no LW-file for this day found
20110815000000.nc no LW-file for this day found
20110816000000.nc no LW-file for this da

20120117000000.nc no SW-file for this day found
20120118000000.nc no SW-file for this day found
20120119000000.nc no SW-file for this day found
20120120000000.nc no SW-file for this day found
20120121000000.nc no SW-file for this day found
20120122000000.nc no SW-file for this day found
20120123000000.nc no SW-file for this day found
20120124000000.nc no SW-file for this day found
20120125000000.nc no SW-file for this day found
20120126000000.nc no SW-file for this day found
20120127000000.nc no SW-file for this day found
20120128000000.nc no SW-file for this day found
20120129000000.nc no SW-file for this day found
20120130000000.nc no SW-file for this day found
20120131000000.nc no SW-file for this day found
20120201000000.nc no SW-file for this day found
20120202000000.nc no SW-file for this day found
20120203000000.nc no SW-file for this day found
20120204000000.nc no SW-file for this day found
20120205000000.nc no SW-file for this day found
20120206000000.nc no SW-file for this da

20120703000000.nc no SW-file for this day found
20120704000000.nc no SW-file for this day found
20120705000000.nc no SW-file for this day found
20120706000000.nc no SW-file for this day found
20120707000000.nc no SW-file for this day found
20120708000000.nc no SW-file for this day found
20120709000000.nc no SW-file for this day found
20120710000000.nc no SW-file for this day found
20120711000000.nc no SW-file for this day found
20120712000000.nc no SW-file for this day found
20120713000000.nc no SW-file for this day found
20120714000000.nc no SW-file for this day found
20120715000000.nc no SW-file for this day found
20120716000000.nc no SW-file for this day found
20120717000000.nc no SW-file for this day found
20120718000000.nc no SW-file for this day found
20120719000000.nc no SW-file for this day found
20120720000000.nc no SW-file for this day found
20120721000000.nc no SW-file for this day found
20120722000000.nc no SW-file for this day found
20120723000000.nc no SW-file for this da

20130523000000.nc no SW-file for this day found
20130524000000.nc no SW-file for this day found
20130525000000.nc no SW-file for this day found
20130526000000.nc no SW-file for this day found
20130527000000.nc no SW-file for this day found
20130528000000.nc no SW-file for this day found
20130529000000.nc no SW-file for this day found
20130530000000.nc no SW-file for this day found
20130531000000.nc no SW-file for this day found
20130601000000.nc no SW-file for this day found
20130602000000.nc no SW-file for this day found
20130603000000.nc no SW-file for this day found
20130604000000.nc no SW-file for this day found
20130605000000.nc no SW-file for this day found
20130606000000.nc no SW-file for this day found
20130607000000.nc no SW-file for this day found
20130608000000.nc no SW-file for this day found
20130609000000.nc no SW-file for this day found
20130610000000.nc no SW-file for this day found
20130611000000.nc no SW-file for this day found
20130612000000.nc no SW-file for this da

20131111000000.nc no SW-file for this day found
20131112000000.nc no SW-file for this day found
20131113000000.nc no SW-file for this day found
20131114000000.nc no SW-file for this day found
20131115000000.nc no SW-file for this day found
20131116000000.nc no SW-file for this day found
20131117000000.nc no SW-file for this day found
20131118000000.nc no SW-file for this day found
20131119000000.nc no SW-file for this day found
20131120000000.nc no SW-file for this day found
20131121000000.nc no SW-file for this day found
20131122000000.nc no SW-file for this day found
20131123000000.nc no SW-file for this day found
20131124000000.nc no SW-file for this day found
20131125000000.nc no SW-file for this day found
20131126000000.nc no SW-file for this day found
20131127000000.nc no SW-file for this day found
20131128000000.nc no SW-file for this day found
20131129000000.nc no SW-file for this day found
20131130000000.nc no SW-file for this day found
20131201000000.nc no SW-file for this da

KeyboardInterrupt: 

In [6]:
# Open one saved daily file for inspection; the path follows the naming scheme
# '<path>/<year>/details<YYYYMMDD>.nc' used by select_daylight_solarpanel_data.
xr.open_dataset('savings1/2016/details20161006' + '.nc')

In [4]:
import pandas as pd
import datetime as dt
from datetime import datetime, timedelta
from suntime import Sun, SunTimeException
from datetime import datetime, timezone
import numpy as np
import glob

import os
import pandas as pd
import xarray as xr
from pathlib import Path



def new_Dataset(startdate, enddate, path, endname):
    '''
    Step 4 from the instructions: save the data as xarray datasets instead of
    pandas data frames.

    For every day between startdate and enddate, opens the daily LW file
    (sups_joy_pyrg00, without its lon/lat variables) and the daily SW file
    (sups_joy_pyr00), adds the SW variables rsds and rsds_error to the LW
    dataset, keeps only the time stamps between sunrise + 1 hour and
    sunset - 1 hour, and saves the result as one netCDF file per day.
    A folder per year is created below path if needed. A day is skipped
    (with a message) if either the LW or the SW file is missing.

    input:
        - startdate: <string>: e.g.: '2020-01-01'
        - enddate:   <string>: e.g.: '2020-03-02'
        - path:      <string>: where to save the nc-files, e.g.: 'savings/'
        - endname:   <string>: e.g.: 'all_data'; the date of the day and '.nc'
                               are appended automatically
    '''

    # Range of the dates
    all_dates = pd.date_range(start=startdate, end=enddate)

    # Coordinates of Juelich
    latitude = 50.908546
    longitude = 6.413536
    sun = Sun(latitude, longitude)

    for day in all_dates:

        # Day as string, to open the right files
        y = day.strftime('%Y')
        date = day.strftime('%Y%m%d') + '000000.nc'

        # Some files are named ..._v00_... and some ..._v01_..., hence the wildcard.
        fileName_lw = glob.glob('/data/hatpro/jue/hdcp2/radiation_hdcp2/' + y + '/sups_joy_pyrg00_l1_rlds_v*_' + date)
        if not fileName_lw:
            print(date + ' no LW-file for this day found')
            continue
        if not Path(str(fileName_lw[0])).is_file():
            print('File not found', fileName_lw[0])
            continue

        fileName_sw = glob.glob('/data/hatpro/jue/hdcp2/radiation_hdcp2/' + y + '/sups_joy_pyr00_l1_rsds_v*_' + date)
        if not fileName_sw:
            print(date + ' no SW-file for this day found')
            continue
        if not Path(str(fileName_sw[0])).is_file():
            print('File not found', fileName_sw[0])
            continue

        # Sunrise + 1 hour and sunset - 1 hour (timezone-aware, local time).
        sur_tzd = sun.get_local_sunrise_time(day) + timedelta(hours=1)
        sus_tzd = sun.get_local_sunset_time(day) - timedelta(hours=1)

        # The radiation files are said to be in UTC, so convert to UTC first and
        # only then drop the timezone. Dropping it directly would keep the local
        # wall-clock time and shift the window by the local UTC offset.
        sur = sur_tzd.astimezone(timezone.utc).replace(tzinfo=None)
        sus = sus_tzd.astimezone(timezone.utc).replace(tzinfo=None)

        # Both files are closed again at the end of the block. The data are read
        # lazily, so the combined dataset has to be written while they are open.
        with xr.open_dataset(str(fileName_lw[0]), drop_variables=['lon', 'lat']) as ds_lw, \
                xr.open_dataset(str(fileName_sw[0])) as ds_sw:

            # Put the SW data next to the LW data
            ds_lw['rsds'] = ds_sw['rsds']
            ds_lw['rsds_error'] = ds_sw['rsds_error']

            ds_light = ds_lw.sel(time=slice(sur, sus))

            # Create the folder of the year if it does not exist yet.
            out_dir = os.path.join(path, y)
            if not os.path.isdir(out_dir):
                os.makedirs(out_dir, exist_ok=True)
                print("created folder : ", out_dir)

            ds_light.to_netcdf(os.path.join(out_dir, endname + date))


# Build the combined LW + SW daylight files for 2014-10-01 to 2015-10-25; the files are
# written to 'full_datasets/<year>/' with names starting with 'all_data'.
new_Dataset('2014-10-01', '2015-10-25', 'full_datasets', 'all_data')

created folder :  full_datasets/2014
created folder :  full_datasets/2015


In [4]:
# How to open the saved dataset and read it, to use it in the program.

import xarray as xr
from pathlib import Path

startdate = '2020-01-01'
enddate = '2020-02-01'
path = 'savings/'
name = 'daylight'

# NOTE(review): timesteps_light_juelich is not defined in any cell shown here; it has to
# exist in the kernel already. Presumably it writes the file path + name - confirm.
timesteps_light_juelich(startdate, enddate, path, name)


fileName = path + name
fileObj = Path(fileName)
if fileObj.is_file():
    ds = xr.open_dataset(fileName)

    # Read the variables only when the file could be opened; without it there is
    # no ds and the lines below would fail with a NameError.
    dates = ds.date.data
    hours = ds.hour.data
    minutes = ds.minute.data

    print(dates)
    print(hours)
    print(minutes)
else:
    print('File not found')

['20200101' '20200101' '20200101' ... '20200201' '20200201' '20200201']
['09' '09' '09' ... '16' '16' '16']
['40' '45' '50' ... '15' '20' '25']
