In [1]:
# Load the Colab Drive helper and mount Google Drive at /content/drive so that
# the Drive paths used later in this notebook resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Past Installs 
!pip install sunpy  #
!pip install zeep
!pip install drms
!pip install aiapy
!pip install wget

In [4]:
import astropy.units as u
from astropy.coordinates import SkyCoord
import sunpy.map
from sunpy.net import Fido, attrs as a
from matplotlib import patches
import matplotlib.pyplot as plt
import numpy as np
from aiapy.calibrate import register, update_pointing, normalize_exposure
import os, os.path
import pandas as pd
import random

In [5]:
def get_images(times,down_path,lamb,year,month,day,ordered):
  """
  Download AIA images using the Fido search.

  For every (hour, minute) entry in `times`, each wavelength in `lamb` is
  searched over that one-minute window (seconds :00 to :59) and one of the
  matching observations is fetched into the folder
  `down_path + '_' + <wavelength> + '/'`.

  Inputs
  ------
    times     : 2-D Array where each sub-array lists one hour and minute.
                An empty array is accepted and results in no downloads.
    down_path : Location prefix where images are downloaded to
    lamb      : Wavelengths (in angstroms) to download images from
    year      : Year that images come from
    month     : Month that images come from
    day       : Day that images come from
    ordered   : True if the times are listed in order, False if not. When
                True, an entry earlier than the first entry is treated as
                belonging to the following calendar day.

  Outputs
  -------
    No outputs are returned
  """
  # Local import so the block does not depend on a file-level import
  from datetime import date, timedelta

  # Nothing to download (e.g. a day without events); indexing times[0,0]
  # below would otherwise raise an IndexError
  if np.size(times) == 0:
    return

  # Date portion of the Fido time strings; replaced if a rollover is detected
  date_str = str(year)+'-'+str(month).zfill(2)+'-'+str(day).zfill(2)

  #time as 4-digit integer value for quick comparisons. (i.e. 4:20 PM -> 1620)
  initial_time_value = times[0,0]*100+times[0,1]

  for t in times:

    # Check to see if event time crosses over to the next day. Calendar
    # arithmetic is used so that month and year boundaries roll over
    # correctly (e.g. March 31 -> April 1 rather than "March 32").
    if ordered and t[0]*100+t[1]<initial_time_value:
      next_date = date(int(year), int(month), int(day)) + timedelta(days=1)
      date_str = next_date.isoformat()
      print("next_day_check")

    #Start times and end times to use for fido search
    minute_prefix = date_str+'T'+str(t[0]).zfill(2)+':'+str(t[1]).zfill(2)
    start_time_fido = minute_prefix+":00"
    end_time_fido = minute_prefix+":59"

    # Used to select random image out of those taken during the minute specified
    # by 't'. Using the same fraction for all wavelengths means the same relative
    # position in each result list is chosen, keeping the selected images of the
    # different wavelengths close together in time
    random_seed = random.random()

    for l in lamb:
      # All the observations during minute 't' in wavelength 'l'
      results = Fido.search(a.Time(start_time_fido, end_time_fido),
                        a.Instrument('AIA'),
                        a.Wavelength(wavemin=l*u.angstrom, wavemax=l*u.angstrom))

      # Check needed in case telescope was not running at time/has no observations
      if np.size(results[0,:])>0:
        # Picks one of the images and downloads it
        f_select = int(np.size(results,1)*random_seed)
        Fido.fetch(results[0,f_select],path=down_path+'_'+str(l)+'/')


In [11]:
def get_null_times(df, n, year, month, day):
  """
  Returns n random times, where events are not present, for a specified date.

  A minute counts as occupied when it lies inside an event that starts on the
  given date (including the part up to midnight of an event that ends on the
  following day), or inside the after-midnight part of an event that started
  on the previous calendar day. Times are drawn uniformly, with replacement,
  from the minutes that are not occupied.

  Inputs
  ------
    df        : pandas dataframe object listing all the relevant events to
                consider. The columns 'year', 'month', 'day', 'start' and 'end'
                are read; 'start' and 'end' are compared against
                hour*100+minute values (i.e. 4:20 PM -> 1620)
    n         : integer specifying the number of null times to return
    year      : integer specifying the year
    month     : integer specifying the month
    day       : integer specifying the day

  Outputs
  -------
    times     : an (n, 2) array which contains n specified hours/minutes where
                there are no events occurring. An empty array is returned when
                n is not positive.

  Raises
  ------
    ValueError if n is positive but every minute of the date is occupied
  """
  # Local import so the block does not depend on a file-level import
  from datetime import date, timedelta

  if n <= 0:
    return np.array([])

  # Previous calendar date. Plain day-1 is only valid inside a month; on the
  # first of a month step back to the last day of the previous month/year.
  if day > 1:
    prev_year, prev_month, prev_day = year, month, day-1
  else:
    prev_date = date(int(year), int(month), 1) - timedelta(days=1)
    prev_year, prev_month, prev_day = prev_date.year, prev_date.month, prev_date.day

  # events that start on the requested date
  same_day = df[ (df['year'] == year) &
                 (df['month'] == month) &
                 (df['day'] == day) ]

  # events that started on the previous date and ran past midnight
  carried_over = df[ (df['start'] > df['end']) &
                     (df['year'] == prev_year) &
                     (df['month'] == prev_month) &
                     (df['day'] == prev_day) ]

  # every minute of the day in the same hour*100+minute format as the report
  minute_codes = np.array([h*100+m for h in range(24) for m in range(60)])
  occupied = np.zeros(minute_codes.size, dtype=bool)

  for start, end in zip(same_day['start'], same_day['end']):
    if start <= end:
      occupied |= (minute_codes >= start) & (minute_codes <= end)
    else:
      # event runs past midnight: it covers this date from start onward
      occupied |= minute_codes >= start

  for end in carried_over['end']:
    # after-midnight tail of an event from the previous date
    occupied |= minute_codes <= end

  free_codes = minute_codes[~occupied].tolist()
  if not free_codes:
    raise ValueError("no event-free minute available on the requested date")

  # picking random times among the free minutes
  selected = [random.choice(free_codes) for _ in range(n)]
  return np.array([[code // 100, code % 100] for code in selected])

In [12]:
def get_event_times(event_df, year, month, day):
  """
  Select one random time from each of the events in the input dataframe.

  Each event's 'start' and 'end' are read as hour*100+minute values. An end
  that is earlier than the start is treated as falling on the following day.
  The selected time is returned as hour and minute only; no date information
  is attached to it.

  Inputs
  ------
    event_df  : pandas dataframe object listing all the relevant events to
                consider (columns 'start' and 'end' are read)
    year      : integer specifying the year (not used by this function)
    month     : integer specifying the month (not used by this function)
    day       : integer specifying the day (not used by this function)

  Outputs
  -------
    times     : an array which contains one specified hour/minute which occurs
                during the duration of each event in event_df. An empty array
                is returned when event_df has no rows.
  """
  minutes_per_day = 24*60
  times = []

  for _, event in event_df.iterrows():
    # start/end as minutes since midnight
    start_hour, start_min = divmod(int(event['start']), 100)
    end_hour, end_min = divmod(int(event['end']), 100)
    start_total = start_hour*60 + start_min
    end_total = end_hour*60 + end_min

    # duration of event in minutes; the modulo is taken over the whole
    # difference so that events crossing midnight never yield a negative value
    t_diff = (end_total - start_total) % minutes_per_day

    # selecting random time from event duration (both endpoints included)
    t_selected = start_total + random.randrange(t_diff+1)

    # converting selected time to hours and minutes
    times.append([(t_selected // 60) % 24, t_selected % 60])

  if not times:
    return np.array([])
  return np.array(times)

In [13]:
def get_event_df(parent_df, event_type, year, month, day):
  """
  Returns the events of a given type that either started or ended during a
  given day (Note this can include events that started on the previous
  calendar day, but ended on day)

  Inputs
  ------
    parent_df : pandas dataframe containing all possible events to choose from
                (columns 'event', 'year', 'month', 'day', 'start', 'end' are read)
    event_type: string identifying the type of event to choose from in parent_df
    year      : int representing the year to choose events from
    month     : int representing the month to choose events from
    day       : int representing the day to choose events from

  Outputs
  -------
    A single pandas dataframe with all events from parent_df that fit the
    specifications given by the remaining inputs. Events that started on day
    come first, followed by the events carried over from the previous day.
  """
  # Local import so the block does not depend on a file-level import
  from datetime import date, timedelta

  # Previous calendar date. Plain day-1 is only valid inside a month; on the
  # first of a month step back to the last day of the previous month/year.
  if day > 1:
    prev_year, prev_month, prev_day = year, month, day-1
  else:
    prev_date = date(int(year), int(month), 1) - timedelta(days=1)
    prev_year, prev_month, prev_day = prev_date.year, prev_date.month, prev_date.day

  of_type = (parent_df.event==event_type)

  # dataframe with events that started on day
  started_on_day = parent_df[ of_type &
                              (parent_df.year==year) &
                              (parent_df.month==month) &
                              (parent_df.day==day) ]

  # dataframe with events that started on the previous day and ended on day
  # (start later than end means the event ran past midnight)
  carried_over = parent_df[ of_type &
                            (parent_df.year==prev_year) &
                            (parent_df.month==prev_month) &
                            (parent_df.day==prev_day) &
                            (parent_df.start > parent_df.end) ]

  #combined event dataframe
  return pd.concat([started_on_day, carried_over])

In [14]:
def mean_pool(square_array,ratio):
  """
  Downsample a square array by averaging non-overlapping ratio x ratio tiles.

  Inputs
  ------
    square_array : Array to be downsampled. Must be a square array with axes
                   lengths that can be divisible by ratio
    ratio        : Downsampling ratio. i.e. a 1024x1024 array with a ratio of 4
                   will be downsampled to 256x256

  Outputs
  -------
    Returns the downsampled array. If the array is not square an error
    message is printed and None is returned.
  """
  n_rows, n_cols = np.size(square_array,0), np.size(square_array,1)

  # Guard clause: only square arrays are handled
  if n_rows != n_cols:
    print("ERROR: ARRAY NOT SQUARE")
    return None

  pooled_len = int(n_rows/ratio)
  tile_len = int(ratio)

  # Split each axis into (tile index, position within tile) and average over
  # the two within-tile axes
  tiles = square_array.reshape(pooled_len, tile_len, pooled_len, tile_len)
  return tiles.mean(axis=(1,3))


In [15]:
def create_storage_dirs(event_path,null_path,sub_fold,lambdas):
  """
  Script to check if the directories for storing AIA images already exist,
  and makes them if not.

  One folder named `<sub_fold>_<wavelength>` is created per wavelength under
  both event_path and null_path. Missing parent folders are created as well,
  and folders that already exist are left untouched.

  Inputs
  ------
    event_path : path to where all event images for a given event type are stored
    null_path  : path to where all null images for a given event type are stored
    sub_fold   : folder name prefix for a specific time/date
    lambdas    : wavelengths for which images are being downloaded and stored

  Outputs
  -------
    No outputs are returned
  """
  for base_path in (event_path, null_path):
    for wavelength in lambdas:
      # Full folder path name
      storage_path = base_path+sub_fold+"_"+str(wavelength)
      # Make folder (and any missing parents) if it does not exist
      os.makedirs(storage_path, exist_ok=True)


In [18]:
# Primary Download Script Initial Variable Values

# Folder prefixes (on the mounted Drive) under which event and null images are stored
xray_event_path = '/content/drive/Shareddrives/Phys 477 - Astro Project/AIA_files/XRAY_events/'
xray_null_path = '/content/drive/Shareddrives/Phys 477 - Astro Project/AIA_files/XRAY_nulls/'

# Time frame to select images from. np.arange excludes its stop value, so
# these cover year 2015, month 3 and days 11 through 31.
years = np.arange(2015,2016)
months = np.arange(3,4)
days = np.arange(11,32)

# Event database (CSV file) to select events from
df_main = pd.read_csv('/content/drive/Shareddrives/Phys 477 - Astro Project/df_test.csv')


In [None]:
# Primary Download Script
# For every day in the configured time frame: pick one random time inside each
# 'XRA' event, pick the same number of event-free ("null") times, and download
# one AIA image per wavelength for each of those times.

# Wavelengths to download
lambdas_used = [131,171,211]

for y in years:
  for m in months:
    # Folder name prefix shared by all images of this year/month
    sub_fold = str(y)+"_"+str(m).zfill(2)
    create_storage_dirs(xray_event_path,xray_null_path,sub_fold,lambdas_used)
    for d in days:
      # Create dataframe containing relevant events
      xray_df = get_event_df(df_main,'XRA',y,m,d)

      # A day without events yields empty time arrays, which get_images
      # cannot index; there is nothing to download, so move on.
      if len(xray_df) == 0:
        continue

      # Select one time for each event
      xray_event_times = get_event_times(xray_df, y, m, d)

      # Print selected times to screen
      print(xray_event_times)

      # Select one null event time for each positive event
      num_events = np.size(xray_event_times,0)
      xray_null_times = get_null_times(xray_df, num_events, y, m, d)

      # Print selected times to screen
      print(xray_null_times)

      # Download images
      # NOTE(review): get_event_df appends events carried over from the
      # previous day after the same-day events, so the event times are not
      # guaranteed to be in chronological order - confirm ordered=True is
      # appropriate for them.
      get_images(xray_event_times,xray_event_path+sub_fold,lambdas_used,y,m,d,ordered=True)
      get_images(xray_null_times,xray_null_path+sub_fold,lambdas_used,y,m,d,ordered=False)

[[ 1 23]
 [ 2 17]
 [ 5  0]
 [ 5 11]
 [ 5 57]
 [ 6  8]
 [ 6 47]
 [ 7 21]
 [ 7 52]
 [ 9 27]
 [11 37]
 [13  5]
 [14 44]
 [15 27]
 [16 22]
 [18 44]
 [22 14]
 [23 36]
 [23 48]]
[[10 10]
 [12  6]
 [14 50]
 [21 25]
 [ 3 39]
 [18 32]
 [10 12]
 [10 58]
 [11 11]
 [23  2]
 [ 4  7]
 [ 9 52]
 [22 49]
 [16  0]
 [ 9 42]
 [ 9 53]
 [11  8]
 [ 9 46]
 [ 8 41]]


HBox(children=(FloatProgress(value=0.0, description='Files Downloaded', max=1.0, style=ProgressStyle(descripti…

HBox(children=(FloatProgress(value=0.0, description='aia_lev1_131a_2015_03_11t01_23_20_63z_image_lev1.fits', m…




HBox(children=(FloatProgress(value=0.0, description='Files Downloaded', max=1.0, style=ProgressStyle(descripti…

HBox(children=(FloatProgress(value=0.0, description='aia_lev1_171a_2015_03_11t01_23_23_34z_image_lev1.fits', m…




HBox(children=(FloatProgress(value=0.0, description='Files Downloaded', max=1.0, style=ProgressStyle(descripti…

HBox(children=(FloatProgress(value=0.0, description='aia_lev1_211a_2015_03_11t01_23_23_62z_image_lev1.fits', m…




HBox(children=(FloatProgress(value=0.0, description='Files Downloaded', max=1.0, style=ProgressStyle(descripti…

HBox(children=(FloatProgress(value=0.0, description='aia_lev1_131a_2015_03_11t02_17_32_62z_image_lev1.fits', m…




HBox(children=(FloatProgress(value=0.0, description='Files Downloaded', max=1.0, style=ProgressStyle(descripti…

HBox(children=(FloatProgress(value=0.0, description='aia_lev1_171a_2015_03_11t02_17_35_34z_image_lev1.fits', m…




HBox(children=(FloatProgress(value=0.0, description='Files Downloaded', max=1.0, style=ProgressStyle(descripti…

HBox(children=(FloatProgress(value=0.0, description='aia_lev1_211a_2015_03_11t02_17_35_62z_image_lev1.fits', m…