## Goal
    Establish a method for deciding which noaa date 
    should be used in order to compare acive region centroids with image
    
### Sub-goal
    Plot AR from both sets of data and view results 

# ToDo - Finish Method

#### Import Python libraries 

In [1]:
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
%matplotlib inline

#### Import user defined library

In [2]:
import extract_image_features as extract

#### Locally defined functions

In [3]:
def plot_sunspots_and_active_regions(df, scan_year, features, time_slice):
    '''
        use scan_year to shift through noaa observations
        use time_slice to scan through image data
    '''
    #print "Time = {}".format(features[time_slice]['image_time'])
    nova_x , nova_y, nova_cents  = extract.get_noaa_centroids(df, scan_year)
    x_cents, y_cents, _ = extract.get_image_active_region_centroids(features[time_slice])
    plt.figure(figsize=(10,10))
    noaa = plt.scatter(nova_x, nova_y, c='b', marker='o');
    me = plt.scatter(x_cents, y_cents , c='r',marker='+');
    plt.title("Sunspots & Active Regions    " + scan_year);
    plt.legend((me, noaa),
               ('mydata-sunspots', 'noaa-AR'),
               scatterpoints=1,
               loc='lower right',
               ncol=2,
               fontsize=15);

In [4]:
def get_current_date(features_dict):
    for index in range(len(features_dict)):
        try:
            date = "".join(features_dict[index]['image_date'].split())
            break
        except KeyError: 
            pass
    return date

In [5]:
def plot_sunSpots_on_activeRegions(image_activeRegions_array, sunSpot_features_dict, scan_year_singleDay):
    
    plt.figure(figsize = (10,10));
    test_plot_diff  = np.ma.masked_where(image_activeRegions_array == 0.0,\
                                         image_activeRegions_array )

    for index in sunSpot_features_dict.keys():
        if isinstance(index, int) and index > 0:
            plt.scatter(sunSpot_features_dict[index]["x_pos_ave"],\
                        sunSpot_features_dict[index]["y_pos_ave"], c='k', marker ='o');
            plt.scatter(sunSpot_features_dict[index]["x_neg_ave"],\
                        sunSpot_features_dict[index]["y_neg_ave"], c='k', marker ='o');

    plt.imshow(test_plot_diff, interpolation='none', origin='lower');
    plt.title("Sunspots & Active Regions    " + scan_year_singleDay);
    # plt.xlim([775,850])
    # plt.ylim([500,600])
    plt.show();

In [6]:
# Search for previous date algorithm
#
# check day - if day >= 2 , then subtract 1 from day and pass on data to exraction function 
#             if day == 1, then check if 31st of previous month exist
#
#
# Bottom portion is a seperate function
#                              if true, pass on data to exraction function 
#                          elif check if 30st of previous month exist
#                              if true, pass on data to exraction function 
#                          elif check if 29st of previous month exist
#                              if true, pass on data to exraction function 
#                          elif check if 28st of previous month exist
#                              if true, pass on data to exraction function 
#                          else skip image because data doesn't exist
def get_previous_date(year, month, day):
    '''
    Get the date the for day that preceeds image data date
    
    INPUT: Image date, string YYYYMMDD
    OUTPUT: Previous date, string YYYYMMDD
    '''
    # if day >= 2 , then subtract 1 from day
    if int(day) >= 2:
        day = int(day)
        day -= 1
        day = str(day)
        # if resulting day is single digit, add leading '0'
        if len(day) == 1:
            day = "0" + day
    # if day == 1, then get 31st of previous month 
    else:
        # if month >= 2 , then subtract 1 from month
        if int(month) >= 2:
            month = str(int(month) - 1)
            # if month is single digit, add leading '0'
            if len(month) == 1:
                month = "0" + month
        # if month == 1, then set month to 12 and get previous year 
        else:
            month = str(12)
            year = str(int(year) - 1)
        day = "31"

    return year + month + day

In [7]:
def get_prevous_day_noaa_df(noaa_data, year, month, day):
    
    # |date|  |ar_num| |lat/long| |clon| |area(Msolar-hemishpere)|
    df = pd.DataFrame(noaa_data[year])
    df.columns = ["date", 'activeRegionNum', "latLong", "clon", "area", 5,6,7,8]
    df = df[["date", 'activeRegionNum', "latLong", "clon", "area"]]
    
    # get previous date
    previous_day_scan = get_previous_date(year, month, day)
    # get size of data for previous day
    df_size = df[previous_day_scan == df.date].size
    # if df of previous day is empty
    if df_size== 0 :
        
        # update year
        year = previous_day_scan[:4]
        # update month
        month = previous_day_scan[4:6]
        # update day
        day = previous_day_scan[-2:]
        
        # if df is empty and day is greater than 28 (reference to Feburary)
        while df_size == 0 and day >= 28:
            # go back one more day
            #print "previous_day_scan ", previous_day_scan
            previous_day_scan = get_previous_date(year, month, day)
            # check if this day has data
            df_size = df[previous_day_scan == df.date].size
            
            if day == 27:
                return "SKIP IMAGE: day 27 reached"
            if df_size > 0:
                break
                
            # update year
            year = previous_day_scan[:4]
            # update month
            month = previous_day_scan[4:6]
            # update day
            day = previous_day_scan[-2:]
            #print "Year Month Day ", year, month, day

    # return noaa data
    return df[previous_day_scan == df.date], previous_day_scan

In [8]:
def get_current_day_noaa_df(new_feat_objects_singleDay, noaa_data):
    '''
    ToDo: address edge case in which current day is Jan 1st
          and previous day is Dec 31st
          noaa_df will have to the last 3 days of Dec
    '''
    
    # scan_date YYYMMDD
    scan_date = get_current_date(new_feat_objects_singleDay)
    # get current year
    year = scan_date[:4]
    # get current month
    month = scan_date[4:6]
    # get current day
    day = scan_date[-2:]
    
    # |date|  |ar_num| |lat/long| |clon| |area(Msolar-hemishpere)|
    df = pd.DataFrame(noaa_data[year])
    df.columns = ["date", 'activeRegionNum', "latLong", "clon", "area", 5,6,7,8]
    df = df[["date", 'activeRegionNum', "latLong", "clon", "area"]]
    
    return df, year, month, day

In [9]:
def get_currentDay_previusDay_noaa_activeRegions(new_feat_objects_singleDay, noaa_data):
    # get noaa file from same date as image data
    same_day_df, year, month, day = get_current_day_noaa_df(new_feat_objects_singleDay, 
                                                            noaa_data)
    
    current_day = year + month +day
    # these are NOAA active region centroid 
    nova_cents_sameDay, sameDay_df = extract.get_noaa_centroids(same_day_df, 
                                                                current_day)

    previous_day_df, previous_day = get_prevous_day_noaa_df(noaa_data, 
                                                            year, 
                                                            month, 
                                                            day)
    # these are NOAA active region centroid 
    nova_cents_prevDay, prevDay_df = extract.get_noaa_centroids(previous_day_df, 
                                                                previous_day)
    
    return  nova_cents_sameDay, nova_cents_prevDay, sameDay_df, prevDay_df

## Method for identifying related AR between noaa and image
    Here we will develop a function that compares active regions between data sets
    whenever data sets are within a 12 hour range of each other
    
    
    Import extracted feautres (centorids, acive regions,...) in day batches
    Import NOAA data
    Get NOAA data for same day and previous day as Image data
    When getting NOAA AR labels, check for time
        If image data timeStamp is before noon, reference previous day NOAA data
        If image data timeStamp is afternoon, reference same day NOAA data

#### Get Single Day Transformed Image File Names
    This function is for santiy checking AR assginments from code.
    Should not be included in labeling script.

In [10]:
trans_image_path = "/Users/Alexander/NASA/trans_image_data_singleDay/"
trans_filenames_singleDay = extract.get_image_trans_filenames(trans_image_path)

In [11]:
trans_filenames_singleDay[0:3]

['.DS_Store', 'fd_M_96m_01d_3925_0000.pk', 'fd_M_96m_01d_3925_0001.pk']

#### Get Single Day Transformed Image Data Arrays
    This function is for santiy checking AR assginments from code.
    Should not be included in labeling script.

In [12]:
trans_image_path = "/Users/Alexander/NASA/trans_image_data_singleDay/"
trans_images_singleDay = extract.get_trans_image(trans_image_path, trans_filenames_singleDay)

#### Get Single Day Images

In [13]:
path_results = "/Users/Alexander/NASA/feature_extraction_results_singleDay/"
feats_singleDay = extract.get_features(path_results)

#### Load NOAA data

In [16]:
# noaa_path = "/Users/Alexander/NASA/MDI_data/rgn{}.txt".format(noaa_year)
noaa_path = "/Users/Alexander/NASA/noaa_data/"
noaa_data = extract.get_noaa_sunspot_files(noaa_path)

In [21]:
# view years
print "Years range from {} to {}".format(min(noaa_data.keys()), max(noaa_data.keys()))

Years range from 1996 to 2015


#### Get Image Features

In [22]:
new_feat_objects_singleDay = [extract.map_centroids_long_lat(feat) for feat in feats_singleDay]

### time_slice  - an index for a particular image taken within a single day

In [23]:
# choose the kth image out of the list of sample images
time_slice = 0 # for '0', get first file from trans_filenames_singleDay list

In [24]:
# first image of the day
new_feat_objects_singleDay[time_slice]

{0: (0, 0),
 2: {'lat_neg': -8.65,
  'lat_pos': -8.83,
  'long_neg': 222.13,
  'long_pos': 222.42,
  'neg_net_flux': -57276.73,
  'pos_net_flux': 2516320.36,
  'x_neg_ave': 84.18,
  'x_pos_ave': 85.88,
  'y_neg_ave': 434.97,
  'y_pos_ave': 433.35},
 3: {'lat_neg': -7.4,
  'lat_pos': -7.36,
  'long_neg': 239.0,
  'long_pos': 238.0,
  'neg_net_flux': -114687.36,
  'pos_net_flux': 116369.65,
  'x_neg_ave': 180.15,
  'x_pos_ave': 174.51,
  'y_neg_ave': 446.01,
  'y_pos_ave': 446.39},
 4: {'lat_neg': -5.08,
  'lat_pos': -4.14,
  'long_neg': 333.96,
  'long_pos': 332.68,
  'neg_net_flux': -76818.28,
  'pos_net_flux': 21061.0,
  'x_neg_ave': 720.38,
  'x_pos_ave': 713.13,
  'y_neg_ave': 466.64,
  'y_pos_ave': 475.01},
 5: {'lat_neg': 3.3,
  'lat_pos': 3.96,
  'long_neg': 6.32,
  'long_pos': 4.13,
  'neg_net_flux': -14762.44,
  'pos_net_flux': 1594988.8,
  'x_neg_ave': 904.46,
  'x_pos_ave': 892.0,
  'y_neg_ave': 541.52,
  'y_pos_ave': 547.41},
 6: {'lat_neg': 5.32,
  'lat_pos': 5.68,
  'long_

### Get NOAA active region centroids for current day  and for previous day

In [25]:
# get noaa active region centroids for current day, as current image, and for previous day
nova_cents_sameDay,\
nova_cents_prevDay,\
same_day_df,\
previous_day_df = get_currentDay_previusDay_noaa_activeRegions(new_feat_objects_singleDay,
                                                            noaa_data)

In [27]:
# get image centroids
image_cents = extract.get_image_active_region_centroids(new_feat_objects_singleDay[time_slice])

### Get labels for current image based on time of day

In [31]:
loop_counter = 0
# get time of day for image
image_hour = int(new_feat_objects_singleDay[loop_counter]["image_time"][0:2])

# get active regions pairs
# if time is before noon, then use prevDay noaa centroids
if image_hour <= 12:
    print "use previous day noaa_cents"
    ar_pairs = extract.get_shortest_distance_pair(image_cents, nova_cents_prevDay)
# if time is after noon, then use currDay noaa centroids
else:
    print "use current day noaa_cents"
    ar_pairs = extract.get_shortest_distance_pair(image_cents, nova_cents_sameDay)

ar_pairs = extract.check_repeating_noaa_assignments(ar_pairs)
# get active region labels


use previous day noaa_cents


In [37]:
# (img_ar_num, [ { NOAA_ar_num : dist_bw_img_ar_and_NOAA ar } , ..... ])
ar_pairs.items()

[(1, [{4: 4.516015943284523}]), (2, [{1: 0.5044799302251737}])]

### Append Active Region label key:value pairs

In [32]:
for imageInd, noaaInd_dist  in ar_pairs.iteritems():
    imageAR_num = imageInd
    noaaAR_num = noaaInd_dist[0].keys()[0]    
    ar_label = same_day_df.iloc[noaaAR_num]['activeRegionNum']
    new_feat_objects_singleDay[time_slice][imageAR_num]["activeRegionNum"] = ar_label

KeyError: 1

In [33]:
new_feat_objects_singleDay[0]["image_date"]

'2003 10 01'

In [34]:
# image_ar count begins at 0 (i.e. 0,1,2,...)
# noaa_ar count begins at 0 
ar_pairs

defaultdict(list, {1: [{4: 4.516015943284523}], 2: [{1: 0.5044799302251737}]})

In [84]:
# need to collect metrics
# hits
# FP
# emtyp lists
# number of labels genreated & for which images 

def get_active_region_labels(new_feat_objects_singleDay, noaa_data):
    '''
        Takes acive region labels from noaa data and assigns those labels to the corresponding 
        active regions in the image data. Active regions in image data that does not have corresponding
        active regions in noaa data, will have new labels genreated. 
        
        INPUT: list of dictionaries, each dictionary contains feature info for each image of that day
               keys: active region number, fits file header info (date, time, ...)
               values: extract features (AR centroids in pixel space and in lat/long space, mag flux, ...)
               
               noaa_data, dictionary
               keys: years
               values: image features (date, AR_lable, lat/long, ...)
               
        OUTPUT: same as input, list of dictionarys but with active region labels assinged to sunspots 
    '''
    metrics = [] # list of metrics collected for each image
    metric_results = dict()
        
    # get noaa active region centroids for current day, as current image, and for previous day
    nova_cents_sameDay,\
    nova_cents_prevDay,\
    same_day_df,\
    previous_day_df = get_currentDay_previusDay_noaa_activeRegions(new_feat_objects_singleDay,
                                                                   noaa_data)
    # loop through images taken in a single day
    for time_slice in xrange(len(new_feat_objects_singleDay)):
        
        image_metrics = dict() # metrics collected a sinlge image
        # both reflect overall perfomrance & what NOAA considers an active region
        # both reflect flux parameter & what we consider an active region 
        total_labels_assinged = 0 
        total_labels_generated = 0
        
        total_image_AR = 0 # reflects counting bug in code
        total_image_FP = 0 # '0' AR number (reflects min_pixel in object dection code)
        total_empty_lists = 0 # (image AR was not assinged to noaa label, reflects min_dist radius )
        
        
        # get hour that image was taken
        image_hour = int(new_feat_objects_singleDay[time_slice]["image_time"][0:2])
        
        # get image centroids
        image_cents = extract.get_image_active_region_centroids(new_feat_objects_singleDay[time_slice])

        # get noaa/image active regions pairs
        # if time is before noon, then use prevDay noaa centroids
        if image_hour <= 12:
            #print "use previous day noaa_cents"
            ar_pairs = extract.get_shortest_distance_pair(image_cents, nova_cents_prevDay)
        # if time is after noon, then use currDay noaa centroids
        else:
            #print "use current day noaa_cents"
            ar_pairs = extract.get_shortest_distance_pair(image_cents, nova_cents_sameDay)

        # check and eliminate repeated noaa AR label assignments     
        ar_pairs = extract.check_repeating_noaa_assignments(ar_pairs)
        
        # get all image active region numbers
        image_ar_list = new_feat_objects_singleDay[time_slice].keys()[:-4]
        total_image_AR = len(image_ar_list)
        
        # get all image active region numbers assigned to noaa labels 
        assigned_image_ar_list = ar_pairs.keys()
        
        #print "ar_pairs ", ar_pairs
        #print "assigned ", assigned_image_ar_list
        #print "image_ar_list ",image_ar_list, '\n'
        # loop through active regions for each image
        for image_ar in image_ar_list:
            # 0's are assinged to small patches of flux that made it through the object detection
            # but are not active regions -- False Positives 
            # 0's present in image_feat_dict are FP
            # 0's present in ar_pairs ARE NOT FP, they are correct AR numbers 
            if image_ar == 0: 
                #print "Image Active region index {}".format(image_ar)
                #print "False Positive zero -- generate active region number\n"
                total_image_FP += 1
                total_labels_generated += 1
                
            elif image_ar in assigned_image_ar_list:
                try:
                    noaaAR_num = ar_pairs[1][0].keys()[0]   
                    ar_label = same_day_df.iloc[noaaAR_num]['activeRegionNum']
                    new_feat_objects_singleDay[time_slice][image_ar]["activeRegionNum"] = ar_label
                    #print "assinged label to image AR {}".format(image_ar)
                    total_labels_assinged += 1
                except IndexError :
                    #print "Emptry List: No noaa labels assinged to image data"
                    #print "generate active region number for image AR {}".format(image_ar)
                    total_empty_lists += 1
                    total_labels_generated += 1
                    
            else:
                #print "generate active region number for image AR {}".format(image_ar)
                total_labels_generated += 1
                
        # metrics 
        image_metrics["labels_assinged"] = total_labels_assinged
        image_metrics["labels_generated"] = total_labels_generated
        image_metrics["image_AR"] = total_image_AR
        image_metrics["image_FP"] = total_image_FP
        image_metrics["empty_lists"] = total_empty_lists
        image_metrics["time"] = new_feat_objects_singleDay[time_slice]["image_time"]
        image_metrics["date"] = new_feat_objects_singleDay[time_slice]["image_date"]

        
        #metrics["date"] = image_metrics
        metrics.append(image_metrics)
        
    date = new_feat_objects_singleDay[time_slice]["image_date"]
    metric_results[date] = metrics
        
        
    return new_feat_objects_singleDay, metric_results

## ToDo
    Create list of dicts for metrics log
        total labels assinged
        total laebls not-assinged/generated
        total AR of image 
        total false positives, '0' AR number (reflects min_pixel in object dection code)
        total empty lists (image AR was not assinged to noaa label, reflects min_dist radius )
        date id
        time id
    Create AR label generator
        need to think of how keep track of AR that have been assinged a label from a previous day
        don't want to assinged 2 or more different labels to the same AR !!!
        

In [85]:
test, metrics = get_active_region_labels(new_feat_objects_singleDay, noaa_data)

In [81]:
# ToDo
# Move resutls to file
# make sure that results save in a consistent order and structure during parallelization
# restructure master image data into years/days/hours - ask David how to read the fits file names 

In [88]:
# write function that aggregates results 
metrics

{'2003 10 02': [{'date': '2003 10 01',
   'empty_lists': 0,
   'image_AR': 6,
   'image_FP': 1,
   'labels_assinged': 1,
   'labels_generated': 5,
   'time': '00:03:03'},
  {'date': '2003 10 01',
   'empty_lists': 2,
   'image_AR': 6,
   'image_FP': 0,
   'labels_assinged': 0,
   'labels_generated': 6,
   'time': '01:39:03'},
  {'date': '2003 10 01',
   'empty_lists': 2,
   'image_AR': 6,
   'image_FP': 0,
   'labels_assinged': 0,
   'labels_generated': 6,
   'time': '02:59:03'},
  {'date': '2003 10 01',
   'empty_lists': 0,
   'image_AR': 5,
   'image_FP': 0,
   'labels_assinged': 0,
   'labels_generated': 5,
   'time': '20:51:03'},
  {'date': '2003 10 01',
   'empty_lists': 0,
   'image_AR': 5,
   'image_FP': 0,
   'labels_assinged': 0,
   'labels_generated': 5,
   'time': '22:27:03'},
  {'date': '2003 10 02',
   'empty_lists': 0,
   'image_AR': 5,
   'image_FP': 1,
   'labels_assinged': 0,
   'labels_generated': 5,
   'time': '01:39:03'},
  {'date': '2003 10 02',
   'empty_lists': 0

## Modify code base so that it accpets values for parameters - Done
    get_active_region_map (extract_features_script) --> flux_magnitude_filter, current 130
        This parameter determines the flux magnitude cut-off.
    
        Raise value to increase flux regions that get zeroed out.
            Raising value prevents actual sunspots (with lower magnitude) 
            from being considered psudeo-sunspots (False Negatives).
            
            
        Drop value to decrease flux regions that get zeroed out.
            Droping value prevents psudeo-sunspots (with lower magnitudes)
            from being considered actual sunspots (False Positives).
        
    get_active_region_map (extract_features_script) --> kernal_std, current 10 
        This parameter essentially defines the boundary of an active region. 
     
        Raise value to increase smoothing in, and between, flux regions 
        which increases the surface area of the active region.
        Could lead to False Positives by inclduing non-related flux patches 
        into active regions.
            
        Drop value to decrease smoothing in, and between, flux regions
        which decreases the surface area of the active region. 
        Could lead to False Negatigves by excluding related flu patches 
        into acive regions. 
        
    identify_active_regions (sunspot_feature_extraction) --> num_pixel_in_active_region, current 100
        This parameter determinds the minimum size of an active region, cut-off number for pixels in active region. 
        
        Raise value to incease minimum size of acitive regions. 
        
        Drop value to decrease minimum size of active regions. 
        
    

### Move Extracted Features into Text 

In [None]:
# Notes from talk with David - Jan 22, 2016
# Saving results to text file
# Image Name should HMIYYYY.txt (i.e. discripter date . txt )
# header: |YYYY MM DD| |hh:mm:ss| |Active Region #| |Flux| |Longitude| |Latitude|
#                                  xxxxxx[letter]     +/-     xxx.xx     xxx.xx
# unidentifed active regions, label them starting at 100,000 
# will have to map pixels to longitude and latitude 
# can provide lead digit to indicate that we have observed an active region that NOAA has not
# we want the carinton longitude 
# 
# one active region 
#
# header: |YYYY MM DD| |hh:mm:ss| |Active Region #| |Flux| |Longitude| |Latitude|
#                                  xxxxxx[letter]     +/-     xxx.xx     xxx.xx
#            n1             n2    get_active(n1,n2)  n4         n5        n6   
#            n7             n8    get_active(n7,n8)  n9         n10       n11

In [None]:
db_path = "/Users/Alexander/NASA/Database/"

In [None]:
# active regions without a label don't have a 'activeRegionNum' --> KeyError
def save_features_to_file(path, single_image):
    rows = []
    for feat_key, feat_val in single_image.iteritems():
        # filter out non-active regions and false positive active regions
        if isinstance(feat_key, int) and feat_key > 0:
            # get postive value features for active region
            row = [single_image['image_date'], 
                   single_image['image_time'], 
                   feat_val['activeRegionNum']+"P", # positive flux label
                   feat_val['pos_net_flux'], 
                   feat_val['long_pos'], 
                   feat_val['lat_pos']]
            rows.append('\t'.join(map(str,row)))
            # get negative value features for active region
            row = [single_image['image_date'],
                   single_image['image_time'], 
                   feat_val['activeRegionNum']+"N", # negative flux label
                   feat_val['neg_net_flux'], 
                   feat_val['long_neg'], 
                   feat_val['lat_neg']]
            rows.append('\t'.join(map(str,row)))

    # save active region features to text file
    pd.DataFrame(rows).to_csv(path + 'features.txt', 
                              index = False, 
                              header=False, 
                              mode='a')

In [None]:
for image in test:
    save_features_to_file(db_path, image)

###Choose Image to View (Image 1)

In [None]:
current_year, scan_year = get_currentYear_get_scanDate(new_feat_objects_singleDay, time_slice)

In [None]:
# |date|  |ar_num| |lat/long| |clon| |area(Msolar-hemishpere)|
df = pd.DataFrame(noaa_data[current_year])
df.columns = ["date", 'activeRegionNum', "latLong", "clon", "area", 5,6,7,8]
df = df[["date", 'activeRegionNum', "latLong", "clon", "area"]]

In [None]:
df[scan_year == df.date]

##Plot Sunspots & Active Regions

###Choose Image to View 
        use scan_year to shift through noaa observations
        use time_slice to scan through image data

In [None]:
scan_year_singleDay = "20031001"
# these are NOAA active region centroid 
nova_x , nova_y, nova_cents  = extract.get_noaa_centroids(df, scan_year_singleDay)

In [None]:
# choose the kth image out of the list of sample images
time_slice = 0 # for '0', get first file from trans_filenames_singleDay list

In [None]:
current_year, scan_year = get_currentYear_get_scanDate(new_feat_objects_singleDay, time_slice)

In [None]:
# |date|  |ar_num| |lat/long| |clon| |area(Msolar-hemishpere)|
df = pd.DataFrame(noaa_data[current_year])
df.columns = ["date", 'activeRegionNum', "latLong", "clon", "area", 5,6,7,8]
df = df[["date", 'activeRegionNum', "latLong", "clon", "area"]]

In [None]:
scan_year_singleDay = "20031001"
# these are NOAA active region centroid 
nova_x , nova_y, nova_cents  = extract.get_noaa_centroids(df, scan_year)

In [None]:
# identifies image AR that are within 5 degrees of noaa AR
x_cents, y_cents, ar_cents = extract.get_image_active_region_centroids(new_feat_objects_singleDay[time_slice])

In [None]:
print scan_year
plot_sunSpots_on_activeRegions(trans_images_singleDay[time_slice],\
                               new_feat_objects_singleDay[time_slice],\
                               scan_year )

###NOAA Current Day

In [None]:
# NOAA -- end of day, Oct 1st
# Image -- start of day, Oct 1st
scan_year_singleDay = "20031001" # noaa obs
time_slice = 0 # image
plot_sunspots_and_active_regions(df, scan_year_singleDay,\
                                 new_feat_objects_singleDay,\
                                 time_slice)

# Current code will take average position of sunspots and corresponding noaa label
# so both double hit" sunspots around S5350 will only be assigned to a single noaa label

In [None]:
# NOAA -- end of day, Oct 1st
# Image -- middle of day, Oct 1st
scan_year_singleDay = "20031001" # noaa obs
time_slice = 9 # image
plot_sunspots_and_active_regions(df, scan_year_singleDay,\
                                 new_feat_objects_singleDay,\
                                 time_slice)

In [None]:
# NOAA -- end of day, Oct 1st
# Image -- end of day, Oct 1st
scan_year_singleDay = "20031001" # noaa obs
time_slice = 4 # image
plot_sunspots_and_active_regions(df, scan_year_singleDay,\
                                 new_feat_objects_singleDay,\
                                 time_slice)

###NOAA Previous Day 

In [None]:
# NOAA -- end of day, Sept 30st
# Image -- start of day, Oct 1st
scan_year_singleDay = "20030930" # noaa obs
time_slice = 0 # image
plot_sunspots_and_active_regions(df, scan_year_singleDay,\
                                 new_feat_objects_singleDay,\
                                 time_slice)

In [None]:
# NOAA -- end of day, Sept 30st
# Image -- middle of day, Oct 1st
scan_year_singleDay = "20030930" # noaa obs
time_slice = 9 # image
plot_sunspots_and_active_regions(df, scan_year_singleDay,\
                                 new_feat_objects_singleDay,\
                                 time_slice)

In [None]:
# NOAA -- end of day, Sept 30st
# Image -- end of day, Oct 1st
scan_year_singleDay = "20030930" # noaa obs
time_slice = 4 # image
plot_sunspots_and_active_regions(df, scan_year_singleDay,\
                                 new_feat_objects_singleDay,\
                                 time_slice)

In [1]:
range(0,1)

[0]