In [1]:
# import libraries related to querying links and downloading files from the web
import os
from IPython.display import Video
from tqdm import tqdm
import importlib
from IPython.display import display
import ipywidgets as widgets
import numpy as np
import csv
import pandas as pd
from datetime import datetime
import re
import pipmag as pm

In [2]:
# reload the pipmag module to make sure that the latest version is used
importlib.reload(pm)
print('Imported updated pipmag.py')

Imported updated pipmag.py


In [3]:
# Print the years for which the La Palma Observatory has data at UiO
obs_years = pm.get_obs_years()

In [4]:
# Get the observing dates for all the years
obs_dates = pm.get_obs_dates(obs_years)
obs_dates_list = pm.get_obs_dates_list(obs_dates)
# print the first, last and total number of observing dates
print(f'first entry: {obs_dates_list[0]}\nlast entry : {obs_dates_list[-1]}\ntotal observing dates: {len(obs_dates_list)}')

first entry: 2021-06-23
last entry : 2015-10-11
total observing dates: 109


In [5]:
# check if all_media_links.pkl exists then load the pickle file, otherwise get the links
if not os.path.isfile('all_media_links.pkl'):
    video_links = pm.get_video_liks(obs_dates) # get the video links, one for each observing date
    image_links = pm.get_image_links(obs_dates) # get the image links, one for each observing date
    all_image_links = pm.get_all_links(image_links) # get all the image links, one for each image
    all_video_links = pm.get_all_links(video_links) # get all the video links, one for each video
    # print the number of video and image links and all the video and image links 
    print(f'number of video links: {len(all_video_links)}\nnumber of image links: {len(all_image_links)}')
    print(f'video links: {len(all_video_links)}\nimage links: {len(all_image_links)}')
    all_media_links = all_image_links + all_video_links # combine the image and video links
    all_media_links = sorted(all_media_links) #sort the list of links
    # print the total number of media links
    print(f'total number of media links: {len(all_media_links)}')
    # save all the media links as a pickle file
    pm.save_pickle(all_media_links, 'all_media_links.pkl')
else:
    # load the pickle file
    all_media_links = pm.load_pickle('all_media_links.pkl')
    print(f'total number of media links: {len(all_media_links)}')

loaded all_media_links.pkl successfully
total number of media links: 6861


In [6]:
# get the date and time from the links and find the links that do not have date and time and save them as a list
date_time_from_all_media_links, date_time_not_found = pm.get_date_time_from_link_list(all_media_links)
# remove all the links that do not have a date and time from all_media_links
all_media_links_with_date_time = [link for link in all_media_links if link not in date_time_not_found]
# print the number of links that contain date and time and the number of links that do not contain date and time
print(f'number of links with date and time: {len(all_media_links_with_date_time)}\nnumber of links without date and time: {len(date_time_not_found)}')
invalid_date = pm.get_invalid_dates(date_time_from_all_media_links)

number of links with date and time: 6817
number of links without date and time: 44
All dates in date_time_list are valid


In [7]:
# remove the entries from date_time_from_all_media_links that are not in the correct format
# date_time_from_all_media_links = [date for date in date_time_from_all_media_links if date not in invalid_dates]

In [8]:
# convert the date and time to datetime format
date_time_from_all_media_links_datetime = pm.convert_to_datetime(date_time_from_all_media_links)

In [9]:
# get the unique date_time_from_all_media_links_datetime  values
unique_date_time_from_all_media_links_datetime = list(set(date_time_from_all_media_links_datetime))
# print the number of unique date_time_from_all_media_links_datetime values
print(f'number of unique date_time_from_all_media_links_datetime values: {len(unique_date_time_from_all_media_links_datetime)}')

number of unique date_time_from_all_media_links_datetime values: 749


In [19]:
# create a dataframe with the date_time_from_all_media_links_datetime as the index and the all_media_links as the column
df = pd.DataFrame(all_media_links_with_date_time, index=date_time_from_all_media_links_datetime, columns=['links'])
#print first, last and total number of entries in the dataframe
print(f'first entry: {df.index[0]}\nlast entry : {df.index[-1]}\ntotal entries: {len(df.index)}')

first entry: 2013-06-30 09:15:50
last entry : 2022-09-08 08:40:11
total entries: 6817


In [20]:
# group the dataframe by the time index and combine the links into a list
df = df.groupby(df.index).agg({'links': lambda x: list(x)})
# print the first, last and total number of entries in the dataframe
print(f'first entry: {df.index[0]}\nlast entry : {df.index[-1]}\ntotal entries: {len(df.index)}')

first entry: 2013-06-30 09:15:50
last entry : 2022-09-17 14:34:13
total entries: 749


In [21]:
# add a column called 'obs_id' and set it equal to the row number of the dataframe
# add the 'id' column
df['obs_id'] = range(1, len(df) + 1)
# set the index as 'obs_id' and add a column for the date and time
df['date_time'] = df.index
df = df.set_index('obs_id')
# add a column for the number of links in each row
df['num_links'] = df['links'].apply(lambda x: len(x))
# add columns for the year, month and day to the dataframe
df['year'] = df['date_time'].apply(lambda x: x.year)
df['month'] = df['date_time'].apply(lambda x: x.month)
df['day'] = df['date_time'].apply(lambda x: x.day)
# add a column for the time of day
df['time'] = df['date_time'].apply(lambda x: x.time())
# add a column called 'target' and set it equal to None
df['target'] = None
instrument_keywords={'CRISP': ['wb_6563','ha','Crisp-R'],'CROMIS':['Chromis'],'IRIS':['sji']}
# apply the get_instrument_info function to the 'links' column of the dataframe and add the result to a new column called 'instruments'
df['instruments'] = df['links'].apply(lambda x: pm.get_instrument_info(x, instrument_keywords))
# apply the get_links_with_string function to the 'links' column of the dataframe with the strings 'mp4' and 'mov' and add the result to a new column called 'video_links'
df['video_links'] = df['links'].apply(lambda x: pm.get_links_with_string(x, ['mp4','mov']))
# apply the get_links_with_string function to the 'links' column of the dataframe with the strings 'jpg' and 'png' and add the result to a new column called 'image_links'
df['image_links'] = df['links'].apply(lambda x: pm.get_links_with_string(x, ['jpg','png']))
#pm.get_links_with_string(df.iloc[0]['links'], ['mp4','mov'])
# make the columns date-time, year, month, day, time, instruments, target, video_links, image_links, links, num_links
df = df[['date_time', 'year', 'month', 'day', 'time', 'instruments', 'target', 'video_links', 'image_links', 'links', 'num_links']]
# print a summary of the dataframe
df.info()
# save the dataframe as a pickle file
df.to_pickle('la_palma_obs_data.pkl')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 749 entries, 1 to 749
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date_time    749 non-null    datetime64[ns]
 1   year         749 non-null    int64         
 2   month        749 non-null    int64         
 3   day          749 non-null    int64         
 4   time         749 non-null    object        
 5   instruments  534 non-null    object        
 6   target       0 non-null      object        
 7   video_links  749 non-null    object        
 8   image_links  749 non-null    object        
 9   links        749 non-null    object        
 10  num_links    749 non-null    int64         
dtypes: datetime64[ns](1), int64(4), object(6)
memory usage: 70.2+ KB


In [22]:
df = pd.read_pickle('la_palma_obs_data.pkl')
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 749 entries, 1 to 749
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date_time    749 non-null    datetime64[ns]
 1   year         749 non-null    int64         
 2   month        749 non-null    int64         
 3   day          749 non-null    int64         
 4   time         749 non-null    object        
 5   instruments  534 non-null    object        
 6   target       0 non-null      object        
 7   video_links  749 non-null    object        
 8   image_links  749 non-null    object        
 9   links        749 non-null    object        
 10  num_links    749 non-null    int64         
dtypes: datetime64[ns](1), int64(4), object(6)
memory usage: 70.2+ KB


In [48]:
#import interact 
from ipywidgets import interact
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output

In [99]:
df.iloc[0]['date_time']

Timestamp('2013-06-30 09:15:50')

In [103]:
df[df['date_time'] == df.iloc[0]['date_time']]['video_links'].values[0]

['http://tsih3.uio.no/lapalma/2013/2013-06-30//./wb_6563_2013-06-30T09:15:50_scans=0-2133_minmax.mp4']

In [105]:
# print all the video links for a given date_time
def print_video_links(date_time):
    # get the video links for the given date_time
    video_links = df[df['date_time'] == date_time]['links'].values[0]
    # print the video links
    for link in video_links:
        print(link)
print_video_links(df.iloc[0]['date_time'])

http://tsih3.uio.no/lapalma/2013/2013-06-30//./wb_6563_2013-06-30T09:15:50_scans=0-2133_histoopt.mp4
http://tsih3.uio.no/lapalma/2013/2013-06-30//./wb_6563_2013-06-30T09:15:50_scans=0-2133_minmax.mp4
http://tsih3.uio.no/lapalma/2013/2013-06-30//halpha_SDO_8pan_2013-06-30_091550.mp4


In [None]:

def get_links(date_time):
    return df[df['date_time'] == date_time]['video_links'].values[0]

# function that takes a list of movie links and displays them
def display_movies(links):
    for link in links:
        display(widgets.HTML('<video width="720" height="640" controls><source src="{}" type="video/mp4"></video>'.format(link)))
        print(120*'=')
        
def print_links(links):
    for link in links:
        print(link)
    print(120*'=')
# Create a dropdown widget for the date_time column
date_time_dropdown = widgets.Dropdown(options=df['date_time'], description='Date Time:')

# Create an output widget to display the selected links
output = widgets.Output()

# Create a function that updates the output widget when the dropdown value changes
def dropdown_value_changed(change):
    with output:
        clear_output()
        links = get_links(change.new)
        for link in links:
            display(Video(link))
        #display_movies(links)
        #print_links(links)

# Register the function to be called when the dropdown value changes
date_time_dropdown.observe(dropdown_value_changed, names='value')

# Display the dropdown widget and the output widget
display(date_time_dropdown)
display(output)


In [167]:
def get_links(date_time):
    return df[df['date_time'] == date_time]['video_links'].values[0]
        
# Create a dropdown widget for the date_time column
date_time_dropdown = widgets.Dropdown(options=df['date_time'], description='Date Time:')

# Create a dropdown widget for the movie links column
links_dropdown = widgets.Dropdown(options=[], description='Movie Links:')

# Create a variable to store the selected link
selected_link = ''

# Create an output widget to display the selected link
output = widgets.Output()

# Function to update the links dropdown based on the selected date_time
def update_links(change):
    date_time = change.new
    links = get_links(date_time)
    links_dropdown.options = links

# Function to update the selected link when the links dropdown value changes
def links_value_changed(change):
    global selected_link
    selected_link = change.new

# Register the function to be called when the date_time dropdown value changes
date_time_dropdown.observe(update_links, names='value')

# Register the function to be called when the links dropdown value changes
links_dropdown.observe(links_value_changed, names='value')

# Display the dropdown widgets and the output widget
display(date_time_dropdown)
display(links_dropdown)

def display_selected_link_button(b):
    with output:
        clear_output()
        #display_selected_link(selected_link)
        display(Video(selected_link))

display_button = widgets.Button(description='Show')
display_button.on_click(display_selected_link_button)
display(display_button)
display(output)


Dropdown(description='Date Time:', options=(Timestamp('2013-06-30 09:15:50'), Timestamp('2014-09-09 08:13:40')…

Dropdown(description='Movie Links:', options=(), value=None)

Button(description='Show', style=ButtonStyle())

Output()

In [209]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6817 entries, 1 to 6817
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date_time    6817 non-null   datetime64[ns]
 1   links        6817 non-null   object        
 2   num_links    6817 non-null   int64         
 3   instruments  0 non-null      object        
 4   video_links  6817 non-null   object        
 5   image_links  6817 non-null   object        
 6   target       0 non-null      object        
 7   year         6817 non-null   int64         
 8   month        6817 non-null   int64         
 9   day          6817 non-null   int64         
 10  time         6817 non-null   object        
 11  link_name    6817 non-null   object        
dtypes: datetime64[ns](1), int64(4), object(7)
memory usage: 692.4+ KB


In [186]:
# Create a dropdown widget for the year column
year_dropdown = widgets.Dropdown(options=df['year'].unique(), description='Year:')

# Create a dropdown widget for the month column
month_dropdown = widgets.Dropdown(options=[], description='Month:')

# Create a dropdown widget for the day column
day_dropdown = widgets.Dropdown(options=[], description='Day:')

# Create a dropdown widget for the time column
time_dropdown = widgets.Dropdown(options=[], description='Time:')

# Create a dropdown widget for the links column
links_dropdown = widgets.Dropdown(options=[], description='Links:')

# Create a variable to store the selected link
selected_link = ''

# Create a variable to store the selected link for display
selected_link_for_display = ''

# Create an output widget to display the selected link
output = widgets.Output()

# Function to update the month dropdown based on the selected year
def update_months(change):
    year = change.new
    months = df[df['year'] == year]['month'].unique()
    month_dropdown.options = months

# Function to update the day dropdown based on the selected month and year
def update_days(change):
    year = year_dropdown.value
    month = change.new
    days = df[(df['year'] == year) & (df['month'] == month)]['day'].unique()
    day_dropdown.options = days

# Function to update the time dropdown based on the selected day, month, and year
def update_time(change):
    year = year_dropdown.value
    month = month_dropdown.value
    day = change.new
    time = df[(df['year'] == year) & (df['month'] == month) & (df['day'] == day)]['time'].unique()
    time_dropdown.options = time

# Function to update the links dropdown based on the selected time
def update_links(change):
    time = change.new
    links = list(df[df['time'] == time]['links'].values[0])
    links_dropdown.options = links

# Function to update the selected link when the links dropdown value changes
def links_value_changed(change):
    global selected_link
    global selected_link_for_display
    selected_link = change.new
    selected_link_for_display = selected_link

# Function to display the selected link when the display button is pressed
def display_selected_link(b):
    with output:
        clear_output()
        display(Video(selected_link_for_display, html_attributes='controls autoplay loop'))

# Register the functions to be called when the year, month, and day dropdown values change
year_dropdown.observe(update_months, names='value')
month_dropdown.observe(update_days, names='value')
day_dropdown.observe(update_time, names='value')

# Register the function to be called when the time dropdown value changes
time_dropdown.observe(update_links, names='value')

# Register the function to be called when the links dropdown value changes
links_dropdown.observe(links_value_changed, names='value')

#Create a button widget to display the selected link
display_button = widgets.Button(description='Show')
display_button.on_click(display_selected_link)

#Display the dropdown widgets, the button, and the output widget
display(year_dropdown)
display(month_dropdown)
display(day_dropdown)
display(time_dropdown)
display(links_dropdown)
display(display_button)
display(output)


Dropdown(description='Year:', options=(2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022), value=2013…

Dropdown(description='Month:', options=(), value=None)

Dropdown(description='Day:', options=(), value=None)

Dropdown(description='Time:', options=(), value=None)

Dropdown(description='Links:', options=(), value=None)

Button(description='Show', style=ButtonStyle())

Output()

In [158]:
url1=df.loc[1]['video_links'][0]
# write a function that plays the mp4 file in the url
def play_mp4(url):
    return Video(url, width=720, height=480)

In [159]:
play_mp4(url1)

In [79]:
# find the total number of observations
total_obs = len(df)
print(f"Total number of observations: {total_obs}")

Total number of observations: 749


In [80]:
# find the total number of links for all dates and times
total_links = len(all_media_links_with_date_time)
print(f"Total number of links: {total_links}")

Total number of links: 6817


In [86]:
# find the obsid with the most links
max_links = df['links'].value_counts().max()
print(f"Maximum number of links for a single observation: {max_links}")


Maximum number of links for a single observation: 1


In [66]:
df.iloc[3]['links']

'http://tsih3.uio.no/lapalma/2014/2014-09-09//ha+ca+sji_6pan_2014-09-09_20140909_075943_3860256865.mp4'

In [32]:
all_date_time_from_image_list = pm.get_date_time_from_link_list(all_image_links)

In [33]:
all_date_time_from_video_list = pm.get_date_time_from_link_list(all_video_links)

In [34]:
len(all_date_time_from_image_list)

2945

In [35]:
len(all_date_time_from_video_list)

3345

In [36]:
all_date_time_from_image_list[0]

'2018-08-28_13:45:09'

In [37]:
all_date_time_from_video_list[0]

'2013-06-30_09:15:50'

In [None]:
# combine the two lists
all_date_time_from_image_list.extend(all_date_time_from_video_list)
# sort the list
all_date_time_from_image_list.sort()
# remove duplicates

In [293]:
# create a dataframe with the date and time as columns
df = pd.DataFrame({'date': all_dates, 'time': all_times})
# add a column with the datetime object combining the date and time
df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])
# sort the dataframe by the datetime column
df = df.sort_values(by='datetime')
# use datetime as index
df = df.set_index('datetime')
df.head()

Unnamed: 0_level_0,date,time
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-08-28 13:45:09,2018-08-28,13:45:09
2018-08-28 13:45:09,2018-08-28,13:45:09
2021-06-22 08:17:48,2021-06-22,08:17:48
2021-06-23 07:56:27,2021-06-23,07:56:27
2021-06-23 08:35:52,2021-06-23,08:35:52


In [294]:
# add image links to the dataframe as a column
df['image_link'] = all_image_links
df.head()

Unnamed: 0_level_0,date,time,image_link
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-08-28 13:45:09,2018-08-28,13:45:09,http://tsih3.uio.no/lapalma/2018/2018-08-28//....
2018-08-28 13:45:09,2018-08-28,13:45:09,http://tsih3.uio.no/lapalma/2018/2018-08-28//....
2021-06-22 08:17:48,2021-06-22,08:17:48,http://tsih3.uio.no/lapalma/2021/2021-06-22//....
2021-06-23 07:56:27,2021-06-23,07:56:27,http://tsih3.uio.no/lapalma/2021/2021-06-23//C...
2021-06-23 08:35:52,2021-06-23,08:35:52,http://tsih3.uio.no/lapalma/2021/2021-06-23//C...


In [None]:
# add column called 'instrum

In [236]:
all_image_links[4]

'http://tsih3.uio.no/lapalma/2021/2021-06-23//Crisp-T_quick_2021-06-23_083552.jpg'

In [189]:
# define a regex pattern to capture the observing date from the image link
import re
pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
date_string = pattern.findall(all_image_links[0])[0]
date_string

'2018-08-28'

In [190]:
# define a regex pattern to capture the observing time from the image link
pattern = re.compile(r'\d{2}:\d{2}:\d{2}')
time_string = pattern.findall(all_image_links[0])[0]
time_string

'13:45:09'

In [202]:
date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
list(set(date_pattern.findall(all_image_links[0])))[0]


'2018-08-28'

In [209]:
all_image_links[-5:]

['http://tsih3.uio.no/lapalma/2022/2022.09.08//./08:40:11/./Chromis-N_quick_2022-09-08_08:40:11.jpg',
 'http://tsih3.uio.no/lapalma/2022/2022.09.08//./08:40:11/./Chromis-N_quick_2022-09-08_08:40:11_3934_3934_+0_scan=186.jpg',
 'http://tsih3.uio.no/lapalma/2022/2022.09.08//./08:40:11/./Chromis-N_quick_2022-09-08_08:40:11_3934_3934_+657_scan=4.jpg',
 'http://tsih3.uio.no/lapalma/2022/2022.09.08//./08:40:11/./Chromis-N_quick_2022-09-08_08:40:11_3934_3934_-657_scan=4.jpg',
 'http://tsih3.uio.no/lapalma/2022/2022.09.08//./08:40:11/./Chromis-N_quick_2022-09-08_08:40:11_3999_4000_+1249_scan=76.jpg']

In [None]:
# write a regex that matches the date in either 'YYYY-MM-DD' or 'YYYY.MM.DD' format
date_pattern = re.compile(r'\d{4}[-.]\d{2}[-.]\d{2}')
# write a regex that matches the time in either 'HH:MM:SS', 'HH.MM.SS' or 'HHMMSS' format and it should not match a longer string
time_pattern = re.compile(r'\d{2}[:.]\d{2}[:.]\d{2}')

In [215]:
date_pattern = re.compile(r'\d{4}[-.]\d{2}[-.]\d{2}')
date_pattern.findall(all_image_links[-1])[1]

'2022-09-08'

In [230]:
# parse the date and time in the string = '2021-06-23_075627'
test_string = '2021-06-23_075627'
date_pattern = re.compile(r'\d{4}[-.]\d{2}[-.]\d{2}')
time_pattern = re.compile(r'\d{2}[:.]\d{2}[:.]\d{2}')
date_string = date_pattern.findall(test_string)[0]
time_string = time_pattern.findall(test_string)[0]
print(f'date: {date_string}\ntime: {time_string}')

IndexError: list index out of range

In [217]:
all_image_links[-1]

'http://tsih3.uio.no/lapalma/2022/2022.09.08//./08:40:11/./Chromis-N_quick_2022-09-08_08:40:11_3999_4000_+1249_scan=76.jpg'

In [224]:
time_pattern = re.compile(r'(?<=_)(\d{2}[:.]){2}\d{2}|\d{6}(?=_)')
time_pattern.findall(all_image_links[3])

[]

In [222]:
all_image_links[3]

'http://tsih3.uio.no/lapalma/2021/2021-06-23//Crisp-T_quick_2021-06-23_075627.jpg'

In [220]:
# run through the all_image_links list and extract the observing date and time for each image and return a datetime object for each image
date_pattern = re.compile(r'\d{4}[-.]\d{2}[-.]\d{2}')
# match 
time_pattern = re.compile(r'(?<=_)\d{2}[:.]\d{2}[:.]\d{2}(?=_)')
date_time_list = []
for image_link in all_image_links[:5]:
    date_string = date_pattern.findall(image_link)[0]
    time_string = time_pattern.findall(image_link)
    print(date_string, time_string)


2018-08-28 ['13:45:09']
2018-08-28 ['13:45:09']
2021-06-22 ['08:17:48']
2021-06-23 []
2021-06-23 []


In [191]:
# define a regex pattern to caputure the instument name from the image link using an instument name list
instrument_list = ['Crisp-T']
pattern = re.compile(r'(?<=/)\w+(?=/)')
instrument_string = pattern.findall(all_image_links[0])[0]
instrument_string

'lapalma'

In [183]:
# define a function that parses date and time from the file name and returns a list of dictionaries with the date and time as keys and the file name as value
def get_date_time(all_image_links):
    date_time = []
    for link in all_image_links:
        # get the file name from the link
        file_name = link.split('/')[-1]
        # parse the date and time from the file name
        date = file_name.split('_')[0]
        time = file_name.split('_')[1]
        # append the date and time to the date_time list
        date_time.append({'date': date, 'time': time, 'file_name': file_name})
    return date_time

In [184]:
obs_date_time = get_date_time(get_all_image_links(image_links))

In [186]:
obs_date_time[2]

{'date': 'Chromis-N',
 'time': 'quick',
 'file_name': 'Chromis-N_quick_2021-06-22_08:17:48_4862_4861_-600_scan=232.jpg'}

In [178]:
all_image_links = get_all_image_links(image_links)

In [182]:
len(all_image_links)

2945

In [129]:
obs_dates_list = pm.get_obs_dates_list(obs_dates)

In [164]:
# create a pandas dataframe with obs_dates_list as index and no columns, sort the index
obs_dates_df = pd.DataFrame(index=obs_dates_list)
obs_dates_df.sort_index(inplace=True)
# use the obs_dates_list as index for the video_links and image_links lists
obs_dates_df['videos'] = video_links
obs_dates_df['images'] = image_links
# add the numeber of videos and images for each date as columns
obs_dates_df['num_videos'] = obs_dates_df['videos'].apply(len)
obs_dates_df['num_images'] = obs_dates_df['images'].apply(len)
obs_dates_df

Unnamed: 0,videos,images,num_videos,num_images
2013-06-30,[http://tsih3.uio.no/lapalma/2013/2013-06-30//...,File not found,7,14
2013-09-01,[http://tsih3.uio.no/lapalma/2013/2013-09-01//...,File not found,4,14
2014-09-09,[http://tsih3.uio.no/lapalma/2014/2014-09-09//...,File not found,7,14
2014-09-15,[http://tsih3.uio.no/lapalma/2014/2014-09-15//...,File not found,2,14
2015-06-18,[http://tsih3.uio.no/lapalma/2015/2015-06-18//...,File not found,4,14
...,...,...,...,...
2022-09-13,[http://tsih3.uio.no/lapalma/2022/2022-09-13//...,[http://tsih3.uio.no/lapalma/2022/2022-09-13//...,14,14
2022-09-14,[http://tsih3.uio.no/lapalma/2022/2022-09-14//...,[http://tsih3.uio.no/lapalma/2022/2022-09-14//...,91,91
2022-09-15,[http://tsih3.uio.no/lapalma/2022/2022-09-15//...,[http://tsih3.uio.no/lapalma/2022/2022-09-15//...,48,48
2022-09-16,[http://tsih3.uio.no/lapalma/2022/2022-09-16//...,[http://tsih3.uio.no/lapalma/2022/2022-09-16//...,43,43


In [None]:
obs_dates_df.loc['2022-09-03', 'videos']

In [163]:
# print the number of videos for df entry for '2022-09-03'
print(f'Number of videos for 2022-09-03: {len(obs_dates_df.loc["2022-09-03", "videos"])}')

Number of videos for 2022-09-03: 72


In [137]:
#import pandas as pd and create a dataframe with the dates as index and the links as columns

df = pd.DataFrame({'video_links': video_links, 'image_links': image_links}, index=obs_dates_list)
df.index.name = 'obs_dates'
df

ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.

In [134]:
# get all the video links for a given date
def get_video_links(date):
    return df.loc[date]['video_links']

In [99]:
pm.print_obs_dates('2015',obs_dates)

first: 2015/2015-06-18, last: 2015/2015-10-11, total: 4
01: 2015-06-18
02: 2015-09-05
03: 2015-09-09
04: 2015-10-11


In [75]:
pm.find_obs_dates('2015-09-',obs_dates)

01: 2015-09-05
02: 2015-09-09


In [76]:
obs_dates_list = pm.get_obs_dates_list(obs_dates)

In [77]:
obs_dates_list[110]

'2022-09-08'

In [78]:
obs_dates[110]

'2022/2022.09.08/'

In [126]:
len(obs_dates_list)

111

In [127]:
# for the obs_dates list, get the list of files with either .mp4 or .mov extension and save it as a dictionary wih the key being the observing date if the files are not founds then add a None value to the dictionary
file_video_links = 'video_links.csv'

if os.path.isfile(file_video_links) is False:
    video_links = {}
    i = 0
    for obs_date in tqdm(obs_dates):
        # get the list of files with either .mp4 or .mov extension
        files = pm.get_files(lapalma_url + obs_date + '/', '.mp4') + pm.get_files(lapalma_url + obs_date + '/', '.mov')
        # if the list is not empty, save it as a dictionary wih the key being the observing date
        key = obs_date[5:-1]
        # replace the dots with dashes
        key = key.replace('.', '-')
        if files:
            video_links[key] = files
        # if the list is empty, add a None value to the dictionary
        else:
            video_links[key] = 'File not found'

        # save the image_links in a csv file:
        w = csv.writer(open(file_video_links, "w"))
        for key, val in image_links.items():
            w.writerow([key, val])

else:
    # load the image_links in a csv file:
    reader = csv.reader(open(file_image_links, 'r'))
    image_links = {}
    for row in reader:
        k, v = row
        image_links[k] = v
    print(file_video_links+' loaded.')

100%|██████████| 111/111 [00:12<00:00,  9.13it/s]

video_links.csv loaded.





In [128]:
file_image_links = 'image_links.csv'

if os.path.isfile(file_image_links) is False:
    image_links = {}
    i = 0 
    for obs_date in tqdm(obs_dates):
        # get the list of files with either .mp4 or .mov extension
        files = get_files(lapalma_url + obs_date + '/', '.jpg')
        # if the list is not empty, save it as a dictionary wih the key being the observing date
        key = obs_date[5:-1]
        # replace the dots with dashes
        key = key.replace('.', '-')
        if files:
            image_links[key] = files
        # if the list is empty, add a None value to the dictionary
        else:
            image_links[key] = 'File not found'
            
    # save the image_links in a csv file:
    w = csv.writer(open(file_image_links, "w"))
    for key, val in image_links.items():
        w.writerow([key, val])

else:
    # load the image_links in a csv file:
    reader = csv.reader(open(file_image_links, 'r'))
    image_links = {}
    for row in reader:
        k, v = row
        image_links[k] = v
    print(file_image_links+' loaded.')


image_links.csv loaded.


In [129]:
# write a function that plays the mp4 file in the url
def play_mp4(url):
    return Video(url, width=720, height=480)

In [130]:
# define a function called video that takes a date string and prints an enumerated list of the video files for that date from the video_links dictionary
def video(date_string):
    # if the date is in the dictionary, print the list of video files
    if date_string in video_links:
        for i, video_link in enumerate(video_links[date_string]):
            print(f'{i:02d}: {video_link}')
    # if the date is not in the dictionary, print a message
    else:
        print('No video files found')
    return None

In [131]:
# find_obs_dates('2020-09-')
find_obs_dates('2015-09-0')

01: 2015-09-05
02: 2015-09-09


In [132]:
# video('2020-09-29')
video('2015-09-09')

00: http://tsih3.uio.no/lapalma/2015/2015-09-09//Bz+Bh_09Sep2015.mp4
01: http://tsih3.uio.no/lapalma/2015/2015-09-09//Bz+Bh_09Sep2015_B250G.mp4
02: http://tsih3.uio.no/lapalma/2015/2015-09-09//hacore+blos+sji2796_3pan_2015-09-09_075958.mp4


In [133]:
# play_mp4(video_links['2020-09-29'][4])
play_mp4(video_links['2015-09-09'][1])

# Creating the database

In [134]:
import pandas as pd

raw_data = {
    'ID': ['2015-09-09 01'],
    'date': ['2015-09-09'],
    'url': ['2015-09-09'],
    'Target': ['Sunspot'],
    'Polarimetry': [False],
    'Instruments':['CRISP+CHROMIS'],
    'Photosphere': [False],
    'Chromosphere': [True],
    'Flux balance': [42],
    'Dynamism': ['Quiet'],
    }

df = pd.DataFrame(raw_data)
# Do not use print but display from IPython
# print(df)

In [None]:
# Read https://ipywidgets.readthedocs.io/en/8.0.2/examples/Widget%20List.html for more information.

In [135]:
# Create a pandas-table of all the movies in the database:
video_dates = list(video_links.keys())
ids = []; dates = []; urls = [];
for days in range(len(video_dates)):
    # Every id is the date and the video of that day
    for movie in range(len(video_links[video_dates[days]])):
        ids.append(video_dates[days]+'_'+str(movie))
        dates.append(video_dates[days])
        urls.append(video_links[video_dates[days]][movie])

# Creating the container for the database:    
from_links = {
    'ID': ids,
    'date': dates,
    'url': urls,
    'Target': np.empty_like(dates),
    }
db_from_links = pd.DataFrame(from_links)

# Display the database:
display(db_from_links)

Unnamed: 0,ID,date,url,Target
0,2013-06-30_0,2013-06-30,http://tsih3.uio.no/lapalma/2013/2013-06-30//halpha_SDO_8pan_2013-06-30_091550.mp4,
1,2013-06-30_1,2013-06-30,http://tsih3.uio.no/lapalma/2013/2013-06-30//halpha_scan_30Jun2013_giant_tornado.mp4,
2,2013-06-30_2,2013-06-30,http://tsih3.uio.no/lapalma/2013/2013-06-30//wb6563_30Jun2013_ff_framesel_1s.mp4,
3,2013-06-30_3,2013-06-30,http://tsih3.uio.no/lapalma/2013/2013-06-30//./wb_6563_2013-06-30T09:15:50_scans=0-2133_histoopt.mp4,
4,2013-06-30_4,2013-06-30,http://tsih3.uio.no/lapalma/2013/2013-06-30//./wb_6563_2013-06-30T09:15:50_scans=0-2133_minmax.mp4,
...,...,...,...,...
3911,2022-09-08_10,2022-09-08,http://tsih3.uio.no/lapalma/2022/2022.09.08//./08:40:11/./Chromis-N_quick_2022-09-08_08:40:11.mov,
3912,2022-09-08_11,2022-09-08,http://tsih3.uio.no/lapalma/2022/2022.09.08//./08:40:11/./Chromis-N_quick_2022-09-08_08:40:11_3934_3934_+0.mov,
3913,2022-09-08_12,2022-09-08,http://tsih3.uio.no/lapalma/2022/2022.09.08//./08:40:11/./Chromis-N_quick_2022-09-08_08:40:11_3934_3934_+657.mov,
3914,2022-09-08_13,2022-09-08,http://tsih3.uio.no/lapalma/2022/2022.09.08//./08:40:11/./Chromis-N_quick_2022-09-08_08:40:11_3934_3934_-657.mov,


In [151]:
video_dates_widget = widgets.Dropdown(options=video_dates, value=video_dates[-1], description='Dates:', disabled=False)
display(video_dates_widget)

Dropdown(description='Dates:', index=108, options=('2013-06-30', '2013-09-01', '2014-09-09', '2014-09-15', '20…

In [152]:
# Display all the things for a given day:
display(db_from_links[db_from_links['date']==video_dates_widget.value])

Unnamed: 0,ID,date,url,Target
3665,2022-09-17_0,2022-09-17,http://tsih3.uio.no/lapalma/2022/2022-09-17//halpha+blos+AIA+HMI_6pan_2022.09.17_102405.mp4,
3666,2022-09-17_1,2022-09-17,http://tsih3.uio.no/lapalma/2022/2022-09-17//halpha+blos+AIA_6pan_2022.09.17_102405.mp4,
3667,2022-09-17_2,2022-09-17,http://tsih3.uio.no/lapalma/2022/2022-09-17//halpha+blos_3pan_2022.09.17_102405.mp4,
3668,2022-09-17_3,2022-09-17,http://tsih3.uio.no/lapalma/2022/2022-09-17//halpha+blos_3pan_2022.09.17_125133.mp4,
3669,2022-09-17_4,2022-09-17,http://tsih3.uio.no/lapalma/2022/2022-09-17//halpha_3pan_2022-09-17_102405.mp4,
...,...,...,...,...
3764,2022-09-17_99,2022-09-17,http://tsih3.uio.no/lapalma/2022/2022-09-17//./14:34:13/./Chromis-N_quick_2022-09-17_14:34:13_3969_3969_+0.mov,
3765,2022-09-17_100,2022-09-17,http://tsih3.uio.no/lapalma/2022/2022-09-17//./14:34:13/./Crisp-R_quick_2022-09-17_14:34:13.mov,
3766,2022-09-17_101,2022-09-17,http://tsih3.uio.no/lapalma/2022/2022-09-17//./14:34:13/./Crisp-R_quick_2022-09-17_14:34:13_6563_6563_+0.mov,
3767,2022-09-17_102,2022-09-17,http://tsih3.uio.no/lapalma/2022/2022-09-17//./14:34:13/./Crisp-R_quick_2022-09-17_14:34:13_6563_6563_+800.mov,


In [138]:
df.to_csv('raw_data.csv', index=False)