In [8]:
# import libraries related to querying links and downloading files from the web
import pandas as pd
from pipmag import selector_utils as su
from pipmag import ads_utils as ads
# If you get the error ModuleNotFoundError: No module named 'pipmag', run the following line and restart the kernel:
# %pip install -e ..

In [9]:
# Path to the latest CSV file, generated by running the following from the root folder:
#   python -m pipmag.gen_la_palma_df
la_palma_obs_data_file = '../data/la_palma_obs_data.csv'

In [10]:
# Load the DataFrame from the CSV file at la_palma_obs_data_file.
# List-valued columns arrive as plain strings here and are converted in a later step.
df = pd.read_csv(la_palma_obs_data_file)

In [11]:
# Read the date_time column as datetime
df['date_time'] = pd.to_datetime(df['date_time'])

# List of columns to convert from strings to lists
columns_to_convert = ['links', 'video_links', 'image_links', 'instruments']


def _split_to_list(value):
    """Return `value` as a list.

    Lists pass through unchanged, strings are split on ';', and anything
    else (e.g. the NaN that read_csv produces for an empty field) becomes [].
    """
    if isinstance(value, list):
        # Already converted (this cell was re-run): keep the data instead of
        # replacing every entry with an empty list.
        return value
    return value.split(';') if isinstance(value, str) else []


# Convert the strings in each column back to lists
for col in columns_to_convert:
    df[col] = df[col].apply(_split_to_list)

# List of columns to convert from NaN to None
columns_to_convert = ['comments', 'polarimetry', 'target']

# Convert the NaNs in each column back to None
for col in columns_to_convert:
    df[col] = df[col].apply(lambda x: None if pd.isna(x) else x)

# Convert the 'polarimetry' column to string
# (missing values therefore end up as the literal string 'None')
df['polarimetry'] = df['polarimetry'].apply(str)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 793 entries, 0 to 792
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date_time    793 non-null    datetime64[ns]
 1   year         793 non-null    int64         
 2   month        793 non-null    int64         
 3   day          793 non-null    int64         
 4   time         793 non-null    object        
 5   instruments  793 non-null    object        
 6   target       349 non-null    object        
 7   comments     129 non-null    object        
 8   video_links  793 non-null    object        
 9   image_links  793 non-null    object        
 10  links        793 non-null    object        
 11  num_links    793 non-null    int64         
 12  polarimetry  793 non-null    object        
dtypes: datetime64[ns](1), int64(4), object(8)
memory usage: 80.7+ KB


In [None]:
# create a widget to display movies based on year, month, day and time
# and to update the target, instruments, polarimetry and comments columns of the dataframe
# (the list passed below names the columns that get an editable text field)
selector = su.VideoSelector2(df, ['target', 'instruments', 'polarimetry', 'comments'])
selector.create_widget()

Dropdown(description='Year:', options=(2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023), valu…

Dropdown(description='Month:', options=(), value=None)

Dropdown(description='Day:', options=(), value=None)

Dropdown(description='Time:', options=(), value=None)

Dropdown(description='Links:', options=(), value=None)

Button(description='Show', style=ButtonStyle())

Output()

Button(description='Update', style=ButtonStyle())

Text(value='', description='target:')

Text(value='', description='instruments:')

Text(value='', description='polarimetry:')

Text(value='', description='comments:')

Button(description='Update', style=ButtonStyle())

In [None]:
# 🔍 ADS Search
# NOTE(review): 34 is a hard-coded index passed to get_results — presumably a row
# of df; confirm against ads.ADS_Search and change it to the observation of interest.
index = 34
search = ads.ADS_Search(df)
search.get_results(index, pretty_print=True)

In [None]:
## Rewrite keywords in target column to general terms 
def rewrite_keywords(text, target_keywords, relpacewith):
    """Replace standalone occurrences of any target keyword in ``text``.

    A keyword only matches when it is not glued to other letters, so short
    abbreviations such as 'ss' or 'AR' are no longer rewritten inside
    unrelated words ('moss', 'PARKER'). A single trailing plural 's' is
    tolerated and kept ('sunspots' -> 'Sunspots'), and digits may follow
    directly ('AR12345' -> 'Active Region12345').

    Parameters
    ----------
    text : str
        The text to normalise.
    target_keywords : iterable of str
        Keyword variants to look for (case-sensitive). Empty strings are ignored.
    relpacewith : str
        The general term substituted for every matched keyword. (The
        parameter name keeps its original spelling for compatibility.)

    Returns
    -------
    str
        ``text`` with every matched keyword replaced; unchanged if nothing
        matches or no usable keyword is given.
    """
    import re  # local import keeps this cell self-contained

    # Longest keyword first so a longer variant wins over a shorter one at the
    # same position; the secondary sort makes the result independent of the
    # iteration order of a set.
    keywords = sorted((kw for kw in target_keywords if kw), key=lambda kw: (-len(kw), kw))
    if not keywords:
        return text

    alternatives = '|'.join(re.escape(kw) for kw in keywords)
    # No letter directly before the keyword; after it, an optional plural 's'
    # and then no further letter.
    pattern = r'(?<![A-Za-z])(?:' + alternatives + r')(?=s?(?![A-Za-z]))'
    # A callable replacement inserts the text literally (no backslash escapes)
    # and the single pass never re-scans already replaced text.
    return re.sub(pattern, lambda match: relpacewith, text)

# Keyword variants seen in the target column, grouped by the general term
# each group is rewritten to.
ACTIVE_REGION_KEYWORDS = {'active region', 'Active region', 'AR'}
QUIET_SUN_KEYWORDS = {'quiet Sun', 'quiet sun', 'QS', 'Quiet sun'}
SUNSPOT_KEYWORDS = {'sunspot', 'Sunspot', 'SS', 'ss', 'SUnspot'}

# Apply one rewrite pass per keyword group, in this order; missing targets
# stay None.
for keyword_group, general_term in (
    (ACTIVE_REGION_KEYWORDS, "Active Region"),
    (QUIET_SUN_KEYWORDS, "Quiet Sun"),
    (SUNSPOT_KEYWORDS, "Sunspot"),
):
    df['target'] = df['target'].apply(
        lambda x: None if pd.isna(x) else rewrite_keywords(x, keyword_group, general_term)
    )

In [None]:
# make a copy of the dataframe so that df itself keeps its list-valued columns
df_copy = df.copy()


def _join_for_csv(value):
    """Serialise a list cell as a ';'-separated string for the CSV file.

    A cell that already holds a plain string is returned unchanged, because
    ';'.join on a str would put a ';' between each of its characters.
    NOTE(review): strings may appear here if the widget's text fields write
    raw text into a list column such as 'instruments' — confirm in VideoSelector2.
    """
    if isinstance(value, str):
        return value
    return ';'.join(value)


# List of columns to convert from lists to strings
columns_to_convert = ['links', 'video_links', 'image_links', 'instruments']
for col in columns_to_convert:
    df_copy[col] = df_copy[col].apply(_join_for_csv)

# save the updated dataframe as a .csv file (overwrites the file that was loaded)
df_copy.to_csv(la_palma_obs_data_file, index=False)