In [None]:
from astropy.constants import c
import astropy.units as U
import math
import pyvo
import numpy as np 
import pandas as pd
import os 
import sys
import matplotlib.pyplot as plt
import seaborn as sns

# Directory that contains the current working directory.
parent_dir = os.path.split(os.getcwd())[0]

# TAP endpoint queried by this notebook, and a pyvo client bound to it.
service_url = "https://almascience.eso.org/tap"
service = pyvo.dal.TAPService(service_url)
def _covering_bin_edges(values, width):
    """Return equally spaced bin edges of size *width* spanning all of *values*.

    The first edge is the minimum of *values*; the last edge is greater than
    or equal to the maximum, so that no value falls outside the bins when
    they are used with ``pd.cut(..., include_lowest=True)``.

    Raises:
        ValueError: if *width* is not positive or *values* holds no finite
            value to derive a range from.
    """
    if width <= 0:
        raise ValueError("bin width must be positive")
    lo, hi = values.min(), values.max()
    if not (np.isfinite(lo) and np.isfinite(hi)):
        raise ValueError("cannot build bins: no finite values to bin")
    # At least one bin, even when every value is identical (hi == lo).
    n_bins = max(1, int(np.ceil((hi - lo) / width)))
    edges = lo + width * np.arange(n_bins + 1, dtype=float)
    # Guard against floating-point rounding leaving the last edge just below hi.
    edges[-1] = max(edges[-1], hi)
    return edges


def count_science_keywords_with_bands(service, *, freq_bin_width=50,
                                      time_bin_width=1000, max_exptime=3e4):
    """Count archive observations per science keyword, split three ways.

    Runs a query against ``ivoa.obscore`` (science observations only), keeps
    one row per ``member_ous_uid``, expands comma-separated science keywords
    and space-separated band lists into one row per (keyword, band) pair,
    shows two diagnostic histograms (frequency and integration time), and
    finally builds three count tables.

    Args:
        service: object exposing ``search(query)`` whose result provides
            ``to_table().to_pandas()`` (e.g. a ``pyvo.dal.TAPService``).
        freq_bin_width: width of the frequency bins, in the unit of the
            ``frequency`` column.
        time_bin_width: width of the integration-time bins, in the unit of
            the ``t_exptime`` column.
        max_exptime: rows with ``t_exptime`` above this value are excluded
            from the integration-time histogram and from all count tables.

    Returns:
        Tuple ``(db_sk_b, db_sk_f, db_sk_t)`` of DataFrames indexed by
        science keyword, with one column per band, per frequency bin and per
        integration-time bin respectively. Cells hold row counts (0 when a
        combination does not occur).

    Raises:
        ValueError: if no row with a finite frequency / integration time
            remains to derive the bins from.
    """
    query = """  
            SELECT science_keyword, band_list, member_ous_uid, frequency, t_exptime, t_max, antenna_arrays
            FROM ivoa.obscore  
            WHERE science_observation = 'T'    
            """
    db = service.search(query).to_table().to_pandas()
    db = db.drop_duplicates(subset='member_ous_uid')
    db = db[db['science_keyword'] != '']

    # One row per science keyword (keywords are comma separated).
    db['science_keyword'] = db['science_keyword'].str.split(',')
    db = db.explode('science_keyword')
    db['science_keyword'] = db['science_keyword'].str.strip()
    # A stray or trailing comma leaves a blank keyword behind; it is not a category.
    db = db[db['science_keyword'] != '']

    # One row per (science keyword, band). Splitting on any whitespace also
    # strips the band names and avoids empty tokens from repeated spaces.
    db['band_list'] = db['band_list'].str.split()
    db = db.explode('band_list')
    # NOTE: frequency, t_exptime and t_max are scalar columns, so they need
    # no explode step.

    # Checking frequency distribution
    plt.hist(db['frequency'], bins=50, alpha=0.75)
    plt.title('Frequency Distribution')
    # Bins below are documented as GHz, so label the axis in the same unit.
    plt.xlabel('Frequency (GHz)')
    plt.ylabel('Count')
    plt.show()

    # Checking integration time distribution up to max_exptime
    within_limit = db['t_exptime'] <= max_exptime
    plt.hist(db.loc[within_limit, 't_exptime'], bins=100, alpha=0.75, log=True)
    plt.title('Integration Time Distribution')
    plt.xlabel('Integration Time (s)')
    plt.ylabel('Count')
    plt.xscale('log')
    plt.show()

    # Copy so that the bin columns are added to an independent frame rather
    # than to a view of the unfiltered data.
    db = db.loc[within_limit].copy()

    # Bin edges cover the full [min, max] range and the lowest edge is
    # inclusive, so every row lands in a bin.
    frequency_bins = _covering_bin_edges(db['frequency'], freq_bin_width)
    db['frequency_bin'] = pd.cut(db['frequency'], bins=frequency_bins,
                                 include_lowest=True)
    time_bins = _covering_bin_edges(db['t_exptime'], time_bin_width)
    db['time_bin'] = pd.cut(db['t_exptime'], bins=time_bins,
                            include_lowest=True)

    # Counting occurrences of each combination. observed=False keeps empty
    # bins as zero-filled columns regardless of the pandas default.
    db_sk_b = db.groupby(['science_keyword', 'band_list']).size().unstack(fill_value=0)
    db_sk_f = db.groupby(['science_keyword', 'frequency_bin'],
                         observed=False).size().unstack(fill_value=0)
    db_sk_t = db.groupby(['science_keyword', 'time_bin'],
                         observed=False).size().unstack(fill_value=0)

    return db_sk_b, db_sk_f, db_sk_t


# TAP client for the archive endpoint used by the query function.
service_url = "https://almascience.eso.org/tap"
service = pyvo.dal.TAPService(service_url)

# Build the three keyword count tables (per band, per frequency bin,
# per integration-time bin).
db_sk_b, db_sk_f, db_sk_t = count_science_keywords_with_bands(service)

# Use seaborn's "tab20" palette as the default colour cycle.
custom_palette = sns.color_palette("tab20")
sns.set_palette(custom_palette)

# One entry per stacked horizontal bar chart, in display order:
# (table, heading printed with the table or None, chart title,
#  legend title, extra keyword arguments for DataFrame.plot).
chart_specs = [
    (
        db_sk_b,
        "Counts of Science Keywords with ALMA Bands:",
        'Science Keywords vs. ALMA Bands',
        'ALMA Bands',
        {'color': custom_palette},
    ),
    (
        db_sk_t,
        "Counts of Science Keywords with Integration Time:",
        'Science Keywords vs. Integration Time',
        'Integration Time',
        {},
    ),
    (
        db_sk_f,
        None,  # the frequency table is plotted but not printed
        'Science Keywords vs. Source Frequency',
        'Frequency',
        {'color': custom_palette},
    ),
]

for table, heading, chart_title, legend_title, plot_kwargs in chart_specs:
    if heading is not None:
        print(heading)
        print(table)
    plt.rcParams["figure.figsize"] = (14, 18)
    table.plot(kind='barh', stacked=True, **plot_kwargs)
    plt.title(chart_title)
    plt.xlabel('Counts')
    plt.ylabel('Science Keywords')
    # Legend sits just outside the axes, to the right of the plot.
    plt.legend(title=legend_title, loc='upper left', bbox_to_anchor=(1.01, 1))
    plt.show()


: 