In [None]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt

from pastamarkers import markers

In [None]:
import webbrowser

def open_website(k):
    """Open the Fink portal page of object ``k`` in the default web browser."""
    webbrowser.open('https://fink-portal.org/{}'.format(k))


# Retrieve data

We begin by loading the reduced data, produced by the Python script `data_transfer.py` and stored in the file `df_merged.parquet`, into a pandas DataFrame. (Note that the cell below currently reads `df_before_bug.parquet`.)

In [None]:
df = pd.read_parquet('../scripts/df_before_bug.parquet', engine='pyarrow')

In [None]:
df.head(2)

In [None]:
df.shape

Here we extract all unique IDs from our data frame:

In [None]:
unique_ids = df['objectId'].unique().tolist()
len(unique_ids)

#

To calculate the weight values $w_i$, we use the formula:

$$
w_i =
\begin{cases}
\dfrac{1}{\sigma_i^2}, & \text{if data is available for day } i \\
0, & \text{otherwise}
\end{cases}
$$

In [None]:
# A measurement gets zero weight when the day is flagged as missing
# (source == 0) or when its uncertainty is zero, which would otherwise
# give an infinite weight 1 / sigma**2.
missing_data = (df['source'] == 0) | (df['dc_sigflux'] == 0)
# nr_weight divides by nr_sigflux, so a zero nr_sigflux must be masked as well.
missing_nr = missing_data | (df['nr_sigflux'] == 0)
df['dc_weight'] = np.where(missing_data, 0, 1 / (df['dc_sigflux'] ** 2))
df['nr_weight'] = np.where(missing_nr, 0, 1 / (df['nr_sigflux'] ** 2))

In [None]:
df[['source','dc_weight']].head(3)

# 

# 

# With distance - factor test 

We group the data by object ID and build, for each object, NumPy arrays of the flux, the flux uncertainty, the weight, and the `source` flag (which marks missing days). We also record the length of each time series.

In [None]:
grouped = df.groupby('objectId')


def _values_per_object(column):
    """Return a Series indexed by objectId holding the column's values as one array per object."""
    return grouped[column].apply(lambda series: series.values)


F = _values_per_object('dc_flux')
sig = _values_per_object('dc_sigflux')
W = _values_per_object('dc_weight')
source = _values_per_object('source')
# Number of samples in each object's time series.
lengths = grouped['source'].apply(lambda series: len(series))


We define `m`, which sets the length of a query/window (chunk), along with the limit `factor` on the distance and the resulting size of each window (`chunk_size`).

In [None]:
# m controls the window length: a window spans chunk_size = 2*(m+1) consecutive samples.
m=0
# Upper limit on the window distance for a match (compared with `d <= factor` in the search loop).
# NOTE(review): has the form (2m+1) + 3*sqrt(2*(2m+1)); presumably mean + 3 sigma of a
# chi-square with 2m+1 degrees of freedom — TODO confirm.
factor = 2*m+1 + 3*np.sqrt(2*(2*m+1))
chunk_size = 2 * (m + 1)

The `'no_match_test'` function evaluates whether there are any matches in the provided array, which contains the source test values for a window. If the sum of the array is less than or equal to 1, indicating that all values are missing or only one value is present, the function returns -99. Otherwise, it returns -1 to initialize the window's status as 'no match'.

In [None]:
def no_match_test(array):
    """Return the initial status code of a window from its `source` flags.

    Returns -99 when the flags sum to at most 1 (every value is missing, or
    only one is present); otherwise -1, the initial "no match yet" status.
    """
    return -99 if array.sum() <= 1 else -1

"`objects`" list contains a subset of the objects we intend to work with.

"`L_max`" is defined to facilitate partial iteration, serving as a debugging aid by allowing a limit to be set on the number of iterations performed.

We initialize the NumPy arrays with `None` values.

In [None]:
objects = unique_ids#[0:300]
num_objects = len(objects)
# L_max = int(num_objects/20)
# Maximum number of queries to extract (upper bound of the search loop below).
L_max = 119
print("L_max ", L_max)

# R[k]: status code of every window of object k (-99 missing, -1 unmatched, else index of the matching query).
R = np.empty(num_objects, dtype=object)
# Matches is allocated here but not filled by any of the cells visible in this notebook section.
Matches = np.empty(num_objects, dtype=object)
# R_l[k][l]: per-query copy of the status array of object k, updated for query l only.
R_l = np.empty((num_objects, L_max), dtype=object)
# num_by_obj[l][k]: number of windows of object k matched by query l.
num_by_obj = np.zeros((L_max,num_objects), dtype=object)
# alp[k][l] and d[k][l]: per-window scale factor and distance of object k with respect to query l.
alp = np.empty((num_objects, L_max), dtype=object)
d = np.empty((num_objects, L_max), dtype=object)

In [None]:
indexes_objects = range(num_objects)
Q = [None] * (L_max)
#num_by_Q = [0] * (L_max) ####### we don't need this one anymore <==> sum_i
selected_Q_K_i = np.empty(L_max, dtype=object)

We initialize R using the '`no_match_test`' function.

In [None]:
for k in indexes_objects:
    n = lengths[objects[k]]
    num_chunks = int(n // 2) - m

    # One row per window: the `source` flags of the chunk_size samples starting at sample 2*start.
    series_source = source[objects[k]]
    chunks = np.array([series_source[2*start : 2*start + chunk_size] for start in range(num_chunks)])
    result = np.array([no_match_test(chunk) for chunk in chunks])
    R[k] = result.copy()

    # Every query gets its own independent copy of the initial status array.
    for l in range(L_max):
        R_l[k][l] = result.copy()
    


### Loop to compute the distance of the subsequence in the time series to its nearest neighbor.

In [None]:
start_time = time.time()

l = 0
while l < L_max:
    # Indices of the windows still flagged -1 (valid, not yet matched), per object.
    # Kept as a plain list of integer arrays: np.array(..., dtype=object) collapses
    # equal-length arrays into one 2-D object array, whose rows cannot be used
    # for integer indexing further down.
    indexes_array = [np.where(array == -1)[0] for array in R]

    has_non_empty_list = any(value.size > 0 for value in indexes_array)
    if not has_non_empty_list:
        print("break , l = ", l )
        break

    # Query selection: the first unmatched window of the first object that still has one.
    for k in range(len(R)):
        if indexes_array[k].size != 0:
            f = F[objects[k]]
            index_no_match = indexes_array[k][0]
            k_Query_taked = k
            selected_Q_K_i[l] = [k, index_no_match]
            Q[l] = f[index_no_match*2 : index_no_match*2 + chunk_size]
            break

    # Compare Q[l] with every window of every object.
    for k in range(len(objects)):
        f = F[objects[k]]
        w = W[objects[k]]
        n = lengths[objects[k]]
        n_c = n - 2*m  # twice the number of windows (two interleaved samples per position)

        # Weighted least-squares scale per window: alpha = sum(f*h*w) / sum(h**2 * w).
        s_1 = np.zeros(n_c, dtype=float)
        s_2 = np.zeros(n_c, dtype=float)
        for j in range(0, m+1):
            # Q[l] pair number j, repeated so it lines up with the interleaved samples of f.
            h = np.tile(Q[l][j*2: j*2+2], (len(f[j*2:j*2+ n_c]) // 2, 1)).ravel()
            s_1[:] += (f[j*2:j*2+ n_c]*h*w[j*2:j*2+ n_c])
            s_2[:] += (h**2 * w[j*2:j*2+ n_c])

        # Add the two interleaved samples of each position to get one value per window.
        s_n = s_1[::2] + s_1[1::2]
        s_d = s_2[::2] + s_2[1::2]

        mask_no_0 = (s_d != 0)
        alp[k][l] = np.zeros_like(s_d, dtype=float)
        # Divide only where the denominator is non-zero; alpha stays 0 elsewhere.
        alp[k][l][mask_no_0] = s_n[mask_no_0] / s_d[mask_no_0]

        # One alpha per sample (the same value for both interleaved samples of a window).
        alpha = np.repeat(alp[k][l], 2)

        # Weighted squared residual between each window and the scaled query.
        dd = np.zeros(n_c, dtype=float)
        for j in range(0, m+1):
            h = np.tile(Q[l][j*2: j*2+2], (len(f[j*2:j*2+ n_c]) // 2, 1)).ravel()
            dd[:] += ((f[j*2:j*2+ n_c] - alpha[:] * h)**2) * w[j*2:j*2+ n_c]

        d[k][l] = dd[::2] + dd[1::2]

        factor_comparison = d[k][l] <= factor

        # Among the still-unmatched windows, flag those within the limit as matched by query l.
        R[k][indexes_array[k][factor_comparison[indexes_array[k]]]] = l

        # Per-query bookkeeping: every non-missing window within the limit counts
        # as a match of query l, whether or not an earlier query already matched it.
        hits = factor_comparison & (R_l[k][l] != -99)
        R_l[k][l][hits] = l
        num_by_obj[l][k] += int(hits.sum())

    # Mark the window that was used as the query itself.
    R_l[k_Query_taked][l][index_no_match] = -2

    l += 1

print("l = ",l)

end_time = time.time()

# Compute the elapsed time
elapsed_time = end_time - start_time

print("Elapsed time:", elapsed_time, "seconds")

Let's break down the expression `R[k][indexes_array[k][factor_comparison[indexes_array[k]]]] = l` step by step:

1. `indexes_array[k]`: This selects the array of indexes corresponding to the k-th element of `indexes_array`.
2. `factor_comparison[indexes_array[k]]`: This applies integer-array indexing to the boolean array `factor_comparison`, using the indexes from `indexes_array[k]`. It selects only the elements of `factor_comparison` corresponding to those indexes.
3. `indexes_array[k][factor_comparison[indexes_array[k]]]`: This gives the indices where the condition `factor_comparison` is true for the k-th element of `indexes_array`.
4. `R[k][indexes_array[k][factor_comparison[indexes_array[k]]]]`: This uses the indices obtained in the previous step to select elements from the k-th row of `R`.
5. `= l`: Finally, it assigns the value `l` to the selected elements of `R[k]`.


# 

# 

### Calculate the Matrix of Matches (with windows represented in rows and queries as columns)

In [None]:
rangek = range(len(objects))

# Per-object match matrices (rows: non-missing windows, columns: queries) and,
# in parallel, the (k, window index) pair that every row refers to.
Matrices_by_k = []
Reference_Table_list = []

no_missing = np.zeros(len(rangek), dtype= object)
length_no_missing = np.zeros(len(rangek), dtype= int)

for k in rangek:
    # Windows of object k that are not flagged as missing (-99).
    valid_windows = np.where(R[k] != -99)[0]
    no_missing[k] = valid_windows
    length_no_missing[k] = len(valid_windows)

    Matrix = np.empty((length_no_missing[k], len(R_l[k])), dtype=object)
    refrence_table_k = np.empty(length_no_missing[k], dtype=object)

    for row, idx in enumerate(valid_windows):
        for col, status_for_query in enumerate(R_l[k]):
            status = status_for_query[idx]
            # 1 when the window is the query itself (-2) or was matched (>= 0), else 0.
            Matrix[row][col] = 1 if (status == -2 or status >= 0) else 0

        refrence_table_k[row] = np.array([k, idx])

    Matrices_by_k.append(Matrix)
    Reference_Table_list.append(refrence_table_k)

Matrices_by_k

In [None]:
Matrix_Matches = np.concatenate(Matrices_by_k, axis=0)
Reference_Table = np.concatenate(Reference_Table_list, axis=0)
Matrix_Matches

In [None]:
len(Reference_Table),Reference_Table


In [None]:
def get_i_values_for_k(specific_k):
    """Return the indices of the non-missing windows of object ``specific_k`` (from ``no_missing``)."""
    return no_missing[specific_k]
get_i_values_for_k(0)  

In [None]:
def get_k_i_for_row(global_idx):
    """Return the ``[k, window index]`` pair stored in ``Reference_Table`` for row ``global_idx``."""
    return Reference_Table[global_idx]
k,i = get_k_i_for_row(2)
k,i

In [None]:
def get_i_indexes_for_k(specific_k):
    """Return the list of ``Matrix_Matches`` row indices that belong to object ``specific_k``.

    Rows are laid out object after object, each object contributing
    ``length_no_missing[k]`` rows. Returns ``None`` when ``specific_k`` is not
    in ``rangek``.
    """
    offset = 0
    for k, count in zip(rangek, length_no_missing):
        if k == specific_k:
            return list(range(offset, offset + count))
        offset += count

get_i_indexes_for_k(0)

In [None]:
sum_j_rows = np.sum(Matrix_Matches, axis=1)
sum_j_rows


In [None]:
sum_i_columns = np.sum(Matrix_Matches, axis=0)
sum_i_columns

#

#

# Matrix only for queries (len(Qs) x len(Qs))

In [None]:
Matrix_Queries = []

# selected_Q_K_i keeps None in the slots of queries that were never selected
# (the search loop can stop before L_max); skip those instead of failing on
# the tuple unpacking.
for entry in selected_Q_K_i:
    if entry is None:
        continue
    k, i = entry
    # Position of window i among the non-missing windows of object k ...
    i_k = np.where(get_i_values_for_k(k) == i)[0][0]
    # ... translated into the row index of that window in Matrix_Matches.
    global_idx = get_i_indexes_for_k(k)[i_k]
    Matrix_Queries.append(Matrix_Matches[global_idx])

Matrix_Queries

In [None]:
len(Matrix_Queries)

In [None]:
sum_j_rows_queries = np.sum(Matrix_Queries, axis=1)

sum_i_columns_queries = np.sum(Matrix_Queries, axis=0)
sum_i_columns_queries,sum_j_rows_queries

In [None]:
sum_i_columns_queries.argmax(),sum_i_columns_queries.max()

In [None]:
sum_j_rows_queries.argmax(), sum_j_rows_queries.max()

In [None]:
sum_j_rows_queries.sum(), sum_i_columns_queries.sum()

#

#

In [None]:
# NOTE(review): sum(sum_i_columns) and sum(sum_j_rows) both equal the total number
# of 1s in Matrix_Matches, so total_matches is twice that total — confirm the
# factor of 2 is intended.
total_matches = sum(sum_i_columns) + sum(sum_j_rows)

# Working copy of the match matrix, rescaled in the next cell.
I_i_j = Matrix_Matches[:].copy() #*(sum_i[:])
I_i_j


In [None]:
# Rescale every entry (row j = window, column i = query) by
# total_matches / (column sum * row sum).
for j, sj in enumerate(sum_j_rows):
    for i, si in enumerate(sum_i_columns):
        dn = (si*sj)
        if dn == 0:
            # NOTE(review): when dn == 0 the entry itself is 0 (a 1 would make both
            # sums non-zero), so 0 * inf yields nan rather than inf — confirm this
            # is the intended marker.
            I_i_j[j][i] *= np.inf # we don't need this anymore , No, we do , if l_max != len(Q)
        else:
            I_i_j[j][i] *= total_matches/ dn
I_i_j

In [None]:
# nonzero_indices = np.nonzero(np.outer(sum_j, sum_i))

# # Update only non-zero elements
# I_i_j[nonzero_indices] = I_i_j[nonzero_indices] / (sum_i[nonzero_indices[1]] * sum_j[nonzero_indices[0]])

# #I_i_j[I_i_j == 0] = np.inf


#

###### Compute the query with the highest number of matches and the one with the fewest matches.

In [None]:
# Look the two queries up through the computed argmax/argmin rather than
# hard-coded indices, which are only valid for one particular run.
best_query = np.argmax(sum_i_columns)
worst_query = np.argmin(sum_i_columns)
max(sum_i_columns), min(sum_i_columns), best_query, worst_query, selected_Q_K_i[best_query], selected_Q_K_i[worst_query]

###### Compute the window with the highest number of matches on queries and the one with the fewest matches.

In [None]:
#flattened_array = np.concatenate([arr.flatten() for arr in sum_j])
maximum_value = np.max(sum_j_rows)
maximum_value, np.argmax(sum_j_rows),sum_j_rows[155]

In [None]:
# Find indices of non-zero values
non_zero_indices = np.argwhere(sum_j_rows != 0).flatten()

# Retrieve non-zero values
non_zero_values = sum_j_rows[non_zero_indices]

# Find the index of the minimum non-zero value
min_non_zero_index = non_zero_indices[np.argmin(non_zero_values)]

min_non_zero_value = sum_j_rows[min_non_zero_index]

min_non_zero_value, min_non_zero_index

# 

### Function to detect a specific window in a time series (specified by its object and index within this object).

In [None]:
def if_a_query(k_TS, i_TS, selected_Q_K_i):
    """Tell whether window ``i_TS`` of object ``k_TS`` was used as a query.

    ``selected_Q_K_i`` is the object array of ``[k, i]`` pairs, one per query,
    with ``None`` in unused slots (these are ignored).

    Returns a ``(found, query_index, message)`` tuple; ``query_index`` is 0
    when the window is not a query.
    """
    used_queries = selected_Q_K_i[selected_Q_K_i != None]
    found_at = next(
        (position for position, (k, i) in enumerate(used_queries)
         if k_TS == k and i_TS == i),
        None,
    )
    if found_at is None:
        return False, 0 , "This window is not used as a query ! "
    return True, found_at, f"This window is used as the {found_at}th query! "

In [None]:
def get_m(i,j):
    """Return the match flag of window row ``j`` and query column ``i``.

    Reads ``Matrix_Matches`` (rows are windows, columns are queries); the
    previous name ``concatenated_array`` is not defined anywhere in this
    notebook.
    """
    return Matrix_Matches[j][i]

def get_i_by_sum(i):
        """Return the column sum of ``Matrix_Matches`` for query column ``i`` (from ``sum_i_columns``)."""
        return sum_i_columns[i]

def get_j_by_sum(j):
        """Return the row sum of ``Matrix_Matches`` for window row ``j`` (from ``sum_j_rows``)."""
        # Row sums live in sum_j_rows; the column sums are served by get_i_by_sum.
        return sum_j_rows[j]

    
def get_k_i_of_Q(l):
    """Return the ``[k, window index]`` pair from which query ``l`` was taken (``None`` if that slot is unused)."""
    return selected_Q_K_i[l]


In [None]:
# a_query, l , string = if_a_query(22,5,selected_Q_K_i)
# plot_distance_flux(22,l)

#

#

#

#

#

## Functions to compute the numbers of matches and plot the results 

#### these functions should be in another file

In [None]:
def plot_distance_flux(k,l,open_Fink= False):
    """Plot the flux of object ``k`` and its per-window distance to query ``l``.

    Draws a two-panel figure:

    * top: the two interleaved flux series of ``F[objects[k]]`` (even samples in
      C0, odd samples in C1), with error bars on non-missing points, the query
      ``Q[l]`` drawn to the left of the series, and a shaded band on the window
      that was used as query ``l`` when it belongs to this object;
    * bottom: the distance ``d[k][l]`` of every window, with one marker per
      window encoding its status in ``R_l[k][l]`` and, next to it, the number
      of queries the window matched (taken from ``sum_j_rows``).

    Parameters
    ----------
    k : index of the object in ``objects``.
    l : index of the query in ``Q``.
    open_Fink : when true, also open the object's Fink portal page.

    Relies on notebook globals: ``F``, ``sig``, ``source``, ``objects``, ``Q``,
    ``alp``, ``d``, ``R_l``, ``sum_j_rows``, ``selected_Q_K_i``, ``m``, ``L_max``.
    """
    
    plt.figure(figsize=(14, 12))

    # Plot for F[k]
    plt.subplot(2, 1, 1)

    for i in range(int(len(F[objects[k]])/2)):
        # Even sample of position i: cross without error bar if missing, dot with error bar otherwise.
        if source[objects[k]][2*i] == 0:
            marker = 'x'
            plt.errorbar(i, F[objects[k]][2*i], 
                     #sig[objects[k]][2*i]*0.4,
                     c='C0', marker='x')
        else:
            marker = 'o'
            plt.errorbar(i, F[objects[k]][2*i], 
                     sig[objects[k]][2*i],
                     c='C0', marker='o')

        # Odd sample of position i, same convention.
        if source[objects[k]][2*i+1] == 0:
            marker = 'x'
            plt.errorbar(i, F[objects[k]][2*i+1],
                     #sig[objects[k]][2*i+1]*0.4,
                     c='C1', marker='x')
            
        else:
            marker = 'o'
            plt.errorbar(i, F[objects[k]][2*i+1],
                     sig[objects[k]][2*i+1],
                     c='C1', marker='o')
            
        a_query,l_1, string = if_a_query(k,i,selected_Q_K_i)
            
        # Shade the window only when it is the one query l was taken from.
        if a_query and l_1 == l:
            print(string)
            
            # Define the window of indices
            window_start = i  # Index of the window start
            window_end = i + int(len(Q[l])/2 - 1)  # Index of the window end

            # Create an array of float indices
            indices = np.arange(window_start, window_end + 1)
            indices = np.concatenate(([indices[0] - 0.5], indices, [indices[-1] + 0.5]))

            # Plot a shaded region for the window
            plt.fill_between(indices, min(Q[l])/1.2, max(Q[l])*1.2, color='gray', alpha=0.2)
            
        
    # Empty plots that only create the legend entries.
    plt.plot([], [], color='C1', marker='x', label='missing points !')
    plt.plot([], [], color='C0', marker='o', label='origin')
    plt.plot([], [], color='C0', marker='x', label='missing points !')
    plt.plot([], [], color='C1', marker='o', label='origin')
    
    
    ################################### what about the seccond window !? 
    # The mean of alpha is multiplied by 0, so for finite alpha this is 1 and Q[l] is drawn unscaled.
    moy_alp = 1+0*alp[k][l].mean()
    #moy_alp = np.median(F[k])/np.median(Q[l]) #Manu test

    plt.plot(range(int(len(F[objects[k]])/2)), F[objects[k]][::2], c='C0', linewidth = 1)
    plt.plot(range(int(len(F[objects[k]])/2)), F[objects[k]][1::2], c='C1', linewidth = 1)

    # The query is drawn at negative x positions, i.e. to the left of the time series.
    plt.plot(np.arange(len(Q[l]) // 2) - 1-m, Q[l][::2]*moy_alp, c='g', label='Q[l]',marker='.', linewidth=3, zorder=3)
    plt.plot(np.arange(len(Q[l]) // 2) - 1-m, Q[l][1::2]*moy_alp, c='r', label='Q[l]',marker='.', linewidth=3, zorder=3)

    # Define the window of indices
    window_start = -1-m  # Index of the window start
    window_end = int(len(Q[l])/2 - 1-1 -m)  # Index of the window end

    # Create an array of float indices
    indices = np.arange(window_start, window_end + 1)
    indices = np.concatenate(([indices[0] - 0.4], indices, [indices[-1] + 0.4]))

    # Plot a shaded region for the window
    plt.fill_between(indices, min(Q[l])/1.2*moy_alp, max(Q[l])*1.2*moy_alp, color='gray', alpha=0.2)

    # plt.xlabel('Index')
    plt.ylabel('Flux')
    # plt.title('Flux Plot')
    plt.legend()
    




    ###############                       Plot for d[k][0]                       ###############

    plt.subplot(2, 1, 2)
    plt.plot([-1-m, -1-m], [0, 0], color='none')  # Plot an empty line with zero length
    plt.plot(range(len(d[k][l])), d[k][l],color='C0', linestyle='-',linewidth=1)
    
#     matches = all_matches[k]#Matches[k]#
#     for i, match_count in enumerate(matches):
#         plt.text(i+0.2, d[k][l][i]+d[k][l][i]*5/100, str(match_count), fontsize=10, color='red', ha='left')
    # Number of queries matched by each non-missing window of object k.
    matches_idx = get_i_indexes_for_k(k)
    matches2 = sum_j_rows[matches_idx]

    # Plot dummy points with desired colors and markers
    plt.plot([], [], color='black', marker=markers.soli, label='Query chosed')
    plt.plot([], [], color='red', marker='x', label='missing cases !')
    plt.plot([], [], color='blue', marker=markers.ravioli, label='Matched here')
    plt.plot([], [], color='green', marker=markers.stelline, label='Matches with a different `l` (Query)')
    plt.plot([], [], color='yellow', marker=markers.tortellini, label=f'Not matched with any of the {L_max} options we selected')
    
    # c counts the missing (-99) windows met so far: matches2 only covers non-missing
    # windows, so window i is found at position i - c.
    c= 0
    
    for i, val in enumerate(R_l[k][l]):
            if val == -99:
                plt.scatter(i, d[k][l][i], color='red', marker='x', s=50)  # marker size 50
                plt.text(i+0.2, d[k][l][i]+d[k][l][i]*5/100, 0, fontsize=10, color='red', ha='left')
                c+=1
#                 plt.text(i+0.2, d[k][l][i]+d[k][l][i]*5/100, 0*str(matches2[i]), fontsize=10, color='red', ha='left')

            elif val == l:
                plt.scatter(i, d[k][l][i], color='blue', marker=markers.ravioli, s=50)
                plt.text(i+0.2, d[k][l][i]+d[k][l][i]*5/100, str(matches2[i-c]), fontsize=10, color='blue', ha='left')

            elif val == -2:
                plt.scatter(i, d[k][l][i], color='black', marker=markers.soli, s=50) 
                plt.text(i+0.2, d[k][l][i]+d[k][l][i]*5/100, str(matches2[i-c]), fontsize=10, color='black', ha='left')

            elif matches2[i-c]==0:
                plt.scatter(i, d[k][l][i], color='yellow', marker=markers.tortellini, s=50) 
                plt.text(i+0.2, d[k][l][i]+d[k][l][i]*5/100, str(matches2[i-c]), fontsize=10, color='yellow', ha='left')

            else:
                plt.scatter(i, d[k][l][i], color='green', marker=markers.stelline, s=75)
                plt.text(i+0.2, d[k][l][i]+d[k][l][i]*5/100, str(matches2[i-c]), fontsize=10, color='green', ha='left')




    plt.legend(fontsize=8) 
    #plt.legend(loc='upper right', fontsize=8)

    plt.ylabel('distance')



    ###############                       Plot for alpha[k][l]                       ###############

#     plt.subplot(3, 1, 3)
#     plt.plot(range(len(alp[k][l])), alp[k][l], marker='.', linestyle='-',color='black')
#     plt.xlabel('Index')
#     plt.ylabel('Value')
#     plt.title('alpha Plot')




    plt.subplots_adjust(top=0.93)  # Adjust the top margin for the super title
    plt.suptitle(f"{objects[k]}, k = {k}, l = {l} ", fontname='Arial', fontsize=16, fontweight='bold')
    plt.show()
    

    if open_Fink:
        open_website(objects[k])




In [None]:
selected_Q_K_i[116]

In [None]:
ratios[116]

In [None]:
plot_distance_flux(23056,116)


# 



# 



# 


# plot by multiple l

In [None]:
for l in [0,1]:
    for k in [0,1]:
        plot_distance_flux(k,l)

In [None]:
l_values = [100,0]#range(119)
k_values=range(len(objects))
k=1
for l in l_values:
    for k in k_values[:7]:
        plot_distance_flux(k,l) # True == open website


#

## plot histogram of numbers of matches for each Q

We graph the number of matches for each query.

In [None]:
indexes = range(len(sum_i_columns))

# Plotting the histogram
plt.hist(indexes, bins=len(sum_i_columns), weights=sum_i_columns, color='C1', edgecolor='white')

# Adding labels and title
plt.xlabel('Indices')
plt.ylabel('Numbers')
plt.title('Histogram of numbers of matches for each Q ')

# Display the histogram
plt.show()


# Compute the highest and lowest 10 values and their indexes
sorted_indices = sorted(range(len(sum_i_columns)), key=lambda i: sum_i_columns[i])
lowest_10_indices = sorted_indices[:10]
highest_10_indices = sorted_indices[-10:][::-1]  # Reverse to get in descending order

total_windows = len(Matrix_Matches)

# Print the highest and lowest 10 values and their indexes with percentages
print("Highest 10:\n")
for i in highest_10_indices:
    percentage = (sum_i_columns[i] / total_windows) * 100
    print(f"Index: {i}, Matches: {sum_i_columns[i]}, Percentage: {percentage:.2f}%")

print("\n\nLowest 10:\n")
for i in lowest_10_indices:
    percentage = (sum_i_columns[i] / total_windows) * 100
    print(f"Index: {i}, Matches: {sum_i_columns[i]}, Percentage: {percentage:.2f}%")

    
    


In [None]:
import csv

# Specify the file name
file_name = "Num_for_Qs.csv"

# Combine the data into a list of tuples
data = list(zip(indexes, sum_i_columns))

# Write the data to a CSV file
with open(file_name, 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)
    
    # Write a header if needed
    csv_writer.writerow(['Queries', 'Nb of matches'])
    
    # Write the data
    csv_writer.writerows(data)

print("Data has been written to", file_name)


We calculate the minimum and maximum number of matches, along with their corresponding indexes (where index represents the index of the query).

In [None]:
max(sum_i_columns),min(sum_i_columns),np.argmax(sum_i_columns), np.argmin(sum_i_columns)

#

#

### plot Qr/Qg 

In [None]:
Q_array = np.array(Q)
ratios = Q_array[:, 0] / Q_array[:, 1]

plt.plot(ratios)
plt.title('Ratio of First Value to Second Value')
plt.xlabel('Index')
plt.ylabel('Ratio')
plt.grid(True)
plt.show()

### Here, we plot the number of matches as a function of the ratios (Qr/Qg).

In [None]:
Q_array = np.array(Q)
ratios = Q_array[:, 0] / Q_array[:, 1]

plt.scatter(ratios[:-3],sum_i_columns[:-3]) ###################### -3 to exclude the 800 .. 
plt.title('num of matches for each Q in function of Ratio ')
plt.xlabel('Ratio Qr/Qg')
plt.ylabel('num(l)')
plt.grid(True)
plt.show()


print("\nHighest 10 Ratios, Matches, and Indices:\n")
for i in range(10):
    index = highest_10_indices[i]
    ratio = ratios[index]
    matches = sum_i_columns[index]
    print("Index:", index, "Ratio:", ratio, "Matches:", matches)
    
print("\n\nLowest 10 Ratios, Matches, and Indices:\n")
for i in range(10):
    index = lowest_10_indices[i]
    ratio = ratios[index]
    matches = sum_i_columns[index]
    print("Index:", index, "Ratio:", ratio, "Matches:", matches)


In [None]:
selected_Q_K_i[64], objects[854],plot_distance_flux(854, 64, True)

In [None]:
import csv

# Specify the file name
file_name = "data.csv"

# Combine the data into a list of tuples
data = list(zip(ratios, sum_i_columns))

# Write the data to a CSV file
with open(file_name, 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)
    
    # Write a header if needed
    csv_writer.writerow(['Ratio', 'Sum of i Columns'])
    
    # Write the data
    csv_writer.writerows(data)

print("Data has been written to", file_name)


#

#

### Compute the highest and the lowest indices 

In [None]:
# lowest_indices_array = []
sorted_queries_list = []


for l in range(len(num_by_obj)):

    array = np.array(num_by_obj[l])

    # Get indices of sorted array in ascending order
    sorted_queries = np.argsort(array)
    sorted_queries_list.append(sorted_queries)  # Append sorted indices list

#     # Indices of 10 lowest values

#     lowest_values = array[sorted_indices[0]]
#     all_lowest_indices = np.where(array == lowest_values)[0]
#     lowest_indices_array.append(all_lowest_indices)


#

#

In [None]:
num_by_obj[1]

In [None]:
# for k in rangek:
#     print(np.sum(Matrices_by_k[k], axis=0))

## plot histogram of numbers of matches for all objects with a specific Q

In [None]:
l=94
indexes = range(len(num_by_obj[l]))

# Plotting the histogram
plt.hist(indexes, bins=len(num_by_obj[l]), weights=num_by_obj[l], color='C1', edgecolor='black')
plt.xlabel('Objects')
plt.ylabel('Numbers')
plt.title('Histogram of numbers of matches for a specific Q for all objects')
plt.show()



total_queries = len(Q)
lowest_10_indices = sorted_queries_list[l][:10]


array = np.array(num_by_obj[l])
lowest_value = array[sorted_queries_list[l][0]]
all_lowest_indices = np.where(array == lowest_value)[0]

lowest_10_indices_1 = sorted_queries_list[l][len(all_lowest_indices):len(all_lowest_indices)+10]
# These aren't simply the lowest 10 values; rather,
#they represent the lowest 10 values with the condition that their number of matches exceeds
#that of the lowest value. Suppose the lowest value of matches is 0, these objects are those with more than 0 matches.
# (the lowest ones with Nb matches > 0  ).


highest_10_indices = sorted_queries_list[l][-10:][::-1]  # Reverse to get in descending order

# Print the highest and lowest 10 values and their indexes with percentages
print(f"Q[{l}]:\n")
print("Highest 10:\n")
for k in highest_10_indices:
    percentage = (num_by_obj[l][k] / total_queries) * 100
    print(f"Index: {k}, Id : {objects[k]}, Matches: {num_by_obj[l][k]}, Percentage: {percentage:.2f}%")

print("\nLowest 10:\n")
for k in lowest_10_indices:
    percentage = (num_by_obj[l][k] / total_queries) * 100
    print(f"Index: {k}, Id : {objects[k]}, Matches: {num_by_obj[l][k]}, Percentage: {percentage:.2f}%")

print("\nLowest 10 with Nb matches > (lowest Nb):\n")
for k in lowest_10_indices_1:
    percentage = (num_by_obj[l][k] / total_queries) * 100
    print(f"Index: {k}, Id : {objects[k]}, Matches: {num_by_obj[l][k]}, Percentage: {percentage:.2f}%")


#

In [None]:
k= 100 
for k in rangek:
    print(k, num_by_obj[l][k]/length_no_missing[k]*100)

In [None]:
num_by_obj[l][136], count_missing_windows[136], len(R[136])

In [None]:
k= 864 
num_by_obj[l][k],length_no_missing[k], count_missing_windows[k], len(R[k])

In [None]:
plot_distance_flux(k,l)

In [None]:
lensss = list(map(len, R))

lensss  

### We restrict the selection of objects to those with a specific count of missing windows! 
Therefore, we prioritize objects with the highest number of intact windows.

In [None]:
count_missing_windows = np.array([np.sum(subarr == -99) for subarr in R])

result = np.where(count_missing_windows <=10, True, False)
# The result array represents the condition for each object (i.e., objects with few missing windows).
result,len(result),count_missing_windows

We extract the indices where the condition is true, representing the objects with the highest numbers of normal data.

In [None]:
indices = np.where(result)[0]
l = 0
selected_arrays = num_by_obj[l][indices]
selected_arrays, indices, len(selected_arrays)

We identify and plot the object with the highest number of matches for a query `l`.

In [None]:
k = indices[np.argmax(selected_arrays)]
k

In [None]:
mask = selected_Q_K_i != None
for index_Q, (k1, i) in enumerate(selected_Q_K_i[mask]):
    if 17435 == k1 :
        print(f"This window is used as the {index_Q}th query! ")
        break

In [None]:
# Plot the object selected above (index k); the previous call plotted object 1
# while printing the identifier of object k.
print(objects[k])
plot_distance_flux(k, l)

###

### Generate a plot illustrating the match counts of the selected objects (those with a limited number of missing windows) for a particular query, Q.

In [None]:
l=0
indexes = range(len(num_by_obj[l][indices]))

# Plotting the histogram
plt.hist(indexes, bins=len(num_by_obj[l][indices]), weights=num_by_obj[l][indices], color='skyblue', edgecolor='C0')

# Adding labels and title
plt.xlabel('Indices')
plt.ylabel('Numbers')
plt.title('Histogram of numbers of matches for a specific Q for a limited nb of objects')

# Display the histoazgram
plt.show()

##

#

#

### plot the highest and the lowest indices 

In [None]:
l=94

# Objects ordered by increasing number of matches with Q[l]. Skip every object
# tied at the lowest count and plot the next 10. This uses sorted_queries_list;
# the previously referenced sorted_indices_list / lowest_indices_array are not
# defined anywhere in this notebook.
matches_per_object = np.array(num_by_obj[l])
lowest_value = matches_per_object[sorted_queries_list[l][0]]
n_lowest = len(np.where(matches_per_object == lowest_value)[0])

for count, k in enumerate(sorted_queries_list[l][n_lowest:n_lowest + 10], start=1):
    print(count)
    print(objects[k],k,"l =",l,".")
    print("Number of matches in this object of this Q",num_by_obj[l][k])
    plot_distance_flux(k,l,True)
    

### 

### 

# Negative Flux ! 

In [None]:
# For every object with at least one negative flux value, keep its index and
# the ratio flux / uncertainty of its negative points.
negatives = []
k_negatives = []
for k in rangek:
    flux = F[objects[k]]
    negative_mask = flux < 0
    if np.any(negative_mask):
        k_negatives.append(k)
        negatives.append(flux[negative_mask] / sig[objects[k]][negative_mask])

negatives,k_negatives

In [None]:
for idx, list1 in enumerate(negatives):
    #print(list1)
    if np.any(np.array(list1) <-4):
        print(F[objects[k_negatives[idx]]]/sig[objects[k_negatives[idx]]])
        plot_distance_flux(k_negatives[idx],0)

In [None]:
F[objects[5053]]/sig[objects[5053]]

In [None]:

ravel = np.concatenate(negatives)

In [None]:
plt.hist(ravel,bins='fd')

In [None]:
len(ravel)

In [None]:
for k in k_negatives[:3]:
    plot_distance_flux(k,0)

### 

### 

### 

#

#

#

#

#

#

# Distinguishing the two cases!

In [None]:
m=1 # here the minimin is 1 !! 
factor = 2*m+1 + 3*np.sqrt(2*(2*m+1))
chunk_size = 2 * (m + 1)

In [None]:
objects = unique_ids[0:10]
num_objects = len(objects)
L_max = int(num_objects/2)

print("L_max ", L_max)

R_r = np.empty(num_objects, dtype=object)
R_g = np.empty(num_objects, dtype=object)

R_l_r = np.empty((num_objects, L_max), dtype=object)
R_l_g = np.empty((num_objects, L_max), dtype=object)

alp = np.empty((num_objects, L_max), dtype=object)
d_r = np.empty((num_objects, L_max), dtype=object)
d_g = np.empty((num_objects, L_max), dtype=object)

In [None]:
indexes_objects = range(num_objects)
Q = [None] * (L_max)

In [None]:
for k in indexes_objects:
    n = lengths[objects[k]]
    num_chunks = int(n // 2) - m

    series_source = source[objects[k]]
    # Within each window, the even offsets feed the "g" chunks and the odd offsets the "r" chunks.
    chunks_g = np.array([series_source[2*start : 2*start + chunk_size : 2] for start in range(num_chunks)])
    chunks_r = np.array([series_source[2*start + 1 : 2*start + chunk_size : 2] for start in range(num_chunks)])
    result_r = np.array([no_match_test(chunk) for chunk in chunks_r])
    result_g = np.array([no_match_test(chunk) for chunk in chunks_g])

    R_r[k] = result_r.copy()
    R_g[k] = result_g.copy()

    # Every query gets its own independent copy of the initial status arrays.
    for l in range(L_max):
        R_l_r[k][l] = result_r.copy()
        R_l_g[k][l] = result_g.copy()
        

In [None]:
start_time = time.time()


l = 0
while l < L_max:
    # Indices of the still-unmatched (-1) windows per object, separately for each band.
    # Plain lists of integer arrays: np.array(..., dtype=object) collapses
    # equal-length arrays into a 2-D object array that cannot be used for
    # integer indexing.
    indexes_array_r = [np.where(array == -1)[0] for array in R_r]
    indexes_array_g = [np.where(array == -1)[0] for array in R_g]

    # Stop as soon as one of the two bands has no unmatched window left.
    empty_lists = (all(value.size == 0 for value in indexes_array_r)
                   or all(value.size == 0 for value in indexes_array_g))
    if empty_lists:
        print("break , l = ", l )
        break

    # "g" part of the query: even samples of the first unmatched g window.
    for k in range(len(R_g)):
        if indexes_array_g[k].size != 0:
            f = F[objects[k]]
            k_Query_taked_g = k
            index_no_match_g = indexes_array_g[k][0]
            Q[l] = f[index_no_match_g*2 : index_no_match_g*2 + chunk_size : 2]
            break

    # "r" part of the query: odd samples of the first unmatched r window,
    # inserted at the odd positions of Q[l].
    for k in range(len(R_r)):
        if indexes_array_r[k].size != 0:
            f = F[objects[k]]
            k_Query_taked_r = k
            index_no_match_r = indexes_array_r[k][0]
            for i in range(1, chunk_size, 2):
                Q[l] = np.insert(Q[l], i, f[index_no_match_r*2 + i : index_no_match_r*2 + i + 1])
            break

    # Compare Q[l] with every window of every object.
    for k in range(len(objects)):
        f = F[objects[k]]
        w = W[objects[k]]
        n = lengths[objects[k]]
        n_c = n - 2*m  # twice the number of windows (two interleaved samples per position)

        s_1 = np.zeros(n_c, dtype=float)
        s_2 = np.zeros(n_c, dtype=float)
        for j in range(0, m+1):
            # Q[l] pair number j, repeated so it lines up with the interleaved samples of f.
            h = np.tile(Q[l][j*2: j*2+2], (len(f[j*2:j*2+ n_c]) // 2, 1)).ravel()
            s_1[:] += (f[j*2:j*2+ n_c]*h*w[j*2:j*2+ n_c])
            s_2[:] += (h**2 * w[j*2:j*2+ n_c])

        # Unlike the combined case, the two interleaved samples are NOT added
        # together: each band keeps its own scale factor.
        s_n = s_1
        s_d = s_2

        mask_no_0 = (s_d != 0)
        alp[k][l] = np.zeros_like(s_d, dtype=float)
        # Divide only where the denominator is non-zero; alpha stays 0 elsewhere.
        alp[k][l][mask_no_0] = s_n[mask_no_0] / s_d[mask_no_0]

        alpha = alp[k][l]

        # Weighted squared residual between each sample and the scaled query.
        dd = np.zeros(n_c, dtype=float)
        for j in range(0, m+1):
            h = np.tile(Q[l][j*2: j*2+2], (len(f[j*2:j*2+ n_c]) // 2, 1)).ravel()
            dd[:] += ((f[j*2:j*2+ n_c] - alpha[:] * h)**2) * w[j*2:j*2+ n_c]

        # Even entries are the g distances, odd entries the r distances.
        d_g[k][l] = dd[::2]
        d_r[k][l] = dd[1::2]

        # Among the still-unmatched windows of each band, flag those within the limit.
        idx_g = indexes_array_g[k]
        R_g[k][idx_g[d_g[k][l][idx_g] <= factor]] = l
        idx_r = indexes_array_r[k]
        R_r[k][idx_r[d_r[k][l][idx_r] <= factor]] = l

        # Per-query bookkeeping: every non-missing window within the limit is a match of query l.
        matched_r = (d_r[k][l] <= factor) & (R_l_r[k][l] != -99)
        R_l_r[k][l][matched_r] = l

        matched_g = (d_g[k][l] <= factor) & (R_l_g[k][l] != -99)
        R_l_g[k][l][matched_g] = l

    # Mark the windows that were used to build the query itself.
    R_l_r[k_Query_taked_r][l][index_no_match_r] = -2
    R_l_g[k_Query_taked_g][l][index_no_match_g] = -2

    l += 1

print("l = ",l)

end_time = time.time()

# Compute the elapsed time
elapsed_time = end_time - start_time

print("Elapsed time:", elapsed_time, "seconds")

#

In [None]:
all_matches_r = number_matches_for_all_k(R_l_r)
all_matches_g = number_matches_for_all_k(R_l_g)


In [None]:
k = 1
l = 8
unique_ids[k]

In [None]:
color = ['green','red']
color_Q = ['C0','C1']

def plot_distance_flux_r_g(k, l, j=0, d=None, R_l=None, matches=None):  # default case : g (j=0 ==> green)
    """Plot one band of an object's light curve against query ``l`` and the resulting distance profile.

    Top panel: the band's flux per time step (``x`` marker where ``source == 0``,
    i.e. flagged as missing; ``o`` with an error bar otherwise), the query ``Q[l]``
    for the same band and a grey band spanning the query window.
    Bottom panel: the distance ``d[k][l]`` per window, every point marked by its
    status in ``R_l[k][l]`` and annotated with ``matches[i]``.

    Parameters
    ----------
    k : int
        Position of the object in ``objects``.
    l : int
        Query number (index into ``Q`` and into the second level of ``d`` / ``R_l``).
    j : int, default 0
        Band offset inside the interleaved arrays: 0 selects the even entries
        (drawn green), 1 the odd entries (drawn red).
    d : optional
        Distance table indexed as ``d[k][l][i]``. Defaults to ``d_g`` for ``j == 0``
        and ``d_r`` otherwise.
    R_l : optional
        Status table indexed as ``R_l[k][l][i]``. Defaults to ``R_l_g`` / ``R_l_r``.
    matches : optional
        Per-window match counts of object ``k``. Defaults to ``all_matches_g[k]`` /
        ``all_matches_r[k]``.

    Notes
    -----
    The optional tables are resolved when the function is *called*. Writing them as
    ``d=d_g, matches=all_matches_g[k]`` in the signature would evaluate them once,
    when the ``def`` cell runs: ``matches`` would be tied to whatever the global ``k``
    was at that moment, and later recomputed tables would be ignored.
    """
    use_g_band = (j == 0)
    if d is None:
        d = d_g if use_g_band else d_r
    if R_l is None:
        R_l = R_l_g if use_g_band else R_l_r
    if matches is None:
        matches = all_matches_g[k] if use_g_band else all_matches_r[k]

    obj_id = objects[k]
    flux = F[obj_id]
    flux_err = sig[obj_id]
    is_observed = source[obj_id]
    band_color = color[j]

    plt.figure(figsize=(15, 10))

    # ---- Top panel: flux of the selected band ----
    plt.subplot(2, 1, 1)

    n_steps = int(len(flux) / 2)  # the two bands are interleaved, so two entries per time step
    for i in range(n_steps):
        pos = 2 * i + j
        if is_observed[pos] == 0:
            # Missing point: no error bar, cross marker.
            plt.errorbar(i, flux[pos], c=band_color, marker='x')
        else:
            plt.errorbar(i, flux[pos], flux_err[pos], c=band_color, marker='o')

    # Empty artists that only feed the legend.
    plt.plot([], [], color=band_color, marker='o', label='origin')
    plt.plot([], [], color=band_color, marker='x', label='missing points !')

    plt.plot(range(n_steps), flux[j::2], c=band_color, linewidth=1)

    query_band = Q[l][j::2]
    plt.plot(range(int(len(Q[l]) / 2)), query_band, c=color_Q[j], label='Q[l]', marker='.')

    # Grey band over the query window, widened by half a step on each side.
    window_start = 0
    window_end = int(len(Q[l]) / 2 - 1)
    indices = np.arange(window_start, window_end + 1)
    indices = np.concatenate(([indices[0] - 0.5], indices, [indices[-1] + 0.5]))
    plt.fill_between(indices, min(query_band) / 1.2, max(query_band) * 1.2, color='gray', alpha=0.2)

    plt.ylabel('Flux')
    plt.legend()

    # ---- Bottom panel: distance of every window to the query ----
    plt.subplot(2, 1, 2)
    dist = d[k][l]
    plt.plot(range(len(dist)), dist, color='C0', linestyle='-', linewidth=1)

    # Empty artists that only feed the legend.
    plt.plot([], [], color='black', marker='o', label='Query chosed')
    plt.plot([], [], color='red', marker='x', label='missing cases !')
    plt.plot([], [], color='blue', marker='s', label='Matched here')
    plt.plot([], [], color='green', marker='*', label='Matches with a different `l` (Query)')
    plt.plot([], [], color='yellow', marker='^', label=f'Not matched with any of the {L_max} options we selected')

    for i, val in enumerate(R_l[k][l]):
        count = matches[i]
        # The order of the tests matters: it is the priority of the status markers.
        if val == -99:            # flagged as missing
            point_color, point_marker, point_size = 'red', 'x', 50
        elif val == l:            # matched by this query
            point_color, point_marker, point_size = 'blue', 's', 50
        elif val == -2:           # window the query was taken from
            point_color, point_marker, point_size = 'black', 'o', 50
        elif count == 0:          # matched by no query at all
            point_color, point_marker, point_size = 'yellow', '^', 50
        else:                     # matched by another query
            point_color, point_marker, point_size = 'green', '*', 75

        plt.scatter(i, dist[i], color=point_color, marker=point_marker, s=point_size)
        # Label sits 5 % above the point.
        plt.text(i + 0.2, dist[i] + dist[i] * 5 / 100, str(count),
                 fontsize=10, color=point_color, ha='left')

    plt.legend(fontsize=8)
    plt.ylabel('distance')

    plt.subplots_adjust(top=0.93)  # Adjust the top margin for the super title
    plt.suptitle(f"{objects[k]}, k ={k}, l ={l}", fontname='Arial', fontsize=16, fontweight='bold')

    plt.show()



In [None]:
# Draw the g-band figure of object k = 0 for queries 0..3 (one figure per query).
# The commented call is the r-band equivalent.
l_values = [0,1,2,3]#range(L_max)  
k= 0
for l in l_values:
#     plot_distance_flux_r_g(k,l,1,d_r,R_l_r,all_matches_r[k])
    plot_distance_flux_r_g(k,l,0,d_g,R_l_g,all_matches_g[k])
    

#

#

#

#

#

#

#

#

#

# TEST

In [None]:

# def get_j_by_k(k_selected):
#     global_inx = 0 
#     for k in rangek:
#         if k == k_selected:
#             return list(range(global_inx, global_inx + int(lengths[objects[k]] / 2) - m))
#         global_inx += (int(lengths[objects[k]] / 2) - m)



In [None]:
# Print the position of one object ID inside `objects`.
# NOTE: the loop rebinds the notebook-level names `i` and `k`; afterwards `k` holds an
# element of `objects` (the ID that was hit, or the last element if nothing matched), not an integer index.
for i,k in enumerate(objects):
    if k == 'ZTF18cckcza':#'ZTF19acggsdc':
        print(i)
        break

In [None]:
# `s` is not read by any cell visible in this section.
s = 0
# Flat row number that a later cell passes to get_k_i_of_row(...).
global_inx = 633
# def get_k_i_of_row(global_inx):
#     if global_inx < (int(lengths[objects[0]]/2)-m):
#          return  0 , global_inx
#     for k in rangek:
#         global_inx -= (int(lengths[objects[k]]/2)-m)
#         if global_inx < (int(lengths[objects[k+1]]/2)-m):
#             return  k+1 , global_inx
# #def get
# get_k_i_of_row(31)

In [None]:
# i_values = []
# saved_k = -1
# specific_k =7 
# for arr in Reference_Table:
#     if arr[0] == specific_k:
#         i_values.append(arr[1])
#         saved_k = specific_k
#     elif saved_k != -1:
#         print(saved_k)
#         break
# np.array(i_values), no_missing[specific_k]

In [None]:
# idx = 68
# def get_k_and_i_by_row(global_inx):
    
    
#     if global_inx < length_no_missing[0]:
#          return  0 , no_missing[0][global_inx]
        
#     for k in rangek:
#         global_inx -= length_no_missing[k]
#         if global_inx < length_no_missing[k+1]:
#             return  k+1 , no_missing[k+1][global_inx]
        
        
# get_k_and_i_by_row(idx),idx 

In [None]:
# TODO: transform to a function.
# Spot check for the object at position 10662: full flux series, the chunk_size-long flux slice
# starting at array index 14 (step 7), the object ID, and the weights over the same slice.
F[objects[10662]], F[objects[10662]][7*2:7*2+chunk_size], objects[10662], W[objects[10662]][7*2:7*2+chunk_size]

In [None]:
# Four inspections of the same shape: pick (k, i), ask if_a_query for the query number l and a
# status string, print a summary line and plot the distance profile of (k, l).
# NOTE(review): get_k_i_of_row only appears commented out in this section, and if_a_query,
# selected_Q_K_i and plot_distance_flux are not defined in the visible cells — confirm they
# exist on a fresh kernel. Each `print(...), plot_distance_flux(k,l)` line builds a throw-away
# tuple; only the side effects of the two calls matter.

### max by j (as a window)
k , i = get_k_i_of_row(155)
l, string = if_a_query(k, i, selected_Q_K_i)
print(objects[k],"idx = ",i,"l =",l,".",string), plot_distance_flux(k,l)

### min by j (as a window)
k , i = get_k_i_of_row(global_inx)
l, string = if_a_query(k, i, selected_Q_K_i)
print(objects[k],"idx = ",i,"l =",l,".",string), plot_distance_flux(k,l)

k= 7683  ### 6 max by Q (as a Query )
i = 6 
l, string = if_a_query(k, i, selected_Q_K_i)
print(objects[k],"idx = ",i,"l =",l,".",string), plot_distance_flux(k,l)

k=10662 #### 7 min by Q (as a Query )
i = 7
l, string = if_a_query(k, i, selected_Q_K_i)
print(objects[k],"idx = ",i,"l =",l,".",string), plot_distance_flux(k,l)




In [None]:
import numpy as np

# Toy run of the interleaving step: take every second sample of a window from one
# series, then fill the odd slots with samples taken from a second series.

# First series and its window: the even slots of the query
f = np.arange(9)
index_no_match = 4
chunk_size = 4
Q = f[index_no_match:index_no_match + chunk_size:2]

# Second series and its step index: source of the odd slots
f = np.arange(10, 20)
index_no_match = 2

# np.insert returns a new array each time, so Q is rebound on every pass
for i in range(1, chunk_size, 2):
    print(i)
    Q = np.insert(Q, i, f[2 * index_no_match + i:2 * index_no_match + i + 1])

print(Q)
# index_no_match = 2 inserts the values 15 and 17
# index_no_match = 1 would insert 13 and 15

In [None]:
# Toy interleaved series: every step value appears twice (two entries per step).
list1 = [0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9]
# The first five overlapping windows of four entries: list2[i] == list1[2*i : 2*i + 4].
list2 = [[0,0,1,1],[1,1,2,2],[2,2,3,3],[3,3,4,4],[4,4,5,5]]

In [None]:
# Every second entry of the window that starts at step idx: list1[6:10:2] -> [3, 4].
# NOTE: rebinds the notebook-level `m` and `chunk_size`.
m=1
idx =3
chunk_size = 2 * (m + 1)
list1[idx*2:idx*2+chunk_size:2]

In [None]:
# The full window that starts at step idx: list1[6:10] -> [3, 3, 4, 4].
# NOTE: rebinds the notebook-level `m` and `chunk_size`.
m=1
idx =3
chunk_size = 2 * (m + 1)
list1[idx*2:idx*2+chunk_size]

In [None]:
# For every object, cut its `source` flags into overlapping windows of chunk_size
# entries (stride 2) and store the no_match_test verdict of each window in R[k].
# NOTE: rebinds the notebook-level `m`, `chunk_size` and `R`.
R = np.empty(len(objects), dtype=object)

m=1
chunk_size =2 * (m + 1)

for k in range(len(objects)):

    # Fetch the flags by object ID, like every other access in this notebook
    # (F[objects[k]], sig[objects[k]], ...). `source[k]` with an integer position only
    # works through pandas' deprecated positional fallback on a label-indexed Series,
    # and silently picks the wrong object whenever `objects` is not in index order.
    source_k = source[objects[k]]

    n = len(source_k)
    # One window per step, minus the m trailing steps that cannot fill a whole window.
    num_chunks = int(n // 2)-m 

    print(n,num_chunks)

    chunks = np.array([source_k[i*2 : (i*2+chunk_size)] for i in range(num_chunks)])

    result = np.array(list(map(no_match_test, chunks)))
    R[k] = result

    ### le

In [None]:
# Compare two ways of looping over j = 1..m. With m = 0 neither prints anything:
# range(1, max(1, m+1)) is range(1, 1), and the second loop is skipped by its `if m>0` guard
# (range(1, m+1) would be empty there as well).
# NOTE: rebinds the notebook-level `m`.
m=0
for j in range(1, max(1, m+1)):
    print(j*2)

if m>0: 
    for j in range(1, m+1):
        print(j*2)

In [None]:
import timeit

# Micro-benchmark: allocate six zero arrays with np.zeros versus copying one template array.
# NOTE(review): neither snippet references `m`; both read `n` from the notebook globals
# (passed through globals=globals()). `n = 100` may have been intended — confirm.
# This assignment also rebinds the notebook-level `m`.
m = 100

code_snippet_1 = """
s_1 = np.zeros(n, dtype=float)
s_2 = np.zeros(n, dtype=float)
s_3 = np.zeros(n, dtype=float)
s_4 = np.zeros(n, dtype=float)
s_5 = np.zeros(n, dtype=float)
s_6 = np.zeros(n, dtype=float)
"""

code_snippet_2 = """
zeros = np.zeros(n, dtype=float)

s_1 = zeros.copy()
s_2 = zeros.copy()
s_3 = zeros.copy()
s_4 = zeros.copy()
s_5 = zeros.copy()
s_6 = zeros.copy()

"""

# Each snippet is executed ten million times; expect this cell to be slow.
time_taken_1 = timeit.timeit(stmt=code_snippet_1, number=10000000, globals=globals())
time_taken_2 = timeit.timeit(stmt=code_snippet_2, number=10000000, globals=globals())

print("Time snippet 1:", time_taken_1)
print("Time snippet 2:", time_taken_2)



In [None]:
import timeit
# First code snippet
def first_code(): ##### this is better ! 
    """Benchmark variant 1: scale each cell by ``total_matched / (row_sum * column_sum)`` in one step.

    Cells whose denominator is zero are multiplied by ``np.inf`` instead
    (a cell that is itself 0 therefore becomes ``nan``).

    Works on a private float copy: ``concatenated_array[:]`` is only a view
    (ndarray) or a shallow copy (list of rows), so the in-place ``*=`` below would
    otherwise overwrite the shared ``concatenated_array`` and every later run —
    including ``second_code`` — would start from already-scaled data.

    Returns the scaled array so the result can be inspected; ``timeit`` ignores it.
    """
    I_i_j = np.array(concatenated_array, dtype=float)  # np.array copies by default
    for j, sj in enumerate(sum_j_rows):
        for i, si in enumerate(sum_i_columns):
            dn = (si * sj)
            if dn == 0:
                I_i_j[j][i] *= np.inf
            else:
                I_i_j[j][i] *= total_matched/ dn
    return I_i_j
# Second code snippet
def second_code():
    """Benchmark variant 2: multiply the whole table by ``total_matched`` first, then divide cell by cell.

    Each cell is divided by (column sum * row sum); where that product is zero
    the cell is multiplied by ``np.inf`` instead. Nothing is returned.
    """
    scaled = concatenated_array[:] * total_matched
    for row, row_sum in enumerate(sum_j_rows):
        for col, col_sum in enumerate(sum_i_columns):
            denom = col_sum * row_sum
            if denom != 0:
                scaled[row][col] /= denom
            else:
                scaled[row][col] *= np.inf

# Test and compare the performance: each function is executed num_iterations times
# and timeit reports the total wall time of those runs in seconds.
num_iterations = 1

time_first = timeit.timeit(first_code, number=num_iterations)
time_second = timeit.timeit(second_code, number=num_iterations)

print("Time taken by first code snippet:", time_first)
print("Time taken by second code snippet:", time_second)


#

#

#

# distance dict to dataframe 

In [None]:
# NOTE(review): displays the entire object `d`. The visible cells work with d_g / d_r —
# confirm `d` still exists on a fresh kernel, and prefer showing a small slice.
d

#

#

# Trash

In [None]:
# np.savez('nested_arrays.npz', array1=d, array2=R, array3=Q[:], array4= R_l, array5 = selected_Q_K_i, array6= num_by_obj)
# np.savez('nested_arrays.npz', array1=d, array2=R, array3=Q[:], array4=R_l, array5=selected_Q_K_i, array6=num_by_obj, additional_variable1=m, additional_variable2=additional_variable2)
# Load the arrays from the .npz file
#data = np.load('nested_arrays.npz', allow_pickle=True)

# # Retrieve the arrays from the loaded data
# d1 = data['array1']
# R1 = data['array2']
# Q1 = data['array3']

In [None]:
def plot_distance_flux_g(k,l):
    """Plot the even-indexed (green) band of object ``k`` against query ``l``.

    Top panel: flux per time step (``x`` where ``source == 0``, ``o`` with an
    error bar otherwise), the even entries of ``Q[l]`` and a grey band over the
    query window. Bottom panel: ``d_g[k][l]`` per window, every window labelled
    in red with its entry of ``all_matches_g[k]`` and marked by its status in
    ``R_l_g[k][l]``.
    """
    obj_id = objects[k]
    flux = F[obj_id]
    n_steps = int(len(flux) / 2)

    plt.figure(figsize=(15, 10))

    # ---- Top panel: flux ----
    plt.subplot(2, 1, 1)

    for step in range(n_steps):
        pos = 2 * step
        missing = source[obj_id][pos] == 0
        if missing:
            plt.errorbar(step, flux[pos], c='g', marker='x')
        else:
            plt.errorbar(step, flux[pos], sig[obj_id][pos], c='g', marker='o')

    # Empty artists that only feed the legend.
    for legend_marker, legend_label in (('o', 'origin'), ('x', 'missing points !')):
        plt.plot([], [], color='green', marker=legend_marker, label=legend_label)

    plt.plot(range(n_steps), flux[::2], c='g', linewidth = 1)

    query_even = Q[l][::2]
    plt.plot(range(int(len(Q[l])/2)), query_even, c='C0', label='Q[l]', marker='.')

    # Grey band over the query window, widened by half a step on each side.
    last_step = int(len(Q[l])/2 - 1)
    indices = np.arange(0, last_step + 1)
    indices = np.concatenate(([indices[0] - 0.5], indices, [indices[-1] + 0.5]))
    plt.fill_between(indices, min(query_even)/1.2, max(query_even)*1.2, color='gray', alpha=0.2)

    plt.ylabel('Flux')
    plt.legend()

    # ---- Bottom panel: distance ----
    plt.subplot(2, 1, 2)
    dist = d_g[k][l]
    plt.plot(range(len(dist)), dist, color='C0', linestyle='-', linewidth=1)

    matches = all_matches_g[k]
    for pos, match_count in enumerate(matches):
        # Label sits 5 % above the point.
        plt.text(pos+0.2, dist[pos]+dist[pos]*5/100, str(match_count), fontsize=10, color='red', ha='left')

    # Empty artists that only feed the legend.
    legend_rows = (
        ('black', 'o', 'Query chosed'),
        ('red', 'x', 'missing cases !'),
        ('blue', 's', 'Matched here'),
        ('green', '*', 'Matches with a different `l` (Query)'),
        ('yellow', '^', f'Not matched with any of the {L_max} options we selected'),
    )
    for row_color, row_marker, row_label in legend_rows:
        plt.plot([], [], color=row_color, marker=row_marker, label=row_label)

    for pos, status in enumerate(R_l_g[k][l]):
        # The order of the tests is the priority of the markers.
        if status == -99:
            look = ('red', 'x', 50)
        elif status == l:
            look = ('blue', 's', 50)
        elif status == -2:
            look = ('black', 'o', 50)
        elif matches[pos] == 0:
            look = ('yellow', '^', 50)
        else:
            look = ('green', '*', 75)
        plt.scatter(pos, dist[pos], color=look[0], marker=look[1], s=look[2])

    plt.legend(fontsize=8)
    plt.ylabel('distance')

    plt.subplots_adjust(top=0.93)  # leave room for the super title
    plt.suptitle(f"{objects[k]}", fontname='Arial', fontsize=16, fontweight='bold')

    plt.show()



In [None]:
def plot_distance_flux_r(k,l):
    """Plot the odd-indexed (red) band of object ``k`` against query ``l``.

    Top panel: flux per time step (``x`` where ``source == 0``, ``o`` with an
    error bar otherwise), the odd entries of ``Q[l]`` and a grey band over the
    query window. Bottom panel: ``d_r[k][l]`` per window; every window is marked
    by its status in ``R_l_r[k][l]`` and labelled, in the marker colour, with its
    entry of ``all_matches_r[k]``.
    """
    obj_id = objects[k]
    flux = F[obj_id]
    n_steps = int(len(flux) / 2)

    plt.figure(figsize=(15, 10))

    # ---- Top panel: flux ----
    plt.subplot(2, 1, 1)

    for step in range(n_steps):
        pos = 2 * step + 1
        missing = source[obj_id][pos] == 0
        if missing:
            plt.errorbar(step, flux[pos], c='r', marker='x')
        else:
            plt.errorbar(step, flux[pos], sig[obj_id][pos], c='r', marker='o')

    # Empty artists that only feed the legend.
    for legend_marker, legend_label in (('x', 'missing points !'), ('o', 'origin')):
        plt.plot([], [], color='red', marker=legend_marker, label=legend_label)

    plt.plot(range(n_steps), flux[1::2], c='r', linewidth = 1)

    query_odd = Q[l][1::2]
    plt.plot(range(int(len(Q[l])/2)), query_odd, c='C1', label='Q[l]', marker='.')

    # Grey band over the query window, widened by half a step on each side.
    last_step = int(len(Q[l])/2 - 1)
    indices = np.arange(0, last_step + 1)
    indices = np.concatenate(([indices[0] - 0.5], indices, [indices[-1] + 0.5]))
    plt.fill_between(indices, min(query_odd)/1.2, max(query_odd)*1.2, color='gray', alpha=0.2)

    plt.ylabel('Flux')
    plt.legend()

    # ---- Bottom panel: distance ----
    plt.subplot(2, 1, 2)
    dist = d_r[k][l]
    plt.plot(range(len(dist)), dist, color='C0', linestyle='-', linewidth=1)
    matches = all_matches_r[k]

    # Empty artists that only feed the legend.
    legend_rows = (
        ('black', 'o', 'Query chosed'),
        ('red', 'x', 'missing cases !'),
        ('blue', 's', 'Matched here'),
        ('green', '*', 'Matches with a different `l` (Query)'),
        ('yellow', '^', f'Not matched with any of the {L_max} options we selected'),
    )
    for row_color, row_marker, row_label in legend_rows:
        plt.plot([], [], color=row_color, marker=row_marker, label=row_label)

    for pos, status in enumerate(R_l_r[k][l]):
        # The order of the tests is the priority of the markers.
        if status == -99:
            look = ('red', 'x', 50)
        elif status == l:
            look = ('blue', 's', 50)
        elif status == -2:
            look = ('black', 'o', 50)
        elif matches[pos] == 0:
            look = ('yellow', '^', 50)
        else:
            look = ('green', '*', 75)
        plt.scatter(pos, dist[pos], color=look[0], marker=look[1], s=look[2])
        # Label sits 5 % above the point.
        plt.text(pos+0.2, dist[pos]+dist[pos]*5/100, str(matches[pos]), fontsize=10, color=look[0], ha='left')

    plt.legend(fontsize=8)
    plt.ylabel('distance')

    plt.subplots_adjust(top=0.93)  # leave room for the super title
    plt.suptitle(f"{objects[k]}, k= {k}, l = {l}", fontname='Arial', fontsize=16, fontweight='bold')

    plt.show()

