In [None]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt

# Retrieve data

We begin by fetching the reduced data using the Python script `data_transfer.py` from the file `df_merged.parquet`, and then load it into a pandas DataFrame.

In [None]:
df = pd.read_parquet('../scripts/df_merged.parquet', engine='pyarrow')

In [None]:
df.head(2)

In [None]:
df.shape

Here we extract all unique IDs from our data frame:

In [None]:
unique_ids = df['objectId'].unique().tolist()
len(unique_ids)

#

To calculate the weight values $w_i$, we use the formula:

$$
w_i = \begin{cases}
\dfrac{1}{\sigma_i^2}, & \text{if data is available for day } i \\
0, & \text{otherwise}
\end{cases}
$$

In [None]:
# A measurement is unusable when the row is flagged as missing (source == 0)
# or when its uncertainty is zero, which would make 1/sigma^2 infinite.
missing_data = (df['source'] == 0) | (df['dc_sigflux'] == 0)
# The nr weight divides by nr_sigflux, so it needs its own zero-uncertainty
# guard; every row that was already masked stays masked.
nr_missing_data = missing_data | (df['nr_sigflux'] == 0)

# np.where evaluates both branches for every row; the mask only selects which
# value is kept, so masked rows end up with weight 0 instead of inf.
df['dc_weight'] = np.where(missing_data, 0, 1 / (df['dc_sigflux'] ** 2))
df['nr_weight'] = np.where(nr_missing_data, 0, 1 / (df['nr_sigflux'] ** 2))

In [None]:
df[['source','dc_weight']].head(3)

# 

# 

# With distance - factor test 

We group the data by object ID and, for each object, create NumPy arrays of the flux, the flux uncertainty, the weight, and the `source` flag (which is 0 when the data for that day are missing). We also determine the length of each time series.

In [None]:
# One group per object; each .apply below yields a pandas Series indexed by
# objectId whose values are the per-object NumPy arrays.
grouped = df.groupby('objectId')

F = grouped['dc_flux'].apply(lambda x: x.values)#.values
sig = grouped['dc_sigflux'].apply(lambda x: x.values)#.values
W = grouped['dc_weight'].apply(lambda x: x.values)#.values
source = grouped['source'].apply(lambda x: x.values)#.values
lengths = grouped['source'].apply(lambda x: len(x))#.values


# The trailing `.values` conversion is left commented out on purpose. Per the
# original author's note it caused a problem: the arrangement of values in the
# resulting arrays might not be aligned with expectations, especially when the
# groups have different lengths. The Series are therefore kept and are looked
# up by objectId (e.g. F[objects[k]]) in the cells below.

We define the parameter `m` that sets the length of the query (window, or chunk), the threshold `factor` applied to the distance, and the resulting size `chunk_size` of each window.

In [None]:
# m sets the query length: a window spans m+1 consecutive pairs of samples.
m=1
# Threshold used later in `d <= factor` to declare a match. It has the form
# k + 3*sqrt(2k) with k = 2m+1 -- presumably mean + 3 sigma of a chi-square
# distribution with k degrees of freedom; TODO confirm.
factor = 2*m+1 + 3*np.sqrt(2*(2*m+1))
# Number of samples in one window (two samples per pair).
chunk_size = 2 * (m + 1)

The `'no_match_test'` function evaluates whether there are any matches in the provided array, which contains the source test values for a window. If the sum of the array is less than or equal to 1, indicating that all values are missing or only one value is present, the function returns -99. Otherwise, it returns -1 to initialize the window's status as 'no match'.

In [None]:
def no_match_test(array):
    """Return the initial status code for one window of `source` flags.

    The flags are summed: a total of 1 or less means every value in the
    window is missing, or only one is present, and the window is marked
    unusable with -99. Any other window starts out as "no match" (-1).
    """
    return -99 if array.sum() <= 1 else -1

"`objects`" list contains a subset of the objects we intend to work with.

"`L_max`" is defined to facilitate partial iteration, serving as a debugging aid by allowing a limit to be set on the number of iterations performed.

We initialize the NumPy arrays with `None` values.

In [None]:
# Subset of object IDs processed in this experiment.
objects = unique_ids[0:1000]
num_objects = len(objects)
# Upper bound on the number of queries (iterations of the matching loop).
L_max = int(num_objects/2)
# L_max = 5
print("L_max ", L_max)

# T = np.empty(num_objects, dtype=object)
# F = np.empty(num_objects, dtype=object)
# W = np.empty(num_objects, dtype=object)
# sig = np.empty(num_objects, dtype=object) ### we will remove it ! 
# source = np.empty(num_objects, dtype=object)
# lengths= np.empty(num_objects, dtype=object)

# Object-dtype containers (filled with None by np.empty) that will hold one
# NumPy array per object (R) or per (object, query) pair (R_l, alp, d):
#   R    current status of every window of an object
#   R_l  status of every window with respect to each query l
#   alp  scale factor of each window with respect to each query l
#   d    distance of each window to each query l
R = np.empty(num_objects, dtype=object)
R_l = np.empty((num_objects, L_max), dtype=object)
alp = np.empty((num_objects, L_max), dtype=object)
d = np.empty((num_objects, L_max), dtype=object)

In [None]:
indexes_objects = range(num_objects)
Q = [None] * (L_max)

We initialize R using the '`no_match_test`' function.

In [None]:
# Build the initial window status of every object: -99 for windows without
# enough data, -1 ("no match yet") otherwise. R keeps the running status,
# R_l keeps an independent copy of it for every query index l.
for k in indexes_objects:
    object_id = objects[k]
    object_source = source[object_id]

    n = lengths[object_id]
    num_chunks = int(n // 2) - m

    # Window i covers samples [2*i, 2*i + chunk_size): windows slide by one pair.
    chunks = np.array([object_source[2 * i : 2 * i + chunk_size] for i in range(num_chunks)])
    result = np.array([no_match_test(chunk) for chunk in chunks])

    R[k] = result.copy()
    for l in range(L_max):
        R_l[k][l] = result.copy()
    


### Loop to compute the distance of the subsequence in the time series to its nearest neighbor.

In [None]:
# Wall-clock timing of the whole query-selection / matching loop.
start_time = time.time()


# l indexes the queries: every pass of the while-loop picks one new query Q[l]
# and compares it against every window of every object.
l= 0
while (l < L_max):
    # For every object, the window positions whose status in R is still -1 (unmatched).
    # NOTE(review): if all per-object index arrays happen to have the same length,
    # np.array builds a 2-D object array instead of an array of arrays, and the
    # fancy indexing further down may then fail -- confirm.
    indexes_array = np.array([np.where(array == -1)[0] for array in R], dtype=object)
    
    has_non_empty_list = np.any([value.size > 0 for value in indexes_array])
    if not has_non_empty_list:
        # No unmatched window is left in any object: stop before using all L_max queries.
        print("break , l = ", l )
        break
        
    # Take the new query from the first object that still has an unmatched window.
    for k in range(len(R)): # we can remove the loop for here ! ???
          if indexes_array[k].size != 0:
            f = F[objects[k]]
            
            index_no_match = indexes_array[k][0]

            #print(index_no_match, R[k])
            # Remember which object supplied the query; k is reused by the next loop.
            k_Query_taked = k 
            
            # Window i starts at sample 2*i and spans chunk_size samples.
            Q[l] = f[index_no_match*2 : index_no_match*2 +chunk_size]
            break

    # Compare the query against every window of every object.
    for k in range(len(objects)):
        f = F[objects[k]]
        w = W[objects[k]]
        n = lengths[objects[k]]
        n_c = n - 2*m # (number of chunks x 2) ! it's (n/2 - m) but to optimize we mutiply by 2 directly !  
        #print(n, n_c,len(R[k]))



        # s_1 / s_2 accumulate, per sample position, the terms f*h*w and h^2*w
        # over the m+1 sample pairs of the query.
        s_1 = np.zeros(n_c, dtype=float)
        s_2 = np.zeros(n_c, dtype=float)
        
        for j in range(0,m+1): 
            h = np.tile(Q[l][j*2: j*2+2], (len(f[j*2:j*2+ n_c]) // 2, 1)).ravel() # array of h for r and g successive for the vectorisation

            s_1[:] += (f[j*2:j*2+ n_c]*h*w[j*2:j*2+ n_c])
            s_2[:] += (h**2 * w[j*2:j*2+ n_c])


        # Add the two interleaved samples of each pair: one numerator and one
        # denominator per window.
        s_n = s_1[::2] + s_1[1::2]  
        s_d = s_2[::2] + s_2[1::2] 
        
        # alp = sum(f*h*w) / sum(h^2*w): the scale that minimises the weighted
        # squared difference between the window and the scaled query.
        mask_no_0 = (s_d != 0)
        alp[k][l] = np.zeros_like(s_d, dtype=float)

        alp[k][l][mask_no_0] = s_n[mask_no_0] / s_d[mask_no_0] # # Perform division only where s_d(i) is not zero

        alpha = np.repeat(alp[k][l], 2) # duplicate alpha for each value (one for r and second for g)
     
    
        # dd accumulates the weighted squared residual (f - alpha*h)^2 * w per sample position.
        dd = np.zeros(n_c, dtype=float)
        
        for j in range(0,m+1):
            h = np.tile(Q[l][j*2: j*2+2], (len(f[j*2:j*2+ n_c]) // 2, 1)).ravel() # array of h for r and g successive for the vectorisation
            
            dd[:] += ((f[j*2:j*2+ n_c] - alpha[:] * h)**2) * w[j*2:j*2+ n_c] 
            #alpha[:n-j*2] ==> alpha[:]

        # One distance per window: sum of the two interleaved samples.
        d[k][l] = dd[::2] + dd[1::2]
        
        factor_comparison =  d[k][l] <= factor
                
        # Only windows that were still unmatched (-1) get labelled with this query in R.
        R[k][indexes_array[k][factor_comparison[indexes_array[k]]]] = l # explanation follows below!

#         print("indexes_array",indexes_array[k])
#         print("factor_comparison",factor_comparison)
#         print("factor_comparison[indexes_array]",factor_comparison[indexes_array[k]])
#         print("indexes_array[factor_comparison[indexes_array]]",indexes_array[k][factor_comparison[indexes_array[k]]])
#         print("R[k]",R[k])
#         print("indexes_array[factor_comparison[indexes_array]]",R[k][indexes_array[k][factor_comparison[indexes_array[k]]]])
#         print()
#         print()
        # R_l records every window within the threshold of query l, whatever its
        # status in R, except windows flagged -99 (not enough data).
        for i in range(len(factor_comparison)):
            if factor_comparison[i] and R_l[k][l][i] != -99:# and i != index_no_match: it isn't bad but It may 
                R_l[k][l][i] = l
                
        """for i in indexes_array[k]:
            #print(i)
            if d[k][l][i] <= factor : 
                R[k][i] = l"""
                
    # Mark the window the query was taken from with -2 in the per-query table.
    R_l[k_Query_taked][l][index_no_match] = -2
#     R[k_Query_taked][index_no_match] = -2

    
     
    #print("l = ",l)
 
    l += 1 
    
# Number of queries actually used.
print("l = ",l)



end_time = time.time()

# Compute the elapsed time
elapsed_time = end_time - start_time

print("Elapsed time:", elapsed_time, "seconds")

Let's break down the expression `R[k][indexes_array[k][factor_comparison[indexes_array[k]]]] = l` step by step:

1. `indexes_array[k]`: This selects the array of indexes corresponding to the k-th element of `indexes_array`.
2. `factor_comparison[indexes_array[k]]`: This uses integer (fancy) indexing to pick, from the boolean array `factor_comparison`, only the entries at the positions listed in `indexes_array[k]`. The result is a boolean array with one entry per still-unmatched window.
3. `indexes_array[k][factor_comparison[indexes_array[k]]]`: This gives the indices where the condition `factor_comparison` is true for the k-th element of `indexes_array`.
4. `R[k][indexes_array[k][factor_comparison[indexes_array[k]]]]`: This uses the indices obtained in the previous step to select elements from the k-th row of `R`.
5. `= l`: Finally, it assigns the value `l` to the selected elements of `R[k]`.


In [None]:
# Persist the distances (d), the window statuses (R) and the queries (Q).
# d and R are object-dtype arrays, so NumPy stores them with pickle.
np.savez('nested_arrays.npz', array1=d, array2=R, array3=Q)

# Load the arrays from the .npz file
# allow_pickle=True is required to read the object arrays back; only use it
# on files you produced yourself, since unpickling can execute arbitrary code.
data = np.load('nested_arrays.npz', allow_pickle=True)

# Retrieve the arrays from the loaded data
# d1 = data['array1']
# R1 = data['array2']
# Q1 = data['array3']

#

#

# plot the results 

In [None]:
k =0
l = 0
unique_ids[k]

In [None]:
#matches = number_matches(R_l[k])
def number_matches(R_l_k):
    # Get the length of the first sub-array
    length = len(R_l_k[0])

    # Initialize the list to store the number of matches for each index
    matches = [0] * length

    # Iterate through each sub-array
    for sub_array in R_l_k:
        # Iterate through each element and count matches for each index
        for i, elem in enumerate(sub_array):
            if elem >= 0:
                matches[i] += 1

    return matches

In [None]:
def number_matches_for_all_k(R_l):
    """Return the per-window match counts of every object.

    `R_l` holds, for each object k, the sequence of per-query status arrays
    `R_l[k]`. Element k of the returned list is `number_matches(R_l[k])`:
    for each window position of that object, the number of queries whose
    status entry is >= 0.

    The counting is delegated to `number_matches` (defined in the previous
    cell) rather than repeated here.
    """
    return [number_matches(R_l_k) for R_l_k in R_l]


# all_matches will be a list where each element is matches for each k
all_matches = number_matches_for_all_k(R_l)


In [None]:
def lowest_values(all_matches, status=None):
    """Rank all usable windows by their number of matches, lowest first.

    Parameters
    ----------
    all_matches : sequence of sequences
        all_matches[k][i] is the match count of window i of object k.
    status : sequence of sequences, optional
        status[k][i] is the status code of window i of object k; windows
        with status -99 are left out. Defaults to the notebook-level `R`,
        which is what this function always used.

    Returns
    -------
    list of (match_count, k, i) tuples for every window that is not -99
    (the full list, not a fixed number), sorted by match_count. The sort is
    stable, so ties keep their (k, i) order.
    """
    if status is None:
        status = R  # notebook-level status table

    ranked = [
        (match_count, k, i)
        for k, matches_k in enumerate(all_matches)
        for i, match_count in enumerate(matches_k)
        if status[k][i] != -99
    ]
    ranked.sort(key=lambda entry: entry[0])
    return ranked

# lowest_ will be a list of tuples where each tuple contains (match_count, k, i)
lowest = lowest_values(all_matches)
lowest

In [None]:
def plot_distance_flux(k,l):
    """Plot the flux series of object `objects[k]` and its distances to query `Q[l]`.

    Reads the notebook-level variables F, source, sig, Q, d, R_l, all_matches,
    objects and L_max, and calls `number_matches`.

    Top panel: the flux of the object, with even-index samples in colour C0
    and odd-index samples in colour C1. Samples whose `source` flag is 0 are
    drawn as 'x' without an error bar; the others as 'o' with `sig` as the
    error bar. The query Q[l] is overlaid starting at x = 0 (not at the
    position it was taken from), with a grey band around it.

    Bottom panel: the distance d[k][l] of every window to the query. Each
    window is annotated in red with the number of queries that matched it,
    and marked according to its status in R_l[k][l].

    Parameters
    ----------
    k : int
        Position of the object in `objects`.
    l : int
        Index of the query.
    """
    
    plt.figure(figsize=(14, 12))

    # Plot for F[k]
    plt.subplot(2, 1, 1)

    # One x position per pair of samples: sample 2*i is drawn in C0, sample 2*i+1 in C1.
    for i in range(int(len(F[objects[k]])/2)):
        if source[objects[k]][2*i] == 0:
            marker = 'x'
            plt.errorbar(i, F[objects[k]][2*i], 
                     #sig[objects[k]][2*i]*0.4,
                     c='C0', marker=marker)
        else:
            marker = 'o'
            plt.errorbar(i, F[objects[k]][2*i], 
                     sig[objects[k]][2*i],
                     c='C0', marker=marker)

        if source[objects[k]][2*i+1] == 0:
            marker = 'x'
            plt.errorbar(i, F[objects[k]][2*i+1],
                     #sig[objects[k]][2*i+1]*0.4,
                     c='C1', marker=marker)
            
        else:
            marker = 'o'
            plt.errorbar(i, F[objects[k]][2*i+1],
                     sig[objects[k]][2*i+1],
                     c='C1', marker=marker)
        
    # Empty plots that only create the legend entries.
    plt.plot([], [], color='C1', marker='x', label='missing points !')
    plt.plot([], [], color='C0', marker='o', label='origin')
    plt.plot([], [], color='C0', marker='x', label='missing points !')
    plt.plot([], [], color='C1', marker='o', label='origin')

    plt.plot(range(int(len(F[objects[k]])/2)), F[objects[k]][::2], c='C0', linewidth = 1)
    plt.plot(range(int(len(F[objects[k]])/2)), F[objects[k]][1::2], c='C1', linewidth = 1)

    # The query is drawn from x = 0, whatever window it originally came from.
    plt.plot(range(int(len(Q[l])/2)), Q[l][::2], c='g', label='Q[l]',marker='.', linewidth=3, zorder=3)
    plt.plot(range(int(len(Q[l])/2)), Q[l][1::2], c='r', label='Q[l]',marker='.', linewidth=3, zorder=3)

    # Define the window of indices
    window_start = 0  # Index of the window start
    window_end = int(len(Q[l])/2 - 1)  # Index of the window end

    # Create an array of float indices
    indices = np.arange(window_start, window_end + 1)
    indices = np.concatenate(([indices[0] - 0.5], indices, [indices[-1] + 0.5]))

    # Plot a shaded region for the window
    plt.fill_between(indices, min(Q[l])/1.2, max(Q[l])*1.2, color='gray', alpha=0.2)

    # plt.xlabel('Index')
    plt.ylabel('Flux')
    # plt.title('Flux Plot')
    plt.legend()




    ###############                       Plot for d[k][0]                       ###############

    plt.subplot(2, 1, 2)
    plt.plot(range(len(d[k][l])), d[k][l],color='C0', linestyle='-',linewidth=1)
    # Match counts come from the precomputed all_matches for the marker colours,
    # and are recomputed with number_matches for the text labels.
    matches = all_matches[k]
    for i, match_count in enumerate(number_matches(R_l[k])):
        plt.text(i+0.2, d[k][l][i]+d[k][l][i]*5/100, str(match_count), fontsize=10, color='red', ha='left')


    # Plot dummy points with desired colors and markers
    plt.plot([], [], color='black', marker='o', label='Query chosed')
    plt.plot([], [], color='red', marker='x', label='missing cases !')
    plt.plot([], [], color='blue', marker='s', label='Matched here')
    plt.plot([], [], color='green', marker='*', label='Matches with a different `l` (Query)')
    plt.plot([], [], color='yellow', marker='^', label=f'Not matched with any of the {L_max} options we selected')

    # Marker per window, tested in this order: -99 (missing), matched by this
    # query, -2 (the query itself), matched by no query at all, otherwise
    # matched by some other query.
    for i, val in enumerate(R_l[k][l]):
        if val == -99:
            plt.scatter(i, d[k][l][i], color='red', marker='x', s=50)  # marker size 50
        elif val == l:
            plt.scatter(i, d[k][l][i], color='blue', marker='s', s=50)
        elif val == -2:
            plt.scatter(i, d[k][l][i], color='black', marker='o', s=50)  
        #elif (val == -1) and (R[k][i] == -1 or R[k][i] == -2):
        elif matches[i]==0:
            plt.scatter(i, d[k][l][i], color='yellow', marker='^', s=50)  
        else:
            plt.scatter(i, d[k][l][i], color='green', marker='*', s=75)
            
            


    plt.legend(fontsize=8) 
    #plt.legend(loc='upper right', fontsize=8)

    plt.ylabel('distance')



    ###############                       Plot for alpha[k][l]                       ###############

#     plt.subplot(3, 1, 3)
#     plt.plot(range(len(alp[k][l])), alp[k][l], marker='.', linestyle='-',color='black')
#     plt.xlabel('Index')
#     plt.ylabel('Value')
#     # plt.title('alpha Plot')




    plt.subplots_adjust(top=0.93)  # Adjust the top margin for the super title
    plt.suptitle(f"{objects[k]}, k = {k}, l = {l} ", fontname='Arial', fontsize=16, fontweight='bold')
    #plt.tight_layout()
    plt.show()



# plot by multiple l

In [None]:
l_values = [8,0,1,2,3]#range(L_max)  # Replace num_values_of_l with the actual number of l values
k=1
for l in l_values:
    
    plot_distance_flux(k,l)


# plot  with one l 

In [None]:
# # Plotting
plt.figure(figsize=(14, 12))
# plt.subplots_adjust(top=0.80)  # Adjust the top margin for the super title
# Plot for F[k]
plt.subplot(3, 1, 1)

# plt.errorbar(range(int(len(F[k])/2)), F[k][::2], W[k][::2], c='g', label='F[k] green',marker='o')
# #plt.errorbar(range(int(len(F[k])/2)), F[k][1::2], W[k][1::2], c='r', label='F[k] red',marker='o')

for i in range(int(len(F[objects[k]])/2)):
    if source[objects[k]][2*i] == 0:
        marker = 'x'
    else:
        marker = 'o'
    plt.errorbar(i, F[objects[k]][2*i], 
                 sig[objects[k]][2*i]*0.4,
                 c='C0', marker=marker)
    
    if source[objects[k]][2*i+1] == 0:
        marker = 'x'
        plt.errorbar(i, F[objects[k]][2*i+1],
                 #sig[objects[k]][2*i+1]*0.4,
                 c='C1', marker=marker)
    else:
        marker = 'o'
        plt.errorbar(i, F[objects[k]][2*i+1],
                 sig[objects[k]][2*i+1]*0.4,
                 c='C1', marker=marker)
plt.plot([], [], color='C1', marker='x', label='missing points !')
plt.plot([], [], color='C0', marker='o', label='origin')
plt.plot([], [], color='C0', marker='x', label='missing points !')
plt.plot([], [], color='C1', marker='o', label='origin')

plt.plot(range(int(len(F[objects[k]])/2)), F[objects[k]][::2], c='C0', linewidth = 1)
plt.plot(range(int(len(F[objects[k]])/2)), F[objects[k]][1::2], c='C1', linewidth = 1)

plt.plot(range(int(len(Q[l])/2)), Q[l][::2], c='g', label='Q[l]',marker='.', linewidth=3, zorder=3)
plt.plot(range(int(len(Q[l])/2)), Q[l][1::2], c='r', label='Q[l]',marker='.', linewidth=3, zorder=3)

# Define the window of indices
window_start = -0  # Index of the window start
window_end = int(len(Q[l])/2 - 1)  # Index of the window end

# Create an array of float indices
indices = np.arange(window_start, window_end + 1)
indices = np.concatenate(([indices[0] - 0.5], indices, [indices[-1] + 0.5]))

# Plot a shaded region for the window
plt.fill_between(indices, min(Q[l])/1.2, max(Q[l])*1.2, color='gray', alpha=0.2)

# plt.xlabel('Index')
plt.ylabel('Flux')
# plt.title('Flux Plot')
plt.legend()




#                                           Plot for d[k][0]                                      #

plt.subplot(3, 1, 2)
plt.plot(range(len(d[k][l])), d[k][l],color='C0', linestyle='-',linewidth=1)


# Plot dummy points with desired colors and markers
plt.plot([], [], color='black', marker='o', label='Query chosed')
plt.plot([], [], color='red', marker='x', label='missing cases !')
plt.plot([], [], color='blue', marker='s', label='Matched here')
plt.plot([], [], color='green', marker='*', label='Matches with a different `l` (Query)')
plt.plot([], [], color='yellow', marker='^', label=f'Not matched with any of the {L_max} options we selected')

for i, val in enumerate(R_l[k][l]):
    if val == -99:
        plt.scatter(i, d[k][l][i], color='red', marker='x', s=50)  # Increase the marker size to 100
    elif val == l:
        plt.scatter(i, d[k][l][i], color='blue', marker='s', s=50)  # Increase the marker size to 100
    elif val == -2:
        plt.scatter(i, d[k][l][i], color='black', marker='o', s=50)  # Increase the marker size to 100
    elif (val == -1) and (R[k][i] == -1):
        plt.scatter(i, d[k][l][i], color='yellow', marker='^', s=50)  # Increase the marker size to 100
    else:
        plt.scatter(i, d[k][l][i], color='green', marker='*', s=75)  # Increase the marker size to 100



plt.legend(fontsize=8) 
#plt.legend(loc='upper right', fontsize=8)

plt.ylabel('distance')



#                                           Plot for alpha[k][l]                                      #

plt.subplot(3, 1, 3)
plt.plot(range(len(alp[k][l])), alp[k][l], marker='.', linestyle='-',color='black')
plt.xlabel('Index')
plt.ylabel('Value')
# plt.title('alpha Plot')




plt.subplots_adjust(top=0.93)  # Adjust the top margin for the super title
plt.suptitle(f"{objects[k]}, k = {k}, l = {l} ", fontname='Arial', fontsize=16, fontweight='bold')
#plt.tight_layout()
plt.show()



#

# Distinguishing the two cases!

In [None]:
m=1 # here the minimin is 1 !! 
factor = 2*m+1 + 3*np.sqrt(2*(2*m+1))
chunk_size = 2 * (m + 1)

In [None]:
# def no_match_test_s(array):
#     global m
#     if array.sum() <= 1 :
#         if m == 0 and array.sum() == 0:
#             return -99 ## all are missing, or all except one are
#         elif m != 0:
#             return -99
#     return -1 # initialise as no match  # can be modifieted ? 



In [None]:
objects = unique_ids[0:10]
num_objects = len(objects)
L_max = int(num_objects/2)

print("L_max ", L_max)


T = np.empty(num_objects, dtype=object)
F = np.empty(num_objects, dtype=object)
W = np.empty(num_objects, dtype=object)
source = np.empty(num_objects, dtype=object)
lengths= np.empty(num_objects, dtype=object)

R_r = np.empty(num_objects, dtype=object)
R_g = np.empty(num_objects, dtype=object)
alp = np.empty((num_objects, L_max), dtype=object)
d_r = np.empty((num_objects, L_max), dtype=object)
d_g = np.empty((num_objects, L_max), dtype=object)

In [None]:
indexes_objects = range(num_objects)
Q = [None] * (L_max)

In [None]:
# Build, for every object, its positional arrays (F, W, source, lengths) and
# the initial window status of each band separately.
for k in indexes_objects:
    # Rows of this object only; the boolean filter scans the whole frame on every iteration.
    T[k] = df[df['objectId'] == objects[k]][['mjd','dc_flux', 'dc_weight','source']]
    F[k] = T[k]['dc_flux'].values
    W[k] = T[k]['dc_weight'].values
    source[k] = T[k]['source'].values
    lengths[k] = len(T[k])
    n = lengths[k]
    num_chunks = int(n // 2)-m 
    
    # Even-index samples of each window go to the g chunks, odd-index samples to the r chunks.
    chunks_g = np.array([source[k][i*2 : (i*2+chunk_size):2] for i in range(num_chunks)])
    chunks_r = np.array([source[k][1+i*2 : (i*2+chunk_size):2] for i in range(num_chunks)])
    # no_match_test returns -99 when the flags of a chunk sum to 1 or less, -1 otherwise.
    result_r = np.array(list(map(no_match_test, chunks_r)))
    result_g = np.array(list(map(no_match_test, chunks_g)))
    
    R_r[k] = result_r
    R_g[k] = result_g

In [None]:
# Wall-clock timing of the per-band matching loop.
start_time = time.time()

# l indexes the queries: every pass picks one new query Q[l], built from the
# first unmatched g window and the first unmatched r window.
l = 0
while l < L_max:
    # For every object, the window positions still unmatched (-1) in each band.
    indexes_array_r = np.array([np.where(array == -1)[0] for array in R_r], dtype=object)
    indexes_array_g = np.array([np.where(array == -1)[0] for array in R_g], dtype=object)

    # Stop as soon as one of the two bands has no unmatched window left.
    empty_lists = (
        np.all([value.size == 0 for value in indexes_array_r])
        or np.all([value.size == 0 for value in indexes_array_g])
    )
    if empty_lists:
        print("break , l = ", l )
        break

    # g half of the query: even-index samples of the first unmatched g window.
    # Iterate over the objects of THIS experiment (not the stale `R` of the
    # previous one) and test for emptiness with .size: np.any() on an index
    # array is False when the only unmatched index is 0.
    for k in range(len(objects)):
        if indexes_array_g[k].size > 0:
            f = F[k]

            # x2 because the indexes count windows and a window advances by two samples.
            # TODO (original author): this point still needs checking.
            index_no_match = indexes_array_g[k][0] * 2
            Q[l] = f[index_no_match : index_no_match + chunk_size : 2]
            break

    # r half of the query: odd-index samples of the first unmatched r window,
    # interleaved into Q[l] so that it alternates g, r, g, r, ...
    for k in range(len(objects)):
        if indexes_array_r[k].size > 0:
            f = F[k]

            index_no_match = indexes_array_r[k][0] * 2
            for i in range(1, chunk_size, 2):
                Q[l] = np.insert(Q[l], i, f[index_no_match + i : index_no_match + i + 1])
            break

    # Compare the query against every window of every object.
    for k in range(len(objects)):
        f = F[k]
        w = W[k]
        n = lengths[k]
        n_c = n - 2*m  # number of windows x 2 (two samples per window position)

        # Per sample position: sums of f*h*w and h^2*w over the m+1 pairs of the query.
        s_1 = np.zeros(n_c, dtype=float)
        s_2 = np.zeros(n_c, dtype=float)

        for j in range(0, m+1):
            # h repeats the j-th (g, r) pair of the query along the whole series.
            h = np.tile(Q[l][j*2: j*2+2], (len(f[j*2:j*2+ n_c]) // 2, 1)).ravel()

            s_1[:] += (f[j*2:j*2+ n_c]*h*w[j*2:j*2+ n_c])
            s_2[:] += (h**2 * w[j*2:j*2+ n_c])

        # Unlike the joint version, the two bands are NOT summed here: each
        # sample position keeps its own numerator and denominator.
        s_n = s_1
        s_d = s_2

        mask_no_0 = (s_d != 0)
        alp[k][l] = np.zeros_like(s_d, dtype=float)
        # Perform the division only where the denominator is not zero.
        alp[k][l][mask_no_0] = s_n[mask_no_0] / s_d[mask_no_0]

        alpha = alp[k][l]

        # Weighted squared residual (f - alpha*h)^2 * w per sample position.
        dd = np.zeros(n_c, dtype=float)

        for j in range(0, m+1):
            h = np.tile(Q[l][j*2: j*2+2], (len(f[j*2:j*2+ n_c]) // 2, 1)).ravel()

            dd[:] += ((f[j*2:j*2+ n_c] - alpha[:] * h)**2) * w[j*2:j*2+ n_c]

        # Even positions are the g distances, odd positions the r distances.
        d_g[k][l] = dd[::2]
        d_r[k][l] = dd[1::2]

        # Label the still-unmatched windows that fall within the threshold.
        for i in indexes_array_g[k]:
            if d_g[k][l][i] <= factor:
                R_g[k][i] = l
        for i in indexes_array_r[k]:
            if d_r[k][l][i] <= factor:
                R_r[k][i] = l

    l += 1

# Number of queries actually used.
print("l = ",l)

end_time = time.time()

# Compute the elapsed time
elapsed_time = end_time - start_time

print("Elapsed time:", elapsed_time, "seconds")

#

In [None]:
k = 0
l = 0
unique_ids[k]

In [None]:
# Four-panel figure for the g band (even-index samples) of object `objects[k]`
# against query Q[l]: flux, distance, per-window status, and alpha.
plt.figure(figsize=(18, 20))

# Panel 1: flux of the g band
plt.subplot(4, 1, 1)

# `sig` is still the objectId-indexed Series built by the groupby cell (it is
# not re-created as a positional array in this experiment), so it has to be
# looked up by object ID. `sig[k]` would pick the k-th group in groupby order,
# which is not necessarily objects[k].
sig_k = sig[objects[k]]

for i in range(int(len(F[k])/2)):
    if source[k][2*i] == 0:
        marker = 'x'
    else:
        marker = 'o'
    plt.errorbar(i, F[k][2*i],
                 sig_k[2*i]*0.4,
                 c='g', marker=marker)

# Empty plots that only create the legend entries.
plt.plot([], [], color='green', marker='o', label='origin')
plt.plot([], [], color='green', marker='x', label='missing points !')

plt.plot(range(int(len(F[k])/2)), F[k][::2], c='g', linewidth = 1)

# The g half of the query, drawn from x = 0.
plt.plot(range(int(len(Q[l])/2)), Q[l][::2], c='C0', label='Q[l]',marker='.')

# Define the window of indices
window_start = -0  # Index of the window start
window_end = int(len(Q[l])/2 - 1)  # Index of the window end

# Create an array of float indices
indices = np.arange(window_start, window_end + 1)
indices = np.concatenate(([indices[0] - 0.5], indices, [indices[-1] + 0.5]))

# Plot a shaded region for the window
plt.fill_between(indices, min(Q[l][::2])/1.2, max(Q[l][::2])*1.2, color='gray', alpha=0.2)

plt.ylabel('Flux')
plt.legend()

# Panel 2: distance of every g window to the query
plt.subplot(4, 1, 2)
plt.plot(range(len(d_g[k][l])), d_g[k][l], marker='o', linestyle='-')
plt.ylabel('distance')

# Panel 4: alpha at the even (g) sample positions
plt.subplot(4, 1, 4)
plt.plot(range(len(alp[k][l][::2])), alp[k][l][::2], marker='.', linestyle='-',color='black')
plt.xlabel('Index')
plt.ylabel('alpha')

# Panel 3: status of every g window
plt.subplot(4, 1, 3)
# Plot dummy points with desired colors and markers
plt.plot([], [], color='red', marker='x', label='missing cases !')
plt.plot([], [], color='blue', marker='s', label='Matched here')
plt.plot([], [], color='green', marker='s', label='Matches with a different `l` (possibly a smaller `l` from a previous instance)')

for i, val in enumerate(R_g[k]):
    if val == -99:
        plt.scatter(i, 0, color='red', marker='x')
    elif val == l:
        plt.scatter(i, 0, color='blue', marker='s')
    else:
        plt.scatter(i, 0, color='green', marker='s')

plt.ylabel('R[k]')
plt.legend()  # Use unique custom legend labels

plt.subplots_adjust(top=0.93)  # Adjust the top margin for the super title
plt.suptitle(f"{objects[k]}", fontname='Arial', fontsize=16, fontweight='bold')

plt.show()



In [None]:
# Four-panel figure for the r band (odd-index samples) of object `objects[k]`
# against query Q[l]: flux, distance, per-window status, and alpha.
plt.figure(figsize=(18, 20))

# Panel 1: flux of the r band
plt.subplot(4, 1, 1)

# `sig` is still the objectId-indexed Series built by the groupby cell (it is
# not re-created as a positional array in this experiment), so it has to be
# looked up by object ID. `sig[k]` would pick the k-th group in groupby order,
# which is not necessarily objects[k].
sig_k = sig[objects[k]]

for i in range(int(len(F[k])/2)):
    if source[k][2*i+1] == 0:
        marker = 'x'
    else:
        marker = 'o'
    plt.errorbar(i, F[k][2*i+1],
                 sig_k[2*i+1],
                 c='r', marker=marker)

# Empty plots that only create the legend entries.
plt.plot([], [], color='red', marker='x', label='missing points !')
plt.plot([], [], color='red', marker='o', label='origin')

plt.plot(range(int(len(F[k])/2)), F[k][1::2], c='r', linewidth = 1)

# The r half of the query, drawn from x = 0.
plt.plot(range(int(len(Q[l])/2)), Q[l][1::2], c='C1', label='Q[l]',marker='.')

# Define the window of indices
window_start = -0  # Index of the window start
window_end = int(len(Q[l])/2 - 1)  # Index of the window end

# Create an array of float indices
indices = np.arange(window_start, window_end + 1)
indices = np.concatenate(([indices[0] - 0.5], indices, [indices[-1] + 0.5]))

# Plot a shaded region for the window
plt.fill_between(indices, min(Q[l][1::2])/1.2, max(Q[l][1::2])*1.2, color='gray', alpha=0.2)

plt.ylabel('Flux')
plt.legend()

# Panel 2: distance of every r window to the query
plt.subplot(4, 1, 2)
plt.plot(range(len(d_r[k][l])), d_r[k][l], marker='o', linestyle='-')
plt.ylabel('distance')

# Panel 4: alpha at the odd (r) sample positions
plt.subplot(4, 1, 4)
plt.plot(range(len(alp[k][l][1::2])), alp[k][l][1::2], marker='.', linestyle='-',color='black')
plt.xlabel('Index')
plt.ylabel('alpha')

# Panel 3: status of every r window
plt.subplot(4, 1, 3)
# Plot dummy points with desired colors and markers
plt.plot([], [], color='red', marker='x', label='missing cases !')
plt.plot([], [], color='blue', marker='s', label='Matched here')
plt.plot([], [], color='green', marker='s', label='Matches with a different `l` (possibly a smaller `l` from a previous instance)')

for i, val in enumerate(R_r[k]):
    if val == -99:
        plt.scatter(i, 0, color='red', marker='x')
    elif val == l:
        plt.scatter(i, 0, color='blue', marker='s')
    else:
        plt.scatter(i, 0, color='green', marker='s')

plt.ylabel('R[k]')
plt.legend()  # Use unique custom legend labels

plt.subplots_adjust(top=0.93)  # Adjust the top margin for the super title
plt.suptitle(f"{objects[k]}, k = {k}, l = {l} ", fontname='Arial', fontsize=16, fontweight='bold')

plt.show()



In [None]:
import numpy as np

# Given data
f = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8])
index_no_match = 0
chunk_size = 6

# Initialize Q[l]
Q = np.array([])

# First part
Q = f[index_no_match : index_no_match + chunk_size : 2]
index_no_match = 0
f = np.array([10, 11, 12, 13, 14, 15, 16, 17, 18])

# Second part
for i in range(1, chunk_size, 2):
    Q = np.insert(Q, i, f[index_no_match + i : index_no_match + i + 1])

# Print Q[l]
print(Q)


#

#

#

#

#

#

#

#

#

# TEST

In [None]:
list1 = [0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9]
list2 = [[0,0,1,1],[1,1,2,2],[2,2,3,3],[3,3,4,4],[4,4,5,5]]

In [None]:
m=1
idx =3
chunk_size = 2 * (m + 1)
list1[idx*2:idx*2+chunk_size:2]

In [None]:
m=1
idx =3
chunk_size = 2 * (m + 1)
list1[idx*2:idx*2+chunk_size]

In [None]:
R = np.empty(len(objects), dtype=object)

m=1
chunk_size =2 * (m + 1)

for k in range(len(objects)):

    n = len(source[k])
    #num_chunks = n // chunk_size

    #chunks = [source[k][i*chunk_size : (i+1)*chunk_size] for i in range(num_chunks)]
    num_chunks = int(n // 2)-m 

    print(n,num_chunks)


    chunks = np.array([source[k][i*2 : (i*2+chunk_size)] for i in range(num_chunks)])
#     if m>0: 
#         for j in range(1, m+1):
#             chunks.extend([source[k][i*chunk_size +j*2 : (i+1)*chunk_size+j*2] for i in range(num_chunks-1)])
#     if m > 0:
#         for j in range(1, m + 1):
#             for i in range(num_chunks - 1):
#                 chunks.append(source[k][i * chunk_size + j * 2: (i + 1) * chunk_size + j * 2])

    #result = np.concatenate(np.array([no_match_test(chunk) for chunk in chunks]))  # Apply no_match_test directly
    result = np.array(list(map(no_match_test, chunks)))
    R[k] = result

    ### le

In [None]:
m=0
for j in range(1, max(1, m+1)):
    print(j*2)

if m>0: 
    for j in range(1, m+1):
        print(j*2)

In [None]:
import timeit

m = 100

code_snippet_1 = """
s_1 = np.zeros(n, dtype=float)
s_2 = np.zeros(n, dtype=float)
s_3 = np.zeros(n, dtype=float)
s_4 = np.zeros(n, dtype=float)
s_5 = np.zeros(n, dtype=float)
s_6 = np.zeros(n, dtype=float)
"""

code_snippet_2 = """
zeros = np.zeros(n, dtype=float)

s_1 = zeros.copy()
s_2 = zeros.copy()
s_3 = zeros.copy()
s_4 = zeros.copy()
s_5 = zeros.copy()
s_6 = zeros.copy()

"""

time_taken_1 = timeit.timeit(stmt=code_snippet_1, number=10000000, globals=globals())
time_taken_2 = timeit.timeit(stmt=code_snippet_2, number=10000000, globals=globals())

print("Time snippet 1:", time_taken_1)
print("Time snippet 2:", time_taken_2)



#

#

#

# distance dict to dataframe 

In [None]:
d

#

#

# Trash