# In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# Summary rows collected across every input file; written out after the loop.
primary_results = []
secondary_results = []

for k in range(100):
    # File index 50 is deliberately skipped (the reason is not recorded here).
    if k == 50:
        continue
    data = pd.read_csv(f"D:/ReClusterData/reclustered{k}_data.csv")
    summations_per_label = np.zeros(20)
    correlation_matrix = np.zeros((20, 20))

    # Pull each label's RSSI values out of the DataFrame once per file rather
    # than re-filtering it for every (i, j) pair in the double loop below.
    rssi_by_label = [
        data[data['Cluster_Label'] == label]['Serving RSSI_4G'].to_numpy()
        for label in range(20)
    ]

    for i in range(20):
        summation = 0
        for j in range(20):
            # np.corrcoef needs inputs of equal length, so both series are
            # truncated to the length of the shorter one.
            min_len = min(len(rssi_by_label[i]), len(rssi_by_label[j]))
            if min_len > 0:
                correlation_matrix[i, j] = np.corrcoef(
                    rssi_by_label[i][:min_len],
                    rssi_by_label[j][:min_len],
                )[0, 1]
            else:
                # At least one of the two labels has no rows in this file.
                correlation_matrix[i, j] = 0

            summation += correlation_matrix[i, j]
        # NOTE(review): this total is accumulated before the NaN cleanup
        # below, so a single NaN correlation makes the label's total NaN.
        # Nothing in this section reads summations_per_label afterwards.
        summations_per_label[i] = summation

    # Undefined correlations (NaN) are treated as "no correlation".
    correlation_matrix = np.nan_to_num(correlation_matrix, nan=0.0)

    def find_primary_secondary(co_matrix, threshold=0.3):
        """Select the primary node of a correlation matrix and its secondaries.

        The primary node is the index of the row whose entries add up to the
        largest strictly positive total; ties keep the earliest row. The
        secondary nodes are the other column indices whose entry in the
        primary row is at least ``threshold``.

        Args:
            co_matrix: Square matrix indexable as ``co_matrix[i][j]`` (a NumPy
                array or a list of lists). It is not modified.
            threshold: Minimum entry in the primary row for a column to count
                as a secondary node.

        Returns:
            A ``(primary_node, secondary_node)`` tuple, where ``secondary_node``
            is a list of indices in ascending order. If no row has a positive
            total (including an empty matrix), ``primary_node`` is 0.
        """
        primary_node = 0
        if len(co_matrix) == 0:
            return primary_node, []

        primary_sum = 0
        for row_index, row in enumerate(co_matrix):
            # Accumulate left to right so results match a plain running sum.
            row_total = 0
            for value in row:
                row_total += value

            # Strict comparison: a later row must beat the best so far.
            if row_total > primary_sum:
                primary_sum = row_total
                primary_node = row_index

        secondary_node = [
            column
            for column, value in enumerate(co_matrix[primary_node])
            if value >= threshold and column != primary_node
        ]

        return primary_node, secondary_node

    def delete_secondary_primary(co_matrix, primary_node, secondary_node):
        """Zero out the rows and columns of the primary and secondary nodes.

        The matrix is modified in place, so the selected nodes no longer
        contribute to any row total afterwards.

        Args:
            co_matrix: Square matrix indexable as ``co_matrix[i][j]`` (a NumPy
                array or a list of lists); its size is taken from its length.
            primary_node: Index of the primary node to clear.
            secondary_node: Iterable of secondary node indices to clear.

        Returns:
            The same ``co_matrix`` object that was passed in.
        """
        size = len(co_matrix)
        for node in (primary_node, *secondary_node):
            for index in range(size):
                co_matrix[node][index] = 0
                co_matrix[index][node] = 0
        return co_matrix

    print()
    print(f"For matrix {k}:")
    print()
    # Repeatedly pick a primary node and its secondaries, record their
    # medians, and remove them from the matrix. The loop is capped at 20
    # passes.
    for _ in range(20):
        p_node, s_node = find_primary_secondary(correlation_matrix)
        # find_primary_secondary falls back to node 0 when no row total is
        # positive. Node 0 is a genuine pick only if its own row total is
        # positive; otherwise nothing is left to select, so stop here
        # instead of recording the fallback as a real primary node.
        if p_node == 0 and not sum(correlation_matrix[0]) > 0:
            break
        correlation_matrix = delete_secondary_primary(correlation_matrix, p_node, s_node)
        print('Primary Node:', p_node)
        if s_node:
            print('Secondary Nodes:', s_node)

        # The primary node goes to primary_results, each secondary node to
        # secondary_results; both kinds of row share the same layout.
        targets = [(p_node, primary_results)]
        targets.extend((sec_node, secondary_results) for sec_node in s_node)
        for node, results in targets:
            node_data = data[data['Cluster_Label'] == node]
            results.append({
                # Serial number counts rows within the destination list.
                'slNo': len(results) + 1,
                'Cluster': k,
                'Node': node,
                'Median Longitude': node_data['Longitude'].median(),
                'Median Latitude': node_data['Latitude'].median(),
                'Median RSSI': node_data['Serving RSSI_4G'].median(),
            })

# Destination CSV files for the two result tables.
primary_file_path = Path("D:/Primary_Data.csv")
secondary_file_path = Path("D:/Secondary_Data.csv")

# Turn the collected row dictionaries into tables.
primary_results_df = pd.DataFrame(primary_results)
secondary_results_df = pd.DataFrame(secondary_results)

# Write each table to its file without the DataFrame index column.
for frame, destination in (
    (primary_results_df, primary_file_path),
    (secondary_results_df, secondary_file_path),
):
    frame.to_csv(destination, index=False)
