In [None]:
import math
import pandas as pd
from tkinter import Tk, filedialog
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
from sklearn.cluster import KMeans
from pyclustering.cluster.kmedoids import kmedoids #PAM
from sklearn.cluster import DBSCAN
from sklearn.neighbors import KernelDensity
from sklearn.neighbors import NearestNeighbors
import os

#from your_wavecluster_library import WaveCluster  # Replace with the actual import
import numpy as np
import pywt
import networkx as nx
from scipy.cluster.hierarchy import linkage, fcluster # For assign_labels()


from sklearn.metrics.pairwise import euclidean_distances # for CURE
from sklearn.preprocessing import StandardScaler
#from cure import cure  # You may need to install a library that implements CURE algorithm

from sklearn.neighbors import kneighbors_graph
from sklearn.cluster import AgglomerativeClustering

#Cluster Evaluation
from sklearn.metrics import silhouette_score
from sklearn.utils import resample
from sklearn.model_selection import cross_val_score
from sklearn.utils import check_random_state

#RS
import random
from numpy import genfromtxt
import copy
import timeit
from scipy.spatial import ConvexHull, distance
import collections

#Feature Selection
from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_selection import SelectKBest, f_classif
#from sklearn.feature_selection import 

from sklearn_extra.cluster import KMedoids
from sklearn.metrics.pairwise import manhattan_distances

#Filter Method
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cdist

#mRMR
#from skfeature.function.information_theoretical_based import MRMR
#from pymrmr import mRMR
from sklearn.feature_selection import mutual_info_classif

#S_Dbw
from sklearn.metrics import silhouette_score, davies_bouldin_score

<b>imports</b>

In [None]:
def Read_DataFrame(file_path):
    """
    Load a spreadsheet from disk into a pandas DataFrame.

    Parameters:
    file_path (str): Location of the Excel workbook to load.

    Returns:
    pandas.DataFrame or None: The parsed workbook, or None when reading
    fails (the error is printed instead of being raised).
    """
    try:
        return pd.read_excel(file_path)
    except Exception as read_error:
        # Best-effort loader: report the problem and signal failure with None.
        print("Error:", read_error)
    return None

In [None]:
def choose_excel_file():
    """
    Open a file dialog to choose an Excel file.

    A hidden Tk root window is created only for the lifetime of the dialog
    and destroyed afterwards so no stray window or interpreter is left behind.

    Returns:
    - str: Path to the selected Excel file (an empty value if the dialog
      is cancelled).
    """
    root = Tk()
    root.withdraw()  # Hide the main window

    try:
        file_path = filedialog.askopenfilename(
            title="Select Excel file",
            # Several patterns for one file type are passed as a tuple; the
            # "*.xlsx;*.xls" form is only understood by the Windows dialog.
            filetypes=[("Excel files", ("*.xlsx", "*.xls"))],
        )
    finally:
        # Release the hidden root even if the dialog raises.
        root.destroy()

    return file_path

In [None]:
# Ask the user for a workbook and load it into the working frame.
file_path = choose_excel_file()
dataframe = Read_DataFrame(file_path)

# Read_DataFrame signals failure by returning None.
if dataframe is None:
    print("Failed to create DataFrame.")
else:
    print("DataFrame created successfully.")
    print(dataframe.head())  # Display the first few rows of the DataFrame

In [None]:
# Keep a copy that still contains the 'TC_ID' column so the test-case
# identifiers can be looked up again at the end of the notebook.
TC_ID_df = dataframe.copy()

# Remove the identifier column from the working frame.
# NOTE(review): this cell is not idempotent - on a second run 'TC_ID' is already gone.
dataframe = dataframe.drop(columns=['TC_ID'])

<b>Preprocessing</b>

In [None]:
def preprocess_data(dataframe):
    """
    Encode every object-typed column of a DataFrame as integer codes.

    The input frame is left untouched; the encoding is applied to a copy so
    that re-running a cell never changes data an earlier cell displayed.

    Parameters:
    - dataframe (pandas.DataFrame): Input DataFrame.

    Returns:
    - pandas.DataFrame: Copy of the input in which each column of dtype
      'object' has been replaced by LabelEncoder codes cast to int64.
      Columns of any other dtype are returned unchanged.
    """
    encoded = dataframe.copy()
    for column in encoded.columns:
        if encoded[column].dtype == 'object':
            # A fresh encoder per column keeps each column's label mapping
            # independent of the previously encoded columns.
            encoded[column] = LabelEncoder().fit_transform(encoded[column]).astype('int64')

    return encoded

In [None]:
def fill_na_with_mean(dataframe):
    """
    Impute missing values with per-column means and cast the result to int64.

    Parameters:
    - dataframe: pandas DataFrame

    Returns:
    - New DataFrame in which every NaN has been replaced by the mean of its
      column and all columns have then been converted to int64 (so a
      fractional mean is stored as an integer).
    """
    column_means = dataframe.mean()
    imputed = dataframe.fillna(column_means)
    return imputed.astype('int64')

In [None]:
# Encode the categorical columns, then impute missing values (and cast to int64).
dataframe = fill_na_with_mean(preprocess_data(dataframe))

In [None]:
def remove_single_value_columns(df):
    """
    Drop every column that holds exactly one distinct value.

    Parameters:
    - df: pandas DataFrame

    Returns:
    - New DataFrame without the constant columns; the input is not modified.
    """
    distinct_counts = df.nunique()

    # Labels of the columns whose distinct-value count is exactly one
    constant_columns = distinct_counts[distinct_counts == 1].index

    return df.drop(columns=constant_columns)

In [None]:
# Working frame for the rest of the notebook: constant columns are removed.
Non_Single_value_df = remove_single_value_columns(dataframe)

In [None]:
Non_Single_value_df.columns

In [None]:
Non_Single_value_df

In [None]:
# Reset the index; reset_index() keeps the old index as a new 'index' column.
df_reset = Non_Single_value_df.reset_index()

In [None]:
df_reset

<b>PCA</b>

In [None]:
def apply_pca(dataframe, num_components=None):
    """
    Apply Principal Component Analysis (PCA) to the input DataFrame.

    Parameters:
    - dataframe (pd.DataFrame): Input DataFrame (all columns numeric).
    - num_components (int or None): Number of components to keep. If None,
      PCA keeps all components it can fit. The default is no longer bound
      to a global frame at definition time.

    Returns:
    - tuple: (pca_dataframe, sorted_eigenvalues, pca) where
        * pca_dataframe (pd.DataFrame) holds the projected data with columns
          'PC1', 'PC2', ... in component order,
        * sorted_eigenvalues (numpy.ndarray) is explained_variance_ in
          descending order,
        * pca is the fitted sklearn PCA model.
    """

    # Extract features (X)
    X = dataframe.values

    # Apply PCA
    pca = PCA(n_components=num_components)
    pca_result = pca.fit_transform(X)

    # Column j of pca_result belongs to component j, so label the columns in
    # that order. Permuting the labels by an argsort of the eigenvalues (as
    # was done before) could attach a label to the wrong column on ties.
    fitted_components = pca_result.shape[1]
    pca_columns = [f'PC{i + 1}' for i in range(fitted_components)]
    pca_dataframe = pd.DataFrame(data=pca_result, columns=pca_columns)

    # Eigenvalues in descending order
    sorted_eigenvalues = np.sort(pca.explained_variance_)[::-1]

    return pca_dataframe, sorted_eigenvalues, pca


In [None]:
Non_Single_value_df.shape[1]

In [None]:
# Accumulators: one Series per fitted model, one feature-weight frame per model.
pca_list = list()
feature_weight_list = list()

# Fit a range of PCA models, from 1 component up to one per column

for n in range(1, Non_Single_value_df.shape[1] + 1):
    
    # Create and fit the model
    PCAmod = PCA(n_components=n)
    PCAmod.fit(Non_Single_value_df)
    
    # Store the model and the total explained-variance ratio of its n components
    pca_list.append(pd.Series({'n':n, 'model':PCAmod,
                               'var': PCAmod.explained_variance_ratio_.sum()}))
    
    # Feature weight = sum of absolute loadings over the n components,
    # normalised so the weights of one model sum to 1
    abs_feature_values = np.abs(PCAmod.components_).sum(axis=0)
    feature_weight_list.append(pd.DataFrame({'n':n, 
                                             'features': Non_Single_value_df.columns,
                                             'values':abs_feature_values/abs_feature_values.sum()}))
    
# One row per n (index), with columns 'model' and 'var'
pca_df = pd.concat(pca_list, axis=1).T.set_index('n')
pca_df

In [None]:
features_df = (pd.concat(feature_weight_list)
               .pivot(index='n', columns='features', values='values')) # reshape only: one row per n, one column per feature

features_df

In [None]:
# Bar chart of the cumulative explained-variance ratio for each number of components.
# NOTE(review): 'var' holds ratios (sum of explained_variance_ratio_), not percentages.
sns.set_context('talk')
ax = pca_df['var'].plot(kind='bar')

ax.set(xlabel='Number of dimensions',
       ylabel='Percent explained variance',
       title='Explained Variance vs Dimensions');

In [None]:
# Grouped bar chart: normalised feature weight per feature, for each number of components.
ax = features_df.plot(kind='bar', figsize=(13,8))
ax.legend(loc='upper right')
ax.set(xlabel='Number of dimensions',
       ylabel='Relative importance',
       title='Feature importance vs Dimensions');

In [None]:
Non_Single_value_df

In [None]:
Non_Single_value_df

In [None]:
# Remove the 'Time.WC' column before fitting the final PCA.
# NOTE(review): not idempotent - a second run finds the column already dropped.
Non_Single_value_df = Non_Single_value_df.drop(columns=['Time.WC'])

In [None]:
# Run the encoder again (it only touches object-dtype columns), then fit PCA
# with one component per column of Pdf.
Pdf = preprocess_data(Non_Single_value_df)
pca_result_df, eigenvalues, pca_model  = apply_pca(Pdf,Pdf.shape[1])

In [None]:
print(eigenvalues)

In [None]:
print(pca_model)

In [None]:
pca_result_df

In [None]:
# Keep only the first three columns of the PCA result.
pca_result_df_3 = pca_result_df.iloc[:,0:3]
pca_result_df_3

In [None]:
# From here on pca_result_df holds only the three retained columns;
# pca_model itself is still the model fitted with all components.
pca_result_df = pca_result_df_3

<b>Going Back to Original</b>

In [None]:
#Original
Non_Single_value_df

<b>Chameleon</b>

In [None]:
def calculate_distance(dataframe, centroids, cluster_labels):
    """
    Compute, for each row, the Euclidean distance to the centroid of its cluster.

    Parameters:
    - dataframe: pandas DataFrame whose rows are the data points
    - centroids: container indexed by cluster label that yields a centroid
    - cluster_labels: iterable of labels, paired with the rows in order

    Returns:
    - pandas Series named 'Distance to Centroid' with one value per row
    """
    result = np.zeros(len(dataframe))
    labelled_rows = zip(cluster_labels, dataframe.iterrows())

    for position, (assigned_label, (_, record)) in enumerate(labelled_rows):
        offset = record.values - centroids[assigned_label]
        result[position] = np.linalg.norm(offset)

    return pd.Series(result, name='Distance to Centroid')

In [None]:
len(pca_result_df)

In [None]:
import networkx as nx
from tqdm import tqdm

def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two coordinate sequences."""
    difference = np.asarray(a) - np.asarray(b)
    return np.linalg.norm(difference)

def knn_graph(df, k, verbose=False):
    """
    Build an undirected k-nearest-neighbour graph from the rows of a DataFrame.

    Each row becomes a node (numbered by position) whose 'pos' attribute is
    the row's values. For every node the entries at ranks 1..k of the
    distance ordering are linked, skipping any at distance zero. Edges carry
    'weight' (1 / distance) and 'similarity' (int(weight * 1e4)).

    Parameters:
    - df: pandas DataFrame of point coordinates
    - k: number of neighbours considered per node
    - verbose: print a message and show a tqdm progress bar when True

    Returns:
    - networkx.Graph with graph['edge_weight_attr'] set to 'similarity'
    """
    coordinates = [record[1:] for record in df.itertuples()]
    knn = nx.Graph()
    for node_id in range(len(coordinates)):
        knn.add_node(node_id)

    if verbose:
        print("Building kNN graph (k = %d)..." % (k))
    indexed_points = enumerate(coordinates)
    if verbose:
        indexed_points = tqdm(indexed_points, total=len(coordinates))

    for node_id, point in indexed_points:
        dists = [euclidean_distance(point, other) for other in coordinates]
        # Rank 0 of the ordering is skipped; ranks 1..k are the candidates
        for neighbour in np.argsort(dists)[1:k+1]:
            if dists[neighbour] == 0:
                # Zero distance cannot be inverted into a weight
                continue
            inverse = 1.0 / dists[neighbour]
            knn.add_edge(node_id, neighbour, weight=inverse,
                         similarity=int(inverse * 1e4))
        knn.nodes[node_id]['pos'] = point

    knn.graph['edge_weight_attr'] = 'similarity'
    return knn



def part_graph(graph, k, df=None, seed=None):
    """
    Assign every node of `graph` to one of `k` clusters uniformly at random.

    No graph-partitioning algorithm is run here: labels are drawn
    independently per node, so some of the k clusters may end up empty.

    Parameters:
    - graph: networkx graph; it is modified in place (node attribute 'cluster').
    - k (int): number of cluster ids to draw from (0 .. k-1).
    - df (pandas.DataFrame or None): if given, receives a 'cluster' column
      holding the labels in node order.
    - seed (int or None): when given, the assignment is reproducible and the
      global `random` state is left untouched; when None the global `random`
      module is used, as before.

    Returns:
    - The same graph object, with the 'cluster' attribute set on every node.
    """
    nodes = list(graph.nodes())
    # A private generator keeps a seeded call from disturbing global state.
    chooser = random.Random(seed) if seed is not None else random
    clusters = chooser.choices(range(k), k=len(nodes))
    cluster_dict = dict(zip(nodes, clusters))
    nx.set_node_attributes(graph, cluster_dict, 'cluster')

    if df is not None:
        # `clusters` is already in node order, matching the row order of df
        df['cluster'] = clusters

    return graph


def pre_part_graph(graph, k, df=None, verbose=False):
    """
    Cluster the nodes of `graph` into `k` groups with k-means on node degree.

    Parameters:
    - graph: networkx graph; modified in place (node attribute 'cluster').
    - k (int): number of k-means clusters.
    - df (pandas.DataFrame or None): if given, receives a 'cluster' column
      with the labels in node order.
    - verbose (bool): print a start message when True.

    Returns:
    - The same graph object, with the 'cluster' attribute set on every node.
    """
    if verbose:
        print("Begin clustering...")

    # Step 1: Create a feature matrix for k-means.
    # The only feature is the node degree; graph.degree(node) is a single
    # number, so wrap it in a list to get an (n_nodes, 1) matrix.
    node_features = np.array([[graph.degree(node)] for node in graph.nodes()])

    # Step 2: Apply k-means clustering (fixed seed for reproducibility)
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(node_features)
    labels = kmeans.labels_

    # Step 3: Assign cluster labels to the graph nodes
    for i, node in enumerate(graph.nodes()):
        graph.nodes[node]['cluster'] = int(labels[i])

    # Update the DataFrame if provided
    if df is not None:
        # `.values` (not `.value`) yields the labels as a plain array in node order
        df['cluster'] = pd.Series(nx.get_node_attributes(graph, 'cluster')).values

    return graph


def get_cluster(graph, clusters):
    """
    Return the nodes whose 'cluster' attribute is one of `clusters`.

    Parameters:
    - graph: graph exposing a `nodes` mapping of node -> attribute dict
    - clusters: container of cluster ids to select

    Returns:
    - list of matching nodes, in the graph's node order
    """
    # `graph.nodes` is used here like everywhere else in this notebook;
    # the `graph.node` alias no longer exists in current NetworkX.
    nodes = [n for n in graph.nodes if graph.nodes[n]['cluster'] in clusters]
    return nodes


def connecting_edges(partitions, graph):
    """
    List the edges of `graph` that join the two given partitions.

    Parameters:
    - partitions: pair (first_nodes, second_nodes)
    - graph: adjacency mapping (node -> neighbours)

    Returns:
    - list of (a, b) tuples with a from the first partition and b from the
      second, for every pair that is adjacent in `graph`
    """
    first_side, second_side = partitions[0], partitions[1]
    return [
        (a, b)
        for a in first_side
        for b in second_side
        if a in graph and b in graph[a]
    ]


def min_cut_bisector(graph):
    """
    Split a copy of `graph` in two with part_graph and return the crossing edges.

    The input graph is not modified; the split is whatever part_graph
    produces for k = 2.

    Returns:
    - list of (a, b) edges with a in cluster 0 and b in cluster 1
    """
    bisected = part_graph(graph.copy(), 2)
    halves = (get_cluster(bisected, [0]), get_cluster(bisected, [1]))
    return connecting_edges(halves, bisected)


def get_weights(graph, edges):
    """Return the 'weight' attribute of each listed edge, in the same order."""
    weights = []
    for edge in edges:
        source, target = edge[0], edge[1]
        weights.append(graph[source][target]['weight'])
    return weights


def bisection_weights(graph, cluster):
    """
    Return the weights of the edges cut when bisecting one cluster.

    Parameters:
    - graph: the full graph
    - cluster: nodes of the cluster to bisect

    Returns:
    - list of 'weight' values of the edges that min_cut_bisector reports for
      the subgraph induced by `cluster`
    """
    induced = graph.subgraph(cluster)
    cut_edges = min_cut_bisector(induced)
    return get_weights(induced, cut_edges)


In [None]:
def get_cluster_centers(partitioned_graph):
    """
    Compute the mean position of the nodes in each cluster.

    Parameters:
    - partitioned_graph: graph whose nodes carry a 'cluster' attribute and a
      'pos' attribute (the node coordinates).

    Returns:
    - dict: cluster id -> numpy array with the coordinate-wise mean of the
      'pos' values of that cluster's nodes. Keys are inserted in ascending
      cluster-id order, so list(result.values()) lines up with the sorted
      cluster ids instead of depending on set iteration order.
    """
    # Look the assignment up once instead of once per cluster
    membership = nx.get_node_attributes(partitioned_graph, 'cluster')

    cluster_centers = {}
    for cluster_id in sorted(set(membership.values())):
        cluster_points = [
            partitioned_graph.nodes[node]['pos']
            for node, assigned in membership.items()
            if assigned == cluster_id
        ]
        cluster_centers[cluster_id] = np.mean(cluster_points, axis=0)
    return cluster_centers

In [None]:
# Work on a copy so the 'cluster' column written by part_graph does not end up in pca_result_df
pca_chameleon = pca_result_df.copy()

# Generate a kNN graph from the DataFrame
k = 4  # Specify the value of k for kNN graph
graph = knn_graph(pca_chameleon, k)

# Partition the graph into clusters (part_graph draws the labels at random
# and also stores them in pca_chameleon['cluster'])
k_clusters = 7  # Specify the number of clusters
partitioned_graph = part_graph(graph, k_clusters, pca_chameleon)

# Mean node position per cluster, keyed by cluster id
cents = get_cluster_centers(partitioned_graph)

In [None]:
# Get the cluster assignment for each node
cluster_assignment = nx.get_node_attributes(partitioned_graph, 'cluster')

# Define colors for each cluster (indexed by cluster id below, so at most
# len(colors) clusters can be drawn)
#colors = ['skyblue', 'salmon', 'green', 'yellow', 'black']  # Add more colors if you have more clusters
colors = ['skyblue', 'salmon', 'green', 'yellow', 'black', 'orange', 'purple', 'cyan', 'magenta', 'lime']

# Draw the graph, one node collection per cluster so each gets a legend entry
plt.figure(figsize=(8, 6))
pos = nx.spring_layout(partitioned_graph)  # You can use different layout algorithms
for cluster in set(cluster_assignment.values()):
    nodes = [node for node, value in cluster_assignment.items() if value == cluster]
    nx.draw_networkx_nodes(partitioned_graph, pos, nodelist=nodes, node_color=colors[cluster], node_size=300, label=f'Cluster {cluster}')
nx.draw_networkx_edges(partitioned_graph, pos)
nx.draw_networkx_labels(partitioned_graph, pos, font_size=10)
plt.title('Graph Visualization with Cluster Colors')
plt.legend()
plt.show()

In [None]:
# Cluster label of every row, as a numpy array in row order.
labels = pca_chameleon['cluster'].values

In [None]:
labels

In [None]:
# Centroids as plain float lists, ordered by cluster id so that
# arr_list[label] is the centroid of `label` regardless of the dict's
# insertion order.
# NOTE(review): positional lookup still assumes the ids are 0..k-1 with no
# empty cluster - confirm before relying on arr_list[label].
arr_list = [list(map(float, cents[label])) for label in sorted(cents)]

In [None]:
def calculate_wcvr(data, labels, centers):
    """
    Calculate the Within-Cluster Variance Ratio (WCVR) used by
    calculate_sd_bw_index.

    For every cluster the mean squared Euclidean distance of its points to
    the cluster centre is computed; the result is the sum of these values
    divided by the number of distinct labels.

    Parameters:
    - data: pandas DataFrame (or 2-D array-like), one row per data point
    - labels: array-like of cluster labels, one per row (list, numpy array
      or Series); the labels need not be contiguous
    - centers: sequence of cluster centres, indexed by cluster label

    Returns:
    - wcvr: float, Within-Cluster Variance Ratio (NaN when `labels` is empty)
    """
    label_array = np.asarray(labels)
    points = np.asarray(data)

    present_labels = np.unique(label_array)
    num_clusters = len(present_labels)
    if num_clusters == 0:
        return float('nan')

    total_wcv = 0.0
    # Iterate over the labels that actually occur instead of range(k), so
    # non-contiguous label sets are not silently skipped.
    for cluster_label in present_labels:
        # Labels without a usable centre (negative ids, or ids beyond the
        # centre list) contribute nothing.
        if cluster_label < 0 or cluster_label >= len(centers):
            continue
        cluster_points = points[label_array == cluster_label]
        center = np.asarray(centers[int(cluster_label)], dtype=float)

        within_cluster_variance = np.mean(np.sum((cluster_points - center) ** 2, axis=1))
        total_wcv += within_cluster_variance

    wcvr = total_wcv / num_clusters

    return wcvr

In [None]:
def calculate_sd_bw_index(data, labels, centers):
    """
    Calculate the clustering-validation score this notebook calls "S_Dbw".

    The value is the average of three terms: the Davies-Bouldin score,
    one minus the silhouette score, and the within-cluster variance ratio
    from calculate_wcvr.
    NOTE(review): this composite is not obviously the published S_Dbw
    formula - confirm which metric is intended.

    Parameters:
    - data: pandas DataFrame, input data
    - labels: array-like, cluster labels assigned to each data point
    - centers: sequence of cluster centers, indexed by cluster label

    Returns:
    - sd_bw_index: float, the combined score, or NaN when the scikit-learn
      scores cannot be computed for the given labels (a message is printed).
    """
    data_array = data.values  # Convert DataFrame to numpy array
    try:
        silhouette_avg = silhouette_score(data_array, labels)
        db_index = davies_bouldin_score(data_array, labels)
    except ValueError:
        # Neither score is defined for this labelling; return a sentinel
        # instead of continuing with an undefined silhouette value.
        print("Only 1 cluster -> S_Dbw is not possible")
        return float('nan')

    wcvr = calculate_wcvr(data, labels, centers)

    sd_bw_index = (db_index + (1 - silhouette_avg) + wcvr) / 3

    return sd_bw_index

In [None]:
# Score the current partition (labels come from part_graph, centroids from arr_list)
sd_bw_index = calculate_sd_bw_index(pca_result_df, labels, arr_list)
print("S_Dbw Index:", sd_bw_index)

<b>Get Original Data</b>

In [None]:
def get_original_data(pca_result_df, centroids, pca_model, feature_names=None):
    """
    Map PCA scores and cluster centroids back into the original feature space.

    The scores may contain fewer columns than the model has components (for
    example when only the leading components were kept). Missing trailing
    components are filled with zeros, which reconstructs the data from the
    supplied components only.

    Parameters:
    - pca_result_df (pandas.DataFrame): PCA scores, columns in component order.
    - centroids (array-like): Centroids in the same PCA space, one per row.
    - pca_model (sklearn.decomposition.PCA): Fitted PCA model.
    - feature_names (sequence or None): Column names for the reconstructed
      frames. When None, the model's `feature_names_in_` is used if present;
      otherwise the score column names are reused when the widths match,
      and generic 'feature_<i>' names are generated when they do not.

    Returns:
    - tuple: (original_data_df, centroids_df), both pandas DataFrames in the
      original feature space.

    Raises:
    - ValueError: if the scores have more columns than the model has components.
    """
    n_components = int(pca_model.n_components_)

    def _pad_scores(scores):
        # Bring the scores to the width inverse_transform expects.
        scores = np.asarray(scores, dtype=float)
        if scores.ndim == 1:
            scores = scores.reshape(1, -1)
        missing = n_components - scores.shape[1]
        if missing < 0:
            raise ValueError(
                f"Scores have {scores.shape[1]} columns but the PCA model "
                f"only has {n_components} components."
            )
        if missing:
            scores = np.hstack([scores, np.zeros((scores.shape[0], missing))])
        return scores

    def _column_names(width):
        # Choose labels whose count matches the reconstructed width.
        if feature_names is not None:
            return list(feature_names)
        model_names = getattr(pca_model, 'feature_names_in_', None)
        if model_names is not None and len(model_names) == width:
            return list(model_names)
        if width == len(pca_result_df.columns):
            return pca_result_df.columns
        return [f'feature_{i}' for i in range(width)]

    # Invert PCA transformation to get original data
    original_data = pca_model.inverse_transform(_pad_scores(pca_result_df.values))
    original_data_df = pd.DataFrame(data=original_data,
                                    columns=_column_names(original_data.shape[1]))

    # Invert the centroids the same way
    original_centroids = pca_model.inverse_transform(_pad_scores(centroids))
    centroids_df = pd.DataFrame(data=original_centroids,
                                columns=_column_names(original_centroids.shape[1]))

    return original_data_df, centroids_df

# Map the PCA scores and the cluster centroids (arr_list) back to feature space
# using the PCA model fitted earlier in the notebook.
original_data_df, original_centroids_df = get_original_data(pca_result_df, arr_list, pca_model)

In [None]:
original_data_df

In [None]:
original_centroids_df

In [None]:
# Zip 'Cluster Labels' with pca_result_df.iterrows()
zipped_results = zip(labels, pca_result_df.iterrows())
sum = 0

# Display the results
for cluster_label, (index, row) in zipped_results:
    sum+= 1
    print(f'Cluster Label: {cluster_label}, Index: {index}, Row Values: {row.values}')


In [None]:
print(sum)

In [None]:
# Pair every cluster label with its PCA row
zipped_results = zip(labels, pca_result_df.iterrows())

# One record per data point
data_list = [
    {'Cluster Label': cluster_label, 'Index': index, 'Row Values': row.values}
    for cluster_label, (index, row) in zipped_results
]

# Build the frame and keep the first row of each ('Cluster Label', 'Index') pair
Checking_df = pd.DataFrame(data_list).drop_duplicates(subset=['Cluster Label', 'Index'])


In [None]:
print("DataFrame with Removed Duplicates:")
Checking_df

In [None]:
len(Checking_df['Row Values'][0])

In [None]:
pd.Series(labels, name='Cluster Labels')

In [None]:
# Cluster labels as a named Series, shared by both steps below
label_series = pd.Series(labels, name='Cluster Labels')

# Distance between each data point and its cluster centroid in the PCA space
distance_df = calculate_distance(pca_result_df, arr_list, label_series)

# PCA scores, cluster labels and centroid distances side by side
Final_result_df = pd.concat([pca_result_df, label_series, distance_df], axis=1)

In [None]:
Final_result_df

In [None]:
# For every cluster, the index of the row that lies closest to its centroid
min_distance_indices = Final_result_df.groupby('Cluster Labels')['Distance to Centroid'].idxmin()

# Pull those rows out and expose their former index as a 'TC' column
min_distance_rows = (
    Final_result_df.loc[min_distance_indices]
    .reset_index()
    .rename(columns={'index': 'TC'})
)


In [None]:
# One representative row per cluster; 'TC' holds its row index in Final_result_df.
kd = pd.DataFrame(min_distance_rows)
kd

In [None]:
kd['TC']

In [None]:
# Fetch the representative rows from the feature frame by position.
# NOTE(review): assumes the row positions of Non_Single_value_df match the 'TC' values - confirm.
selected_rows = Non_Single_value_df.iloc[kd['TC']]

print(selected_rows)


In [None]:
# Two views of the same rows: one renumbered from 0, one keeping the original row index.
selected_rows_pd = pd.DataFrame(selected_rows.reset_index(drop=True))
selected_rows_pd_Explicit_Minimum_to_centroids = pd.DataFrame(selected_rows)

In [None]:
selected_rows_pd_Explicit_Minimum_to_centroids

<b>Selected Rows</b>

In [None]:
selected_rows_pd

In [None]:
pca_model

In [None]:
pd.DataFrame(pca_model.components_)

In [None]:
# Row labels of the selected test cases. This frame keeps the original row
# index; its first column is a feature, so the index (not column 0) is what
# identifies the rows.
selected_rows_indices = selected_rows_pd_Explicit_Minimum_to_centroids.index.tolist()
selected_pdfs = Pdf.loc[selected_rows_indices]

In [None]:
selected_pdfs

In [None]:
original_data_df

In [None]:
excel_file_path = ''  # TODO: set the target workbook path before running
# Ensure the directory exists, create it if necessary. A path without a
# directory part has an empty dirname, which os.makedirs rejects, so only
# create the directory when there is one.
output_directory = os.path.dirname(excel_file_path)
if output_directory:
    os.makedirs(output_directory, exist_ok=True)

# Save the selected rows to a single-sheet Excel workbook
with pd.ExcelWriter(excel_file_path, engine='xlsxwriter') as writer:
    selected_rows_pd.to_excel(writer, sheet_name='Sheet1', index=False)

In [None]:
Final_result_df

<b>Plotting using First 3 columns in PCA Dataframe, Cluster label for each PCA and Distance to Centroid</b>

In [None]:
# Create a 3D scatter plot
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

# Assign unique colors to clusters: a viridis colormap resampled to one
# entry per cluster (plt.get_cmap replaces the removed plt.cm.get_cmap)
colors = plt.get_cmap('viridis', len(Final_result_df['Cluster Labels'].unique()))

# Define unique marker styles for each cluster; reused cyclically when
# there are more clusters than styles
marker_styles = ['o', 's', 'D', '^', 'v', 'p', '*', 'h', 'x', '+']

# Scatter plot for each cluster
for cluster_label in Final_result_df['Cluster Labels'].unique():
    cluster_data = Final_result_df[Final_result_df['Cluster Labels'] == cluster_label]
    ax.scatter(cluster_data['PC1'], cluster_data['PC2'], cluster_data['PC3'],
               label=f'Cluster {cluster_label}', c=[colors(cluster_label)],
               marker=marker_styles[cluster_label % len(marker_styles)])

# Plot centroids: arr_list[i] is drawn and coloured as centroid i
for i, (cluster_label, centroid) in enumerate(zip(pd.Series(labels).unique(), arr_list)):
    ax.scatter(centroid[0], centroid[1], centroid[2], marker='x', s=200, label=f'Centroid {i}', c=[colors(i)])

cluster_numbers = len(Final_result_df['Cluster Labels'].unique())  # Number of clusters

# Set labels (labelpad moves each label away from its axis)
ax.set_xlabel('PC1', labelpad=20)
ax.set_ylabel('PC2', labelpad=20)
ax.set_zlabel('PC3', labelpad=20)
ax.set_title('3D Scatter Plot of Clusters and Centroids')
# Move legend to top left and make it smaller
ax.legend(loc='upper left', bbox_to_anchor=(0, 1), prop={'size': 8})

# Save the figure only when a target path has been configured
path_to_image = ''
if path_to_image:
    plt.savefig(path_to_image)

# Show the plot
plt.show()

In [None]:
# Create a 1D scatter plot
fig, ax = plt.subplots(figsize=(10, 8))

pc1_values = pca_result_df['PC1'].values

# Distance of each data point from each centroid along PC1 only.
# Subtracting the full centroid from the PC1 column would broadcast PC1
# against every centroid coordinate, so only centroid[0] is used here.
# The loop variable is not named `distance`, which would shadow the
# scipy.spatial.distance module imported at the top of the notebook.
distances = []
for centroid in np.array(arr_list):
    pc1_distance = np.abs(pc1_values - centroid[0])
    distances.append(pc1_distance)

# Assign colors based on the closest centroid
colors = np.argmin(distances, axis=0)

# Scatter plot with colored data points
scatter = ax.scatter(pca_result_df['PC1'], np.zeros_like(pca_result_df['PC1']), c=colors, cmap='viridis')

# Plot centroids; only the first one is labelled so the legend has a single entry
for position, centroid in enumerate(np.array(arr_list)):
    ax.scatter(centroid[0], 0, marker='x', s=100, color='black',
               label='Centroid' if position == 0 else None)

# Set labels
ax.set_xlabel('PC1')
ax.set_title('1D Scatter Plot of PC1 with Colored Data Points')
ax.legend()

# Add colorbar
cbar = plt.colorbar(scatter, ax=ax)
cbar.set_label('Cluster')

# Save the plot as an image only when a target path has been configured
Path_to_Image = ''
if Path_to_Image:
    plt.savefig(Path_to_Image)

# Show the plot
plt.show()

<b> Save to Excel</b>

In [None]:
excel_file_path = ''  # TODO: set the target workbook path before running
# Ensure the directory exists, create it if necessary. A path without a
# directory part has an empty dirname, which os.makedirs rejects, so only
# create the directory when there is one.
output_directory = os.path.dirname(excel_file_path)
if output_directory:
    os.makedirs(output_directory, exist_ok=True)

# Save the result frames as one Excel workbook with four sheets
with pd.ExcelWriter(excel_file_path, engine='xlsxwriter') as writer:
    # Feature frame used for the final PCA
    Non_Single_value_df.to_excel(writer, sheet_name='Original_Data', index=False)

    # Data reconstructed from the PCA scores
    original_data_df.to_excel(writer, sheet_name='Original_data_back_from_PCA', index=False, startrow=0)

    # Representative row of each cluster (closest to its centroid)
    selected_rows_pd_Explicit_Minimum_to_centroids.to_excel(writer, sheet_name='TCs_With_min_Dist_to_Centroids', index=False, startrow=0)

    # Centroids mapped back from PCA space
    original_centroids_df.to_excel(writer, sheet_name='Centroids_back_from_PCA', index=False, startrow=0)

In [None]:
Non_Single_value_df.columns

In [None]:
Non_Single_value_df

In [None]:
original_data_df

In [None]:
# Independent copy of the representative rows (original row index kept) for the transposed view below.
Original_selected_rows_pd_Explicit_Minimum_to_centroids = selected_rows_pd_Explicit_Minimum_to_centroids.copy()

In [None]:
selected_rows_pd_Explicit_Minimum_to_centroids

In [None]:
original_centroids_df

In [None]:
# Map the row index of each selected row to its 'TC_ID' value, taken from the
# copy of the data that still contains that column.
new_TC_ID_columns_dict = {i: TC_ID_df.at[i, 'TC_ID'] for i in TC_ID_df.index if i in selected_rows_pd_Explicit_Minimum_to_centroids.index}

# Display the dictionary
new_TC_ID_columns_dict

In [None]:
# Transpose so that each selected row becomes a column (features become rows).
Trpose = Original_selected_rows_pd_Explicit_Minimum_to_centroids.transpose().copy()
Trpose

In [None]:
# Rename columns from row-index values to the corresponding TC_ID values
Trpose = Trpose.rename(columns=new_TC_ID_columns_dict)
Trpose