In [8]:
!pip install biopython



In [9]:
def eig_val(filename):
    xlst = []
    ylst = []
    zlst = []

    dic = {
        'A': (0.9511, 0.309, -1),
        'B': (0.9511, 0.309, -1),
        'C': (0.809, 0.5878, -1),
        'D': (-0.9511, -0.309, 1),
        'E': (0.309, -0.9511, 1),
        'F': (-0.309, 0.9511, 1),
        'G': (0.5878, -0.809, -1),
        'H': (0.809, -0.5878, 1),
        'I': (-0.5878, 0.809, -1),
        'J': (-0.5878, 0.809, -1),
        'K': (-0.809, -0.5878, 1),
        'L': (-0.809, 0.5878, -1),
        'M': (0.5878, 0.809, 1),
        'N': (-0.5878, -0.809, -1),
        'O': (-0.5878, -0.809, -1),
        'P': (0.309, 0.9511, -1),
        'Q': (0.9511, -0.309, 1),
        'R': (-0.309, -0.9511, 1),
        'S': (0.0, -1.0, -1),
        'T': (1.0, 0.0, -1),
        'U': (1.0, 0.0, -1),
        'V': (0.0, 1.0, -1),
        'W': (-0.9511, 0.309, 1),
        'X': (-0.9511, 0.309, 1),
        'Y': (-1.0, 0.0, 1),
        'Z': (-1.0, 0.0, 1)
    }

    for elem in filename:
        if elem not in dic:
            raise ValueError(f"Invalid amino acid: {elem}")

        x, y, z = map(float, dic[elem])  # Ensure values are treated as floats
        xlst.append(x)
        ylst.append(y)
        zlst.append(z)

    xcom = np.mean(xlst)
    ycom = np.mean(ylst)
    zcom = np.mean(zlst)

    # Convert lists to numpy arrays for operations
    xlst = np.array(xlst) - xcom
    ylst = np.array(ylst) - ycom
    zlst = np.array(zlst) - zcom

    ixx=np.sum(np.square(ylst)+np.square(zlst))
    iyy=np.sum(np.square(xlst)+np.square(zlst))
    izz=np.sum(np.square(xlst)+np.square(ylst))
    ixy=np.sum(np.multiply(xlst,ylst))
    iyx=ixy
    iyz=np.sum(np.multiply(ylst,zlst))
    izy=iyz
    ixz=np.sum(np.multiply(xlst,zlst))
    izx=ixz

    # Eigenvalue calculation
    from numpy.linalg import eig
    evalue, evector = eig(np.array([[ixx, -ixy, -ixz],
                                     [-iyx, iyy, -iyz],
                                     [-izx, -izy, izz]]))

    return evalue

In [10]:
import pandas as pd
import numpy as np
cep = pd.read_csv("sample_data/fretrieve.csv")
exp = np.array(cep.iloc[:,[-1]]).flatten()# Extract Experimental gene id
exp = list(exp[:170])
pre = np.array(cep.iloc[:,[1]]).flatten()# Extract Predicted gene id
pre = list(pre)
print(exp)
#Data Correction
print(pre)


['Q09428', 'Q9P2N4', 'O95622', 'Q15848', 'P31751', 'P59780', 'P18847', 'P16615', 'Q93084', 'Q8WXX7', 'P10415', 'Q07817', 'O43521', 'Q93088', 'P15056', 'Q8NCU7', 'A6NLJ0', 'P01024', 'Q9HC96', 'P42574', 'Q14790', 'P04040', 'Q6IB77', 'P35520', 'Q53HC0', 'P30279', 'Q5VV42', 'Q16878', 'Q8N5K1', 'Q8IY22', 'P50416', 'P13498', 'P05177', 'Q16760', 'P42892', 'P05305', 'P25101', 'P24530', 'P00533', 'P22413', 'Q52LR7', 'P14921', 'Q9H0X4', 'P25445', 'P49327', 'Q9NSA1', 'Q9C0B1', 'P01275', 'P47871', 'P35557', 'Q12851', 'Q14397', 'Q8NEA6', 'P43220', 'P16520', 'P55259', 'P43304', 'P07203', 'Q14449', 'P09488', 'Q03014', 'P19367', 'Q30154', 'Q9NP66', 'P17096', 'P09601', 'P20823', 'P35680', 'P41235', 'P00738', 'P02790', 'P10997', 'P05362', 'P41134', 'Q9Y6M1', 'P78552', 'O15357', 'P01308', 'P06213', 'P35568', 'Q9Y4H2', 'P56199', 'Q8WWA0', 'Q9NQC1', 'Q86VZ6', 'Q14654', 'Q96T55', 'P51787', 'A8MYU2', 'Q9UEF7', 'Q3SY56', 'Q8TD94', 'Q6VAB6', 'Q8WXI2', 'P41159', 'O15243', 'P48357', 'P11150', 'Q7L5Y9', 'Q9UQF2',

In [11]:
from Bio import Entrez, SeqIO
import numpy as np

# Set up Entrez email
Entrez.email = '19phmp03@uohyd.ac.in'  # Always provide a valid email for NCBI access

def retrieve(filename):
    """
    Fetch protein sequences by ID from NCBI and compute eigenvalues for each sequence.

    Args:
    filename (str or list): Protein ID(s) to be fetched from NCBI.

    Returns:
    lst1 (list): List of eigenvalues for each protein sequence.
    """
    try:
        # Fetch protein sequences from NCBI by their IDs
        handle = Entrez.efetch(db='protein', id=filename, rettype='fasta', retmode='text')
        records = list(SeqIO.parse(handle, 'fasta'))  # Parse FASTA records into a list

        if len(records) == 0:
            raise ValueError(f"No records found for the ID: {filename}")

        # Display description of the last retrieved record
        print("Last retrieved sequence description:", records[-1].description)

        # Initialize list to store eigenvalues for each sequence
        lst1 = []

        # Process each sequence record
        for record in records:
            # Check if the sequence is valid
            sequence = str(record.seq)
            if len(sequence) == 0:
                raise ValueError(f"Empty sequence found for record: {record.id}")

            # Compute eigenvalues for the sequence
            eigenvalues = eig_val(sequence)  # Assuming sequence is passed as a string
            lst1.append(eigenvalues)

        return lst1

    except Exception as e:
        print(f"Error retrieving or processing sequences: {e}")
        return []


In [12]:
import numpy as np
import pandas as pd

def database1(file1, file2):
    print(f"Retrieving eigenvalues for experimental IDs: {file1}")
    print(f"Retrieving eigenvalues for predicted IDs: {file2}")

    exp_eigenvalues = retrieve(file1)
    pre_eigenvalues = retrieve(file2)

    if len(exp_eigenvalues) == 0 or len(pre_eigenvalues) == 0:
        raise ValueError("No eigenvalues found for one or both inputs. Please check the filenames.")

    # The rest of your code...
    # Retrieve eigenvalues for experimental and predicted protein IDs
    exp_eigenvalues = retrieve(file1)
    pre_eigenvalues = retrieve(file2)

    if len(exp_eigenvalues) == 0 or len(pre_eigenvalues) == 0:
        raise ValueError("No eigenvalues found for one or both inputs. Please check the filenames.")

    # Initialize a distance matrix
    distance_matrix = np.zeros((len(exp_eigenvalues), len(pre_eigenvalues)))

    # Calculate pairwise distances
    for i, exp_elem in enumerate(exp_eigenvalues):
        for j, pre_elem in enumerate(pre_eigenvalues):
            distance_matrix[i, j] = np.linalg.norm(exp_elem - pre_elem)

    # Create a DataFrame with appropriate indices and columns
    df = pd.DataFrame(distance_matrix, index=[f"Exp_{i+1}" for i in range(len(exp_eigenvalues))],
                      columns=[f"Pre_{j+1}" for j in range(len(pre_eigenvalues))])

    return df



In [13]:
y = database1(exp, pre)
print(y)

Retrieving eigenvalues for experimental IDs: ['Q09428', 'Q9P2N4', 'O95622', 'Q15848', 'P31751', 'P59780', 'P18847', 'P16615', 'Q93084', 'Q8WXX7', 'P10415', 'Q07817', 'O43521', 'Q93088', 'P15056', 'Q8NCU7', 'A6NLJ0', 'P01024', 'Q9HC96', 'P42574', 'Q14790', 'P04040', 'Q6IB77', 'P35520', 'Q53HC0', 'P30279', 'Q5VV42', 'Q16878', 'Q8N5K1', 'Q8IY22', 'P50416', 'P13498', 'P05177', 'Q16760', 'P42892', 'P05305', 'P25101', 'P24530', 'P00533', 'P22413', 'Q52LR7', 'P14921', 'Q9H0X4', 'P25445', 'P49327', 'Q9NSA1', 'Q9C0B1', 'P01275', 'P47871', 'P35557', 'Q12851', 'Q14397', 'Q8NEA6', 'P43220', 'P16520', 'P55259', 'P43304', 'P07203', 'Q14449', 'P09488', 'Q03014', 'P19367', 'Q30154', 'Q9NP66', 'P17096', 'P09601', 'P20823', 'P35680', 'P41235', 'P00738', 'P02790', 'P10997', 'P05362', 'P41134', 'Q9Y6M1', 'P78552', 'O15357', 'P01308', 'P06213', 'P35568', 'Q9Y4H2', 'P56199', 'Q8WWA0', 'Q9NQC1', 'Q86VZ6', 'Q14654', 'Q96T55', 'P51787', 'A8MYU2', 'Q9UEF7', 'Q3SY56', 'Q8TD94', 'Q6VAB6', 'Q8WXI2', 'P41159', 'O15

In [14]:
y.head()

Unnamed: 0,Pre_1,Pre_2,Pre_3,Pre_4,Pre_5,Pre_6,Pre_7,Pre_8,Pre_9,Pre_10,...,Pre_2886,Pre_2887,Pre_2888,Pre_2889,Pre_2890,Pre_2891,Pre_2892,Pre_2893,Pre_2894,Pre_2895
Exp_1,2406.126716,3130.636453,2288.571438,1630.527721,1397.460177,2169.018468,1693.502521,1877.500307,459.792257,90.948615,...,2254.727472,2534.010582,5688.744556,2550.446533,2567.846288,2680.771312,1709.51843,1772.855163,2223.362848,3948.874327
Exp_2,3439.099014,4070.301238,748.011533,2160.158587,1552.877034,2867.086937,2868.375225,2996.350204,2056.716888,1900.633793,...,2842.267097,3533.6294,4280.165327,3280.941295,3266.273119,3428.187977,1970.102234,2081.827244,2866.499373,2468.078314
Exp_3,1845.684037,2468.055732,2312.651866,2772.727705,66.268749,1267.342379,1346.632434,1441.942709,1166.095366,1352.577476,...,1256.459183,1935.257038,5880.459401,1685.414786,1674.451866,1833.423756,430.452211,528.816894,1273.594377,4036.635614
Exp_4,714.322854,222.319896,4650.430308,4721.539581,2390.762624,1072.011373,1411.422595,1225.998047,2634.872256,3011.718376,...,1108.315432,583.887828,8215.416456,655.347911,680.78319,517.566576,1988.26557,1875.191412,1081.110077,6357.383469
Exp_5,498.366419,740.211091,4069.191726,4218.727541,1809.743721,494.076271,993.684478,823.86213,2151.275157,2520.714188,...,535.775456,445.900027,7632.305465,72.154664,138.793629,119.609836,1410.091163,1297.186331,509.255913,5777.754192


In [None]:
import plotly.graph_objects as go

z_data = y
fig = go.Figure(data=[go.Surface(z=z_data.values)])
fig.update_layout(title='Distance Matrix', autosize=False,
                  width=1000, height=1000,
                  margin=dict(l=65, r=50, b=65, t=90))

fig.show()

In [None]:
def z_score_outlier_removal(df, z_thresh=3):
    mean = df.mean().mean()
    std = df.std().std()
    # Replace values that are beyond z_thresh standard deviations with the mean ± z_thresh*std
    filtered_df = df.applymap(lambda x: x if abs((x - mean) / std) <= z_thresh else mean + z_thresh * std)
    return filtered_df

# Apply Z-Score method
df_without_outliers = z_score_outlier_removal(y)

# Plot the result
z_data_no_outliers = df_without_outliers
fig_no_outliers = go.Figure(data=[go.Surface(z=z_data_no_outliers.values)])
fig_no_outliers.update_layout(title='Distance Matrix (Without Outliers - Z-Score)', autosize=False,
                              width=1000, height=1000,
                              margin=dict(l=65, r=50, b=65, t=90))

fig_no_outliers.show()

In [None]:
print("Range before Z-score:", y.min().min(), "-", y.max().max())
print("Range after Z-Score:", df_z_score.min().min(), "-", df_z_score.max().max())


Range before Z-score: 0.0 - 80710.61514229472


NameError: name 'df_z_score' is not defined

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap for Original Distance Matrix
plt.figure(figsize=(10, 8))
sns.heatmap(y, cmap='viridis')
plt.title("Heatmap of Original Distance Matrix")
plt.show()

# Heatmap for Refined Distance Matrix (Outliers Removed)
plt.figure(figsize=(10, 8))
sns.heatmap(df_without_outliers, cmap='viridis')
plt.title("Heatmap of Refined Distance Matrix (Outliers Removed)")
plt.show()

In [None]:
# Boxplot of Original Distance Matrix
plt.figure(figsize=(10, 6))
sns.boxplot(data=y.values.flatten())
plt.title("Boxplot of Original Distance Matrix")
plt.show()

# Boxplot of Refined Distance Matrix (Outliers Removed)
plt.figure(figsize=(10, 6))
sns.boxplot(data=df_without_outliers.values.flatten())
plt.title("Boxplot of Refined Distance Matrix (Outliers Removed)")
plt.show()

In [None]:
# Histogram for Original Distance Matrix
plt.figure(figsize=(10, 6))
plt.hist(y.values.flatten(), bins=50, alpha=0.6, label='Original')
plt.title("Histogram of Original Distance Matrix")
plt.xlabel("Distance Values")
plt.ylabel("Frequency")
plt.legend()
plt.show()

# Histogram for Refined Distance Matrix (Outliers Removed)
plt.figure(figsize=(10, 6))
plt.hist(df_without_outliers.values.flatten(), bins=50, alpha=0.6, label='Refined', color='orange')
plt.title("Histogram of Refined Distance Matrix (Outliers Removed)")
plt.xlabel("Distance Values")
plt.ylabel("Frequency")
plt.legend()
plt.show()

In [None]:
# Scatter plot comparing original and refined distance values
plt.figure(figsize=(10, 6))
plt.scatter(range(len(y.values.flatten())), y.values.flatten(), label='Original', alpha=0.5)
plt.scatter(range(len(df_without_outliers.values.flatten())), df_without_outliers.values.flatten(), label='Refined', alpha=0.5, color='orange')
plt.title("Scatter Plot: Original vs Refined Distance Matrix")
plt.xlabel("Index")
plt.ylabel("Distance Value")
plt.legend()
plt.show()

In [None]:
# Calculate the difference matrix between original and refined
diff_matrix = y - df_without_outliers

# 3D Surface plot of the difference matrix
fig_diff = go.Figure(data=[go.Surface(z=diff_matrix.values)])
fig_diff.update_layout(title='Difference Matrix (Original - Refined)', autosize=False,
                       width=1000, height=1000,
                       margin=dict(l=65, r=50, b=65, t=90))
fig_diff.show()

In [None]:
df_without_outliers.to_numpy().max()

1684.114482694149

In [None]:
df_without_outliers.head()

Unnamed: 0,Pre_1,Pre_2,Pre_3,Pre_4,Pre_5,Pre_6,Pre_7,Pre_8,Pre_9,Pre_10,...,Pre_2886,Pre_2887,Pre_2888,Pre_2889,Pre_2890,Pre_2891,Pre_2892,Pre_2893,Pre_2894,Pre_2895
Exp_1,1684.114483,1684.114483,1684.114483,1630.527721,1397.460177,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,...,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483
Exp_2,1684.114483,1684.114483,1684.114483,1684.114483,1552.877034,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,...,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483
Exp_3,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1267.342379,1346.632434,1441.942709,1166.095366,1352.577476,...,1256.459183,1684.114483,1684.114483,1684.114483,1674.451866,1684.114483,1684.114483,1684.114483,1273.594377,1684.114483
Exp_4,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1072.011373,1411.422595,1225.998047,1684.114483,1684.114483,...,1108.315432,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1081.110077,1684.114483
Exp_5,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,993.684478,823.86213,1684.114483,1684.114483,...,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1410.091163,1297.186331,1684.114483,1684.114483


In [None]:
df_without_outliers.describe()

Unnamed: 0,Pre_1,Pre_2,Pre_3,Pre_4,Pre_5,Pre_6,Pre_7,Pre_8,Pre_9,Pre_10,...,Pre_2886,Pre_2887,Pre_2888,Pre_2889,Pre_2890,Pre_2891,Pre_2892,Pre_2893,Pre_2894,Pre_2895
count,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,...,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0
mean,1539.347064,1591.363479,1682.39527,1683.799266,1621.842096,1427.04397,1312.279945,1306.918857,1620.035231,1657.515323,...,1400.1117,1580.07471,1684.114,1560.169359,1550.357463,1609.617835,1576.718568,1550.36873,1435.320666,1683.02965
std,283.486817,222.679874,22.415789,4.109917,167.785896,309.346463,286.411037,310.41966,145.099404,89.50564,...,316.815964,248.022023,2.280454e-12,260.422797,268.759085,205.201249,189.222169,199.632863,303.613054,14.144486
min,824.298531,832.780366,1391.848355,1630.527721,869.494589,825.687332,826.186826,823.86213,1148.751438,964.038463,...,824.692158,827.124793,1684.114,828.135194,840.61861,833.469799,835.763005,823.682621,823.847144,1499.69295
25%,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1161.35525,1076.625913,1014.056755,1684.114483,1684.114483,...,1119.361468,1684.114483,1684.114,1684.114483,1684.114483,1684.114483,1542.498462,1453.324705,1160.993996,1684.114483
50%,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1295.891872,1307.677153,1684.114483,1684.114483,...,1625.41981,1684.114483,1684.114,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483
75%,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1635.388688,1684.114483,1684.114483,1684.114483,...,1684.114483,1684.114483,1684.114,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483
max,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,...,1684.114483,1684.114483,1684.114,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483,1684.114483


In [None]:
#y.to_csv('dist_matrixT2DM.csv')
#files.download('dist_matrixT2DM.csv')
#x = np.array(y.iloc[:,[1410]])
x = np.array(df_without_outliers.iloc[:,[1410]])
x

array([[1684.11448269],
       [1684.11448269],
       [1434.10421832],
       [ 904.21801031],
       [1684.11448269],
       [1063.86762009],
       [1040.56416234],
       [ 906.73402399],
       [ 827.85394917],
       [1460.76720681],
       [ 917.17453935],
       [ 927.71381939],
       [1006.28951649],
       [1684.11448269],
       [1684.11448269],
       [1684.11448269],
       [1684.11448269],
       [1684.11448269],
       [1684.11448269],
       [1684.11448269],
       [1684.11448269],
       [1684.11448269],
       [ 852.77685103],
       [1684.11448269],
       [1684.11448269],
       [1684.11448269],
       [1684.11448269],
       [1000.53318323],
       [1158.86941237],
       [1684.11448269],
       [1684.11448269],
       [1033.4949619 ],
       [1684.11448269],
       [1353.52755038],
       [1684.11448269],
       [ 968.18751138],
       [1684.11448269],
       [1684.11448269],
       [1324.81969877],
       [1684.11448269],
       [1684.11448269],
       [1684.114

In [None]:
#Row Elimination
df = df_without_outliers
df_without_outliers.to_numpy().max()
#df2 = df.drop(df.columns[[0, 4, 2]], axis=1, inplace=True)
#print(df.to_numpy().max())
x = np.array(df.iloc[:,:])
#x = np.array(df.iloc[[162],:])
row_index = np.where(x == x.max())
print(row_index)
x.tolist()
print(x.max())
x.sort()
np.array(df.iloc[row_index[0]])



In [None]:
#Column Elimination
df = df_without_outliers
#df2 = df.drop(df.columns[[2583]], axis=1, inplace=True) #157,731,1330,1415,1572,1778,2119,2414,
#print(df.to_numpy().max())
m = 1396
print(np.array(df.iloc[:10,m]))
print(np.array(df.iloc[:,m]).max())
#print(y.to_numpy().max())
#print(df.to_numpy().max())


print(df.columns[m])



In [None]:
df.to_numpy()
df = np.sort(df)
np.flip(df)

In [None]:
# Handle NaN values - Option 1: Drop rows/columns with NaN values
#df_cleaned = df_without_outliers.dropna(how='any')  # Drop rows with any NaN values
# Alternatively, you can drop columns with NaN values using dropna(axis=1)
df = df_without_outliers
# Handle NaN values - Option 2: Fill NaN values with a specific number (e.g., 0 or column mean)
#df_cleaned = df_without_outliers.fillna(0)  # Replace NaN values with 0
#df_cleaned = df_without_outliers.fillna(df_without_outliers.mean())  # Replace NaN values with column mean

# Now calculate max_val based on cleaned data
max_val = int(df.to_numpy().max() - df.to_numpy().min())  # difference between max. dist and min dist
threshold = 0.01 * max_val

# Binary transformation based on threshold
df1 = df.where(df <= threshold, 0)  # Entries <= threshold survive, others set to 0
df2 = df1.where(df > threshold, 1)  # Entries > threshold set to 1

# Resulting Binary Matrix
print(df2)

In [None]:
freq_dist = pd.DataFrame(df2.sum())
freq_dist.columns = ['Frequency']
Thresold_val = 25 # Sequences whose Frequency is less than the thresold_value shall be removed
filtered = freq_dist[freq_dist['Frequency'] >= Thresold_val]
filtered = filtered.sort_values(by=['Frequency'])
filtered.to_csv('file1.csv')



In [None]:
for i in range(Thresold_val,int(df2.sum().max())+1):
    same_freq_lst = []
    row_index = np.where(filtered == i)
    for j in range(len(row_index[0])):
        same_freq_lst.append(filtered.iloc[row_index[0][j]].name)
    #print(same_freq_lst)
    df3 = pd.DataFrame(df,columns=same_freq_lst)
    df4 = df3.where(df3 <= threshold,0)# entries whose val is less the thresold will survive, 0 otherwise
    df4 = df4 / threshold
    freq_dist = pd.DataFrame(df4.sum())
    #freq_dist
    freq_dist.columns = ['Frequency']
    freq_dist.sort_values('Frequency',ascending = True)
    freq_dist.to_csv('fileterd_freq.csv')

In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage
import matplotlib.pyplot as plt

# Assuming 'x' is defined and contains your data
lab = list(range(0, 49))  # Adjust the range based on your data size
linkage_matrix = linkage(x, "single")

# Set up the figure size
plt.figure(figsize=(50, 50))  # High dimensions for the figure
dendrogram(linkage_matrix)  # You can label with the appropriate indices or names
plt.title('Dendrogram')
plt.xlabel('Sample Index')
plt.ylabel('Distance')

# Save the figure as an image
plt.savefig('dendrogram.png', dpi=50, bbox_inches='tight')  # Save as PNG with high DPI
plt.close()  # Close the figure to free up memory