In [16]:
import os
from glob import glob
from numpy.random import default_rng
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import plotly.express as px
import plotly.graph_objects as go
from sklearn.neighbors import NearestNeighbors

import scanpy as sc
import squidpy as sq
from anndata import AnnData
import pandas as pd
import PIL
from PIL import Image
import numpy as np

import json

import warnings

# Silence every warning category for the rest of the session. This keeps the
# notebook output compact, but it also hides deprecation and numerical
# warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')

# Print the versions of scanpy and its core dependencies, followed by the
# squidpy version, so the environment of this run is recorded in the output.
sc.logging.print_header()
print(f"squidpy=={sq.__version__}")


scanpy==1.10.1 anndata==0.10.7 umap==0.5.6 numpy==1.23.4 scipy==1.11.1 pandas==2.2.2 scikit-learn==1.3.0 statsmodels==0.14.0 igraph==0.11.4 pynndescent==0.5.12
squidpy==1.4.1


In [21]:
def knn_analysis(coordinates, k=10):
    """List the k nearest neighbours of every point in long format.

    Neighbours are searched within ``coordinates`` itself using the Euclidean
    distance (Minkowski metric with ``p=2``).

    Parameters
    ----------
    coordinates : array-like of shape (n_points, n_dims)
        Point coordinates. Converted with ``np.asarray`` so that lists and
        DataFrames are accepted as well as NumPy arrays.
    k : int, default 10
        Number of neighbours to report per point, not counting the point
        itself. When fewer than ``k`` other points exist, every other point
        is reported instead of raising.

    Returns
    -------
    pandas.DataFrame
        One row per (seed, neighbour) pair with the columns ``seed_coord``,
        ``neighbor_coord`` (each cell holds a coordinate array) and
        ``distance``. Rows are grouped by seed point in input order and,
        within a seed, ordered by increasing distance. The frame is empty
        (but keeps its columns) when there is nothing to report.

    Raises
    ------
    ValueError
        If ``k`` is negative.
    """
    columns = ['seed_coord', 'neighbor_coord', 'distance']
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    coordinates = np.asarray(coordinates)
    n_points = len(coordinates)

    # A point can have at most n_points - 1 neighbours; asking scikit-learn
    # for more raises, so clamp the request to what is available.
    n_neighbors = min(k, n_points - 1)
    if n_neighbors <= 0:
        return pd.DataFrame(columns=columns)

    # Query one extra neighbour: each point is looked up in the set it
    # belongs to, so the closest hit is the point itself (or an exact
    # duplicate of it) at distance 0.
    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, p=2).fit(coordinates)
    distances, indices = nbrs.kneighbors(coordinates)

    # Drop that first column and flatten row-major, which yields seed 0 with
    # its neighbours, then seed 1 with its neighbours, and so on.
    seed_idx = np.repeat(np.arange(n_points), n_neighbors)
    neighbor_idx = indices[:, 1:].ravel()

    return pd.DataFrame({
        'seed_coord': list(coordinates[seed_idx]),
        'neighbor_coord': list(coordinates[neighbor_idx]),
        'distance': distances[:, 1:].ravel(),
    })


def process_csv_files(directory):
    """Run ``knn_analysis`` on every ``*.csv`` file directly inside ``directory``.

    The first two columns of each file are used as the point coordinates, in
    the order in which they appear in the file.

    Parameters
    ----------
    directory : str
        Folder to scan. Sub-folders are not searched.

    Returns
    -------
    dict
        Maps each CSV base name (without the directory part) to the DataFrame
        returned by ``knn_analysis`` for that file. Empty when the folder
        contains no CSV files.

    Raises
    ------
    ValueError
        If a file has fewer than two columns.
    """
    results = {}
    # glob returns files in filesystem order; sort so that the processing
    # order and the key order of the result are reproducible across runs.
    for filepath in sorted(glob(os.path.join(directory, '*.csv'))):
        df = pd.read_csv(filepath)
        if df.shape[1] < 2:
            raise ValueError(
                f"{filepath}: expected at least 2 coordinate columns, "
                f"found {df.shape[1]}"
            )
        # NOTE(review): an earlier comment here said the two columns were
        # inverted, but the statement it described selected columns [0, 1],
        # i.e. it left the order unchanged. The file order is kept so existing
        # results do not change; confirm whether a swap was intended.
        coordinates = df.iloc[:, 0:2].to_numpy()
        results[os.path.basename(filepath)] = knn_analysis(coordinates)

    return results

In [22]:
# Folder holding the "modified" cell-object CSV exports to analyse.
directory = "/nethome/anastasiya.kazachenk/Projects/Ultivue/Immune_Panel_Cell_Objects_Data/modified"

# Debugging aid: uncomment to list the folder contents.
# print(os.listdir(directory))

# Process the modified CSV files: one k-NN table per file, keyed by file name.
knn_results = process_csv_files(directory)

In [23]:
# Display the k-NN tables: a dict keyed by CSV file name, one DataFrame per file.
knn_results

{'modified_USL-2023-53530_2.csv':            seed_coord neighbor_coord   distance
 0        [897, 24815]   [912, 24815]  15.000000
 1        [897, 24815]   [899, 24833]  18.110770
 2        [897, 24815]   [922, 24829]  28.653098
 3        [897, 24815]   [923, 24788]  37.483330
 4        [897, 24815]   [936, 24826]  40.521599
 ...               ...            ...        ...
 374125  [28497, 4888]  [28453, 4907]  47.927028
 374126  [28497, 4888]  [28447, 4870]  53.141321
 374127  [28497, 4888]  [28469, 4840]  55.569776
 374128  [28497, 4888]  [28482, 4833]  57.008771
 374129  [28497, 4888]  [28457, 4931]  58.728187
 
 [374130 rows x 3 columns],
 'modified_USL-2023-53531_2.csv':             seed_coord  neighbor_coord    distance
 0        [2829, 14802]   [2918, 14820]   90.801982
 1        [2829, 14802]   [2902, 14747]   91.400219
 2        [2829, 14802]   [2929, 14769]  105.304321
 3        [2829, 14802]   [2931, 14835]  107.205410
 4        [2829, 14802]   [2900, 14714]  113.070774
 ...

In [27]:
# Export every k-NN table as "<input file stem>_knn.csv" into the KNN folder.
output_dir = "/nethome/anastasiya.kazachenk/Projects/Ultivue/Immune_Panel_Cell_Objects_Data/KNN"

# Create the folder up front: to_csv does not create missing directories, and
# failing here would otherwise happen only after the expensive k-NN step.
os.makedirs(output_dir, exist_ok=True)

for filename, df in knn_results.items():
    output_filename = f"{os.path.splitext(filename)[0]}_knn.csv"
    output_path = os.path.join(output_dir, output_filename)
    # The row index is a plain counter, so it is left out of the file.
    df.to_csv(output_path, index=False)