### --- Step 1: Load Expression Data ---

In [12]:
# Load the single-sample expression file; its first column supplies row names.
data1 <- read.table("GSM1172856_CAMA1.txt", header = TRUE, row.names = 1)

# Load the large expression matrix. It is read without a header row, so any
# label cells in the file arrive as ordinary data cells.
raw_data2 <- read.delim("GSE62944_1.txt", header = FALSE, check.names = FALSE)

# Transpose so that the file's columns become rows.
expr_data2 <- t(raw_data2)

# Coerce every cell to numeric while keeping the matrix shape. Cells that are
# not numeric become NA; the coercion warning this produces is expected, so it
# is suppressed around this single call only.
expr_data2_numeric <- suppressWarnings(
  matrix(
    as.numeric(expr_data2),
    nrow = nrow(expr_data2),
    ncol = ncol(expr_data2)
  )
)

# Remove every row that contains at least one NA. drop = FALSE keeps the result
# a matrix even when exactly one row survives; without it the result collapses
# to a plain vector and the nrow()/ncol() checks below fail on NULL.
# NOTE(review): if the transposed data carries a non-numeric label in every
# row, all rows are removed here -- confirm the layout of GSE62944_1.txt.
expr_data2_clean <- expr_data2_numeric[
  complete.cases(expr_data2_numeric), ,
  drop = FALSE
]

# Assign placeholder dimnames ("Gene<i>" for rows, "Sample<j>" for columns)
# when the cleaned matrix is non-empty. Any original labels are not carried
# over.
if (nrow(expr_data2_clean) > 0 && ncol(expr_data2_clean) > 0) {
  rownames(expr_data2_clean) <- paste0("Gene", seq_len(nrow(expr_data2_clean)))
  colnames(expr_data2_clean) <- paste0("Sample", seq_len(ncol(expr_data2_clean)))
}

# Expose the cleaned numeric matrix under the name used by later steps.
expr_data2 <- expr_data2_clean



### --- Step 2: Gene Filtering by Variance ---

In [5]:
# Compute the variance of every row of expr_matrix and keep the rows with the
# largest variance (at most 1000 of them).
gene_var <- apply(expr_matrix, 1, var)

# head() caps the selection at the number of rows actually available. Indexing
# with [1:1000] would pad the result with NA names when fewer than 1000 rows
# exist, and the row lookup below would then fail.
top_genes <- head(names(sort(gene_var, decreasing = TRUE)), 1000)

# drop = FALSE keeps expr_matrix two-dimensional even if only one row is kept.
expr_matrix <- expr_matrix[top_genes, , drop = FALSE]


### --- Step 3: Define Annotations ---

In [7]:
# Class labels: a single "A" followed by one "B" per column of expr_data2.
# NOTE(review): presumably the "A" sample is the first column of the matrix the
# distances are computed from, followed by the expr_data2 columns -- confirm.
annotation <- c("A", rep("B", times = ncol(expr_data2)))

### --- Step 4: Create Distance Matrix ---

In [8]:
# Pairwise Euclidean distances between the columns of expr_matrix. dist()
# measures distances between rows, hence the transpose; as.matrix() expands
# the result into a full square matrix.
dist_matrix <- as.matrix(
  dist(x = t(expr_matrix), method = "euclidean")
)

### --- Step 5: Hobotnica Functions ---

In [10]:
# Compute the Hobotnica H-score of a class annotation on a distance matrix.
#
# The pairwise distances are replaced by their ranks, the ranks of all pairs
# whose two samples share a class are summed, and that sum is rescaled between
# the smallest and largest sums possible for that many in-class pairs. The
# result is 1 when the in-class pairs hold the smallest ranks and decreases
# towards 0 as they hold larger ranks.
#
# Arguments:
#   distMatrix  Square matrix (or object coercible with as.matrix(), such as a
#               "dist" object) of pairwise distances between samples.
#   annotation  Class label for every sample, in the row/column order of
#               distMatrix.
#
# Returns:
#   A single numeric value, floored at 0. The value is NaN when the
#   normalisation factor is 0, i.e. when no two samples share a class or when
#   all samples share one class.
#
# Errors:
#   Stops if distMatrix is not square or if annotation does not have one label
#   per sample.
Hobotnica <- function(distMatrix, annotation) {
  annotation <- as.vector(annotation)
  rank.m <- as.matrix(distMatrix)

  if (nrow(rank.m) != ncol(rank.m)) {
    stop("distMatrix must be square.", call. = FALSE)
  }
  # A length mismatch would otherwise score a sub-block of the matrix against
  # bounds derived from the annotation length, silently giving a wrong value.
  if (length(annotation) != nrow(rank.m)) {
    stop(
      "annotation must contain one label per row of distMatrix.",
      call. = FALSE
    )
  }

  # Rank each triangle separately so a symmetric input yields symmetric ranks.
  lower <- lower.tri(rank.m)
  upper <- upper.tri(rank.m)
  rank.m[lower] <- rank(rank.m[lower])
  rank.m[upper] <- rank(rank.m[upper])
  # Self-distances are not ranked and must not contribute to the in-class sum;
  # the bounds below only account for off-diagonal pairs.
  diag(rank.m) <- 0

  classes <- unique(annotation)
  Ns <- numeric(length(classes))
  inclass_sum <- 0

  for (i in seq_along(classes)) {
    class_samples <- which(annotation == classes[i])
    Ns[i] <- length(class_samples)
    # Sums both triangles of the class block, so each pair is counted twice.
    inclass_sum <- inclass_sum + sum(rank.m[class_samples, class_samples])
  }

  Ns_sum <- sum(Ns)
  # Number of sample pairs, which is also the largest possible rank.
  biggest_possible_rank <- Ns_sum * (Ns_sum - 1) / 2
  number_of_unique_inclass_elements <- sum(Ns * (Ns - 1)) / 2
  # Twice the sum of the k largest / k smallest ranks (k = in-class pairs),
  # matching the double counting in inclass_sum.
  maximal_value <- number_of_unique_inclass_elements *
    (2 * biggest_possible_rank - number_of_unique_inclass_elements + 1)
  minimal_value <- number_of_unique_inclass_elements *
    (1 + number_of_unique_inclass_elements)
  normalization_factor <- maximal_value - minimal_value

  max(0, 1 - (inclass_sum - minimal_value) / normalization_factor)
}

# Build a permutation distribution of Hobotnica scores.
#
# The annotation labels are shuffled N times; each shuffled labelling is scored
# with Hobotnica() against the same distance matrix.
#
# Arguments:
#   N           Number of permutations to draw. N = 0 returns an empty vector.
#   distMatrix  Distance matrix passed unchanged to Hobotnica().
#   annotation  Class labels to shuffle.
#
# Returns:
#   Numeric vector of length N holding the score of each shuffled labelling.
Hobot_distr <- function(N, distMatrix, annotation) {
  annotation <- as.vector(annotation)
  n_labels <- length(annotation)

  # seq_len() yields no iterations for N = 0 (1:N would yield two), and
  # vapply() fills a pre-sized result instead of growing a vector with c().
  vapply(
    seq_len(N),
    function(i) {
      # Index with sample.int() rather than calling sample(annotation):
      # sample() treats a single number n as 1:n. The random draws are the
      # same as sample(annotation) in every other case.
      shuffled <- annotation[sample.int(n_labels)]
      Hobotnica(distMatrix, shuffled)
    },
    numeric(1)
  )
}

# Empirical p-value of an observed score against a set of reference scores.
#
# Arguments:
#   Test_hobot   Observed score.
#   Hobots       Numeric vector of reference scores (for example the output of
#                Hobot_distr()).
#   alternative  Which tail to report. "less" (the default, and the behaviour
#                of the original two-argument function) returns the fraction of
#                reference scores <= Test_hobot; "greater" returns the fraction
#                of reference scores >= Test_hobot.
#
# Returns:
#   A single numeric value in [0, 1]: the fraction of Hobots in the chosen tail.
#
# NOTE(review): the default reports the lower tail, so a small value means the
# observed score is unusually LOW relative to the reference scores. Confirm
# this is the direction the analysis intends.
Hobot_pval <- function(Test_hobot, Hobots,
                       alternative = c("less", "greater")) {
  alternative <- match.arg(alternative)
  if (alternative == "less") {
    mean(Hobots <= Test_hobot)
  } else {
    mean(Hobots >= Test_hobot)
  }
}


### --- Step 6: Run Hobotnica Analysis ---

In [11]:
# Fix the random seed so the label permutations below are reproducible.
set.seed(seed = 42)

# Score of the actual annotation on the distance matrix.
H_score <- Hobotnica(distMatrix = dist_matrix, annotation = annotation)

# Scores of 1000 random shufflings of the same annotation.
null_dist <- Hobot_distr(
  N = 1000,
  distMatrix = dist_matrix,
  annotation = annotation
)

# Fraction of shuffled scores that are <= the observed score.
p_val <- Hobot_pval(Test_hobot = H_score, Hobots = null_dist)

cat("Hobotnica H-score:", H_score, "\n")
cat("P-value:", p_val, "\n")

Hobotnica H-score: 0.1658749 
P-value: 0.002 
