In [1]:
import numpy as np
import pandas as pd
from scipy.ndimage import generic_filter
from scipy.stats import mannwhitneyu

In [2]:
def normalize_image(image):
    """Linearly rescale ``image`` so its values span approximately [0, 1].

    The smallest value maps to 0. A small constant (1e-8) is added to the
    value range in the denominator so that a constant image produces zeros
    rather than a division by zero; as a side effect the largest value maps
    to slightly below 1.
    """
    lowest = np.min(image)
    value_range = np.max(image) - lowest + 1e-8
    return (image - lowest) / value_range

def total_variation(image):
    """Return the isotropic total variation of a 2-D array.

    Forward differences are taken along both axes. Each difference array is
    one element shorter than the input along its own axis, so it is
    zero-padded on the trailing edge to line up element-wise with the other.
    The result is the sum over all positions of sqrt(dx**2 + dy**2).
    """
    horizontal = np.pad(np.diff(image, axis=1), [(0, 0), (0, 1)], mode='constant')
    vertical = np.pad(np.diff(image, axis=0), [(0, 1), (0, 0)], mode='constant')
    gradient_magnitude = np.sqrt(horizontal ** 2 + vertical ** 2)
    return np.sum(gradient_magnitude)

def local_std(image, size=3):
    """Return the mean of the local standard deviation over ``image``.

    For every element, ``np.std`` is evaluated over the ``size`` x ``size``
    neighbourhood centred on it (borders use ``generic_filter``'s default
    boundary handling), and the resulting values are averaged.

    Parameters
    ----------
    image : 2-D array_like
        Input values.
    size : int, optional
        Side length of the square neighbourhood (default 3).

    Returns
    -------
    float
        Mean of the per-element local standard deviations.
    """
    # generic_filter allocates its output with the *input's* dtype unless told
    # otherwise, so an integer image would have each local std truncated to an
    # integer before averaging. Force a float64 output buffer.
    per_pixel_std = generic_filter(
        image, np.std, size=(size, size), output=np.float64
    )
    return np.mean(per_pixel_std)

def calc_total_variation(image):
    """Return the total variation of ``image`` after min-max normalization."""
    return total_variation(normalize_image(image))

def calc_local_std(image):
    """Return the mean local standard deviation of ``image`` after min-max
    normalization, using ``local_std``'s default window size."""
    return local_std(normalize_image(image))
    

In [3]:
# Mammal group: base names of the map files to load. The loading cell turns
# each entry into the path "data/<name>.csv".
mammals = [
    "Platypus genes (mOrnAna1.p.v1)_map",
    "Opossum genes (ASM229v1)_map",
    "Armadillo genes (Dasnov3.0)_map",
    "Cow genes (ARS-UCD1.2)_map",
    "Dog genes (ROS_Cfam_1.0)_map",
    "Rabbit genes (OryCun2.0)_map",
    "Guinea Pig genes (Cavpor3.0)_map",
    "Rat genes (mRatBN7.2)_map",
    "Mouse genes (GRCm39)_map",
    "Human genes (GRCh38.p13)_map"
]

# Non-mammal comparison group: same naming scheme, loaded the same way.
non_mammals = [
    "Hagfish genes (Eburgeri_3.2)_map",
    "Elephant shark genes (Callorhinchus_milii-6.1.3)_map",
    "Spotted gar genes (LepOcu1)_map",
    "Zebrafish genes (GRCz11)_map",
    "Atlantic cod genes (gadMor3.0)_map",
    "Coelacanth genes (LatCha1)_map",
    "Tropical clawed frog genes (Xenopus_tropicalis_v9.1)_map",
    "Tuatara genes (ASM311381v1)_map",
    "Green anole genes (AnoCar2.0v2)_map",
    "Painted turtle genes (Chrysemys_picta_bellii-3.0.3)_map",
    "Zebra finch genes (bTaeGut1_v1.p)_map"
]

In [4]:
def load_map(name, data_dir="data"):
    """Read ``<data_dir>/<name>.csv`` as a header-less CSV and return its
    contents as a NumPy array."""
    return pd.read_csv(f"{data_dir}/{name}.csv", header=None).values


# One array per species, in the same order as the name lists. Each list is
# built exactly once (the original initialised non_mammal_maps twice).
mammal_maps = [load_map(name) for name in mammals]
non_mammal_maps = [load_map(name) for name in non_mammals]

In [5]:
# Per-map metrics for each group: total variation and mean local standard
# deviation, both computed on the min-max normalized map.
mammal_tv = list(map(calc_total_variation, mammal_maps))
mammal_std = list(map(calc_local_std, mammal_maps))

non_mammal_tv = list(map(calc_total_variation, non_mammal_maps))
non_mammal_std = list(map(calc_local_std, non_mammal_maps))

In [10]:
# Display the mean local standard deviation of each non-mammal map
# (one value per entry of `non_mammals`, in list order).
non_mammal_std

[0.008999124978824772,
 0.021945681761470257,
 0.017401283169722843,
 0.012122452771192019,
 0.00618679665268959,
 0.0049154506395271726,
 0.012887427941413308,
 0.021956554779315348,
 0.0295926106465236,
 0.025660056780765335,
 0.01592701072995316]

In [38]:
# Mann-Whitney U test on the mean local standard deviations of the two
# groups; the alternative hypothesis is spelled out rather than left implicit.
mannwhitneyu(mammal_std, non_mammal_std, alternative="two-sided")

MannwhitneyuResult(statistic=72.0, pvalue=0.2452781168067728)