In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)

In [9]:
df = pd.read_csv('./GSE150150_raw_counts.csv')

In [10]:
columns = ["Trunk E9.5", "Trunk E10.5", "DRG E9.5", "DRG E10.5", "DRG E11.5", "DRG E12.5", "Cranio E12.5"]
idMappings = {
    "SS2_15_0085": "Trunk E9.5",
    "SS2_18_107": "Trunk E10.5",
    "SS2_18_108": "Trunk E10.5",
    "SS2_18_109": "Trunk E10.5",
    "SS2_16_767": "DRG E9.5",
    "SS2_17_435": "DRG E9.5",
    "SS2_16_688": "DRG E10.5",
    "SS2_16_769": "DRG E10.5",
    "SS2_17_432": "DRG E10.5",
    "SS2_17_433": "DRG E10.5",
    "SS2_16_385": "DRG E11.5",
    "SS2_16_384": "DRG E12.5",
    "SS2_15_0073": "Cranio E12.5",
    "SS2_15_0169": "Cranio E12.5"
}

In [11]:
print("Gene Count: ", len(df))
sampleRow = df.sample()
print("Sample Row: ", sampleRow)
print("Sample Gene: ", sampleRow.iloc[0,0])

Gene Count:  24582
Sample Row:          Gene  SS2_15_0073:A9_unique.bam  SS2_15_0073:A8_unique.bam  \
2350  Hoxd12                          0                          0   

      SS2_15_0073:A7_unique.bam  SS2_15_0073:A6_unique.bam  \
2350                          0                          0   

      SS2_15_0073:A5_unique.bam  SS2_15_0073:A4_unique.bam  \
2350                          0                          0   

      SS2_15_0073:A3_unique.bam  SS2_15_0073:A2_unique.bam  \
2350                          0                          0   

      SS2_15_0073:A10_unique.bam  ...  SS2_15_0085:P15_unique.bam  \
2350                           0  ...                           0   

      SS2_15_0085:P16_unique.bam  SS2_15_0085:P17_unique.bam  \
2350                           0                           0   

      SS2_15_0085:P18_unique.bam  SS2_15_0085:P19_unique.bam  \
2350                           0                           0   

      SS2_15_0085:P20_unique.bam  SS2_15_0085:P24_uniqu

In [12]:
import numbers
df.columns = [idMappings[col.split(":")[0]] if col.split(":")[0] in idMappings else col for col in df.columns]
dfParsed = df.groupby(df.columns, axis=1).apply(lambda x: x.mean(axis=1) if isinstance(x.iloc[0,0], numbers.Number) else x.iloc[:,0])
dfParsed = dfParsed[dfParsed.columns[5:].tolist() + dfParsed.columns[:5].tolist()]

print("Grouped Gene Count: ", len(dfParsed))
sampleRow = dfParsed.sample()
print("Grouped Sample Row: ", sampleRow)
print("Grouped Sample Gene: ", sampleRow.iloc[0,0])

Grouped Gene Count:  24582
Grouped Sample Row:          Gene  Trunk E10.5  Trunk E9.5  Cranio E12.5  DRG E10.5  DRG E11.5  \
2381  Zc3h15    29.746528   58.578125     42.153646  47.266927  51.109375   

      DRG E12.5   DRG E9.5  
2381  46.841146  54.032552  
Grouped Sample Gene:  Zc3h15


# Leiden Algorithm

In [13]:
import leidenalg
import networkx as nx

In [37]:
G = nx.Graph()

for node in dfParsed.index:
    G.add_node(node)


genelessDf = dfParsed.drop(dfParsed.columns[0], axis=1)

print(genelessDf.head())

for x in genelessDf.index:
    for y in genelessDf.index:
        if x != y:
            correlation_coeff = genelessDf.loc[x].corr(genelessDf.loc[y])
            if correlation_coeff > 0.9:
                G.add_edge(x, y, weight=correlation_coeff)
    print(x)

print(len(G.nodes))
print(len(G.edges))

    DRG E10.5   DRG E11.5   DRG E12.5    DRG E9.5
0  280.343099  384.346354  538.942708  710.703125
1   59.985026   68.973958   97.622396  139.312500
2   35.949219   46.658854   70.117188   77.486979
3   30.908854   48.080729   61.325521   61.453125
4    0.223307    0.023438    0.085938    0.415365
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27


KeyboardInterrupt: 

In [None]:
part = leidenalg.find_partition(G, leidenalg.ModularityVertexPartition)
print(part)

Clustering with 100 elements and 6 clusters
[0] 0, 1, 17, 18, 19, 20, 21, 29, 32, 39, 42, 50, 51, 56, 64, 65, 68, 71, 72,
    79, 82, 90, 92, 98
[1] 7, 10, 15, 26, 30, 31, 35, 38, 47, 48, 55, 62, 67, 77, 78, 87, 88, 97, 99
[2] 2, 14, 25, 27, 33, 34, 43, 44, 49, 52, 57, 59, 66, 76, 84, 91
[3] 4, 5, 6, 11, 16, 24, 36, 37, 40, 46, 53, 54, 58, 74, 85
[4] 9, 12, 13, 23, 28, 61, 69, 73, 80, 86, 89, 93, 94
[5] 3, 8, 22, 41, 45, 60, 63, 70, 75, 81, 83, 95, 96
