# Aracne Setup
Aracne uses information-theoretic approaches to construct gene regulatory networks from gene expression data.

In [156]:
import os
import pandas
import matplotlib.pyplot as plt
import seaborn
import numpy

%matplotlib inline

class FilePaths():
    """Bundle of file paths used by this notebook, all rooted in one directory.

    Parameters
    ----------
    dire : str, optional
        Directory that holds the files. When omitted, ``DEFAULT_DIRE`` is
        used, which is the location that was previously hard-coded.

    Attributes
    ----------
    dire : str
        The directory all other paths are joined onto.
    input_file : str
        Path of 'MicroarrayDEGs.csv' inside ``dire``.
    output_filename : str
        Path of 'ArcneOutput.adj' inside ``dire``.
    TFlist_file : str
        Path of 'TFsInMicroarrayDataProbeIDs.txt' inside ``dire``
        (presumably a transcription-factor probe list -- confirm).
    """

    # Directory used when no ``dire`` argument is supplied. It is an absolute,
    # machine-specific path; pass ``dire`` to run the notebook elsewhere.
    DEFAULT_DIRE = r'/home/b3053674/Documents/Miscellaneous/Aracne/AracneMicroarray'

    def __init__(self, dire=None):
        self.dire = self.DEFAULT_DIRE if dire is None else dire
        self.input_file = os.path.join(self.dire, 'MicroarrayDEGs.csv')
        self.output_filename = os.path.join(self.dire, 'ArcneOutput.adj')
        self.TFlist_file = os.path.join(self.dire, 'TFsInMicroarrayDataProbeIDs.txt')


F = FilePaths()


## Aracne Output Formatting 
Aracne output is a large TSV file that needs to be reformatted into a network file for import into Cytoscape.

### Get annotation data from input file

In [157]:
# Read the Aracne input table; the file is parsed as tab-delimited text.
data = pandas.read_csv(F.input_file, sep='\t')

# Keep just the two columns that map probe IDs to gene symbols.
annotation_columns = ['ProbeID', 'GeneSymbol']
anno_df = data[annotation_columns]

print(anno_df)

           ProbeID GeneSymbol
0    11746909_a_at       A1CF
1    11736238_a_at      ABCA5
2      11724734_at      ABCB8
3      11723976_at      ABCC8
4    11718612_a_at      ABCD4
5    11758217_s_at    ABHD17C
6    11744541_a_at      ACKR3
7      11730931_at      ACSS3
8      11738454_at     ACTBL2
9      11727682_at     ACVR2B
10   11733762_a_at   ADAMTSL1
11   11756406_x_at        ADM
12   11743232_a_at       AFDN
13   11743984_a_at      AKAP1
14   11723804_a_at     AKAP12
15   11716720_a_at     AKT1S1
16     11733006_at     AMOTL2
17   11719576_s_at        AMT
18   11719577_a_at        AMT
19   11755899_x_at   ANKRD36B
20   11755987_a_at    ANKRD44
21   11756425_a_at     ANKS1A
22   11727683_a_at      AP3B2
23   11734819_s_at      APBB2
24   11723975_x_at      APOL3
25   11722736_s_at      APPL1
26   11733928_a_at      ARMC8
27   11751585_a_at       ASB8
28   11716966_a_at       ATN1
29   11716154_x_at    B4GALT1
..             ...        ...
183  11736576_x_at     TMEM53
184  11716

### Parse Aracne output file into pandas.DataFrame

In [158]:
def parse_data(data_file):
    """Read an Aracne adjacency file into a DataFrame of string fields.

    Lines that start with '>' are skipped, as are blank lines. Every other
    line is split on tab characters; its first field becomes the row label
    and is also kept as column 0.

    Parameters
    ----------
    data_file : str
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line, labelled by the line's first field, with
        integer column labels. Lines with fewer fields than the longest one
        are padded with missing values by the DataFrame constructor.
    """
    rows = []
    with open(data_file) as f:
        for line in f:
            if line.startswith('>'):
                continue
            # Drop the line terminator; otherwise it stays glued to the last
            # field of every row (e.g. '0.085\n').
            line = line.rstrip('\r\n')
            # A blank line would otherwise become a bogus one-field row.
            if not line.strip():
                continue
            rows.append(line.split('\t'))

    headers = [row[0] for row in rows]
    return pandas.DataFrame(rows, index=headers)
        
# Parse the Aracne adjacency output. Note that this rebinds `data`: from here
# on it holds the parsed adjacency table, not the input table read earlier.
data = parse_data(F.output_filename)


### Extract the gene symbols from the input file, merge with the Aracne output and replace probe IDs with gene symbols

In [159]:
def merge_with_annotation(data, anno_data):
    """Attach gene symbols to the parsed table and translate probe IDs.

    Parameters
    ----------
    data : pandas.DataFrame
        Table indexed by probe ID whose column 0 repeats the row label
        (it is dropped here) and whose remaining cells may contain probe IDs.
    anno_data : pandas.DataFrame
        Table with 'ProbeID' and 'GeneSymbol' columns.

    Returns
    -------
    pandas.DataFrame
        Rows whose probe ID occurs in both inputs, indexed by
        ('ProbeID', 'GeneSymbol'). Every cell equal to one of those probe IDs
        is replaced by its gene symbol, column 0 is removed and the remaining
        columns are relabelled 0..n-1.
    """
    annotation = anno_data.set_index('ProbeID')

    # Name the left index explicitly so the merged index is always called
    # 'ProbeID'; otherwise the set_index below depends on how the merge
    # resolves a named/unnamed index pair.
    merged = pandas.merge(
        data.rename_axis('ProbeID'),
        annotation,
        left_index=True,
        right_index=True,
    )
    merged = merged.reset_index().set_index(['ProbeID', 'GeneSymbol'])

    # Series indexed by ProbeID with GeneSymbol values, used as an
    # old-value -> new-value mapping by replace().
    probe_to_symbol = merged.reset_index(level=1)['GeneSymbol']
    merged = merged.replace(probe_to_symbol)

    # Column 0 only repeated the row's own identifier.
    merged = merged.drop(0, axis=1)
    merged.columns = range(merged.shape[1])
    return merged

# Join the parsed Aracne table with the ProbeID/GeneSymbol annotation and
# show the result.
merged = merge_with_annotation(data, anno_df)
print(merged)

                              0           1         2           3         4    \
ProbeID       GeneSymbol                                                        
11746909_a_at A1CF          ABCA5  0.08558538     ABCB8  0.06253153   ABHD17C   
11736238_a_at ABCA5          A1CF  0.08558538     ABCC8  0.08626567   ABHD17C   
11724734_at   ABCB8          A1CF  0.06253153     ABCC8  0.06061752     ACKR3   
11723976_at   ABCC8         ABCA5  0.08626567     ABCB8  0.06061752     ABCD4   
11718612_a_at ABCD4         ABCC8  0.09933948   ABHD17C  0.18742451     ACKR3   
11758217_s_at ABHD17C        A1CF  0.09838815     ABCA5  0.06462951     ABCC8   
11744541_a_at ACKR3         ABCA5  0.07400585     ABCB8  0.09150306     ABCC8   
11730931_at   ACSS3         ACKR3   0.0628304    ACTBL2  0.07066915  ADAMTSL1   
11738454_at   ACTBL2        ABCB8  0.09367476     ABCC8  0.06537474   ABHD17C   
11727682_at   ACVR2B         A1CF  0.08444491     ABCA5  0.06836811     ABCC8   
11733762_a_at ADAMTSL1      

### Create a network table indexed by "Interactie" and "Interactors", with MI weights as values

In [187]:


def create_network_table(data):
    """Reshape the wide per-probe table into a long edge list.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose index has a 'GeneSymbol' level and whose columns
        alternate between an interactor name (positions 0, 2, 4, ...) and its
        weight (positions 1, 3, 5, ...). Shorter rows are padded with
        missing values.

    Returns
    -------
    pandas.DataFrame
        A single 'MI' column (numeric) indexed by
        ('Interactie', 'Interactors'), sorted by both index levels. Pairs in
        which the interactor or the weight is missing are dropped.

    Raises
    ------
    ValueError
        If a weight cannot be converted to a number (from
        ``pandas.to_numeric``).
    """
    symbols = numpy.asarray(data.index.get_level_values('GeneSymbol'))

    # Even positions hold interactor names, odd positions hold the weights.
    names = data.iloc[:, 0::2]
    weights = data.iloc[:, 1::2]

    # With an odd number of columns the last name has no weight; ignore it
    # rather than failing on a column that does not exist.
    n_pairs = weights.shape[1]
    names = names.iloc[:, :n_pairs]

    # Row-major flattening keeps each row's pairs together, so repeating the
    # row's gene symbol n_pairs times lines the three columns up.
    table = pandas.DataFrame({
        'Interactie': numpy.repeat(symbols, n_pairs),
        'Interactors': names.values.ravel(),
        'MI': pandas.to_numeric(
            pandas.Series(weights.values.ravel(), dtype=object)
        ),
    })

    # Padding cells (None/NaN) mark pairs that do not exist for a given row.
    table = table.dropna(subset=['Interactors', 'MI'], how='any')
    table = table.set_index(['Interactie', 'Interactors'])
    return table.sort_index(level=[0, 1])
    
# Build the long-format table: one row per (Interactie, Interactors) pair
# with its MI weight, then show it.
network = create_network_table(merged)
print(network)

                              MI
Interactie Interactors          
A1CF       ABCA5        0.085585
           ABCB8        0.062532
           ABHD17C      0.098388
           ACVR2B       0.084445
           ADM          0.073448
           AKAP12       0.080827
           AMOTL2       0.063569
           AMT          0.080201
           AMT          0.063460
           APBB2        0.125219
           APOL3        0.073793
           APPL1        0.069815
           ATN1         0.059295
           BACE1        0.096685
           BCL7A        0.099389
           BTG2         0.064006
           CAMK2A       0.111331
           CBX6         0.116430
           CD28         0.095726
           CDC20B       0.080945
           CMTM4        0.107246
           COL3A1       0.061975
           COL7A1       0.109989
           COLCA1       0.058729
           CTXN1        0.069069
           CX3CL1       0.083875
           CXCR1        0.120169
           CYFIP2       0.082665
          

### Remove Duplicate Entries

In [192]:
# NOTE(review): `network` has a single 'MI' column; 'Interactie' and
# 'Interactors' live in the index, which drop_duplicates() does not compare.
# This therefore keeps only the first row for each distinct MI value. That
# presumably removes the reciprocal A-B / B-A edge (same MI), but it would also
# drop unrelated edges that happen to share an MI value -- confirm intended.
network = network.drop_duplicates()

### Write to file

In [186]:
def to_file(df, fname):
    """Write ``df`` to ``fname`` as tab-separated text.

    The index is turned into ordinary leading columns with ``reset_index``
    before writing, and the resulting positional index is not written.
    """
    df.reset_index().to_csv(fname, sep='\t', index=False)
# Export the network table. 'Network.txt' is a relative path, so the file is
# written to the current working directory rather than to F.dire.
to_file(network, 'Network.txt')
