# pyVolcano
A volcano plot built with matplotlib, NumPy and pandas.

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# --- Input columns ---
gene_col = 'row'                # gene names, used as text labels on the plot
log_col = 'log2FoldChange'      # fold change, plotted on the x axis
pval_col = 'padj'               # p-value, plotted as -log10 on the y axis
# --- Significance cut-offs ---
log_thresh = 1                  # fold change must lie beyond +/- this value
pval_thresh = 0.01              # p-value must be below this value
# --- Appearance ---
title = 'No_cell_comp'          # plot title
up_color = 'green'              # points above +log_thresh that pass pval_thresh
down_color = 'red'              # points below -log_thresh that pass pval_thresh
n_names2show = 10               # number of top-ranked rows that get a name label

In [None]:
# Path to the input table; its extension (.tsv, .csv or .xlsx) selects the reader.
in_file = '../data/DE_data.xlsx'

In [None]:
# Pick the reader from the file extension. The extension is lower-cased so
# that e.g. 'DATA.XLSX' or 'table.CSV' are accepted as well.
file_type = in_file.rsplit('.', 1)[-1].lower()
if file_type == 'xlsx':
    DF = pd.read_excel(in_file, index_col=0)
elif file_type in ('tsv', 'csv'):
    # Both text formats share one reader and differ only in the delimiter.
    sep = '\t' if file_type == 'tsv' else ','
    # The first column is read as the index and then dropped again.
    DF = pd.read_csv(in_file, sep=sep, index_col=0).reset_index(drop=True)
else:
    raise NameError("Invalid input format. Has to be either .tsv, .csv or .xlsx.")

In [None]:
# Keep only the rows whose 'design' value is 'No_cell_comp'.
is_selected_design = DF['design'].eq('No_cell_comp')
DF = DF.loc[is_selected_design]
DF

## Sort DF properly

In [None]:
# Add the absolute fold change as a new column at position 2. The source
# column is taken from `log_col` so this cell follows the configured
# fold-change column instead of a hard-coded name.
DF.insert(2,'absLogF',DF.loc[:,log_col].abs())
DF

In [None]:
# Rank rows by ascending p-value; ties are broken by descending (signed)
# fold change. The configured column names are used so the ordering stays
# consistent with the thresholds and the plot when the config changes.
# The index is rebuilt so that row position equals rank.
# NOTE(review): 'absLogF' is not used as a sort key - confirm whether ties
# were meant to be broken by absolute fold change instead.
DF = DF.sort_values([pval_col,log_col],ascending=[True,False]).reset_index(drop=True)
DF

## Insert color

In [None]:
# Add a 'color' column at position 4, with every row set to black.
DF.insert(loc=4, column='color', value='black')
DF

In [None]:
# Rows passing the p-value cut-off, computed once and shared by both masks.
is_significant = DF[pval_col].lt(pval_thresh)
# Significant rows whose fold change lies beyond the threshold on each side.
down = is_significant & DF[log_col].lt(-log_thresh)
up = is_significant & DF[log_col].gt(log_thresh)

In [None]:
# Recolor the rows selected by each mask (down first, then up).
for mask, colour in ((down, down_color), (up, up_color)):
    DF.loc[mask, 'color'] = colour

In [None]:
# Display the table to inspect the 'color' column.
DF

## Plot

In [None]:
# Coordinates: fold change on x, -log10(p-value) on y.
x = DF.loc[:,log_col].values
y = -np.log10(DF.loc[:,pval_col].values)
names = DF.loc[:,gene_col]

fig,ax = plt.subplots(figsize=[8,8])
# Dashed guides marking the p-value and fold-change thresholds.
ax.axhline(-np.log10(pval_thresh),color='gray',linestyle='--')
ax.axvline(log_thresh,color='gray',linestyle='--')
ax.axvline(-log_thresh,color='gray',linestyle='--')
ax.scatter(x,y,c=DF.loc[:,'color'].values,s=3)
# Label the first rows of the table. The count is clamped to the number of
# rows available so a short table no longer raises IndexError, and names are
# taken by position so the loop does not depend on the index labels.
n_labels = max(0, min(n_names2show, len(x)))
for xi, yi, label in zip(x[:n_labels], y[:n_labels], names.iloc[:n_labels]):
    # Right-align labels of positive fold changes (text extends towards the
    # centre of the plot) and left-align the others.
    ha = "right" if xi > 0 else "left"
    ax.text(xi, yi, s=label, ha=ha)
ax.set_ylabel(r'$-log_{10}(pval)$')
ax.set_xlabel(r'$log_2FoldChange$')
ax.set_title(title)