# ProtOnt
### Volcano Plots and LOESS Curves Creator based on protein LFQ ontogeny 

In [16]:
# 1. MUST DO: Import packages
import utils
import numpy as np
from sklearn.preprocessing import MinMaxScaler

<font size="2">If you need to create the proper input file first, upload the protein TSV (report.pg_matrix.tsv) and a metadata file containing the sample names in the same order as the protein file (when transposed). The output of this merging process is a CSV file called final_data.csv, which can be found in the same folder as the metadata file provided.</font>

In [None]:
# 2. IF NEEDED: Specify paths to the metadata file and the protein file.
# Only required when the merged input CSV still has to be created (step 3).
# NOTE: these are absolute, machine-specific example paths; adjust them to
# your own setup before running.
# Protein TSV (report.pg_matrix.tsv):
protein_file = 'C:/Users/CH258405/Documents/ProtONT/example_input/report.pg_matrix.tsv'
# Metadata file with the sample names in the same order as the (transposed)
# protein file:
meta_file = 'C:/Users/CH258405/Documents/ProtONT/example_input/meta.csv'

In [None]:
# 3. IF NEEDED: create the input file from the metadata and protein files.
# Uses protein_file and meta_file from step 2. According to the notebook text,
# the merged result is written as final_data.csv into the folder of the
# metadata file (behaviour of utils.create_input_file - verify in utils).
utils.create_input_file(protein_file, meta_file)

<font size="2">The input file must have the same format as final_data.csv:
the first 15 columns contain metadata, and all following columns contain LFQ data. The metadata must include columns with the headers HOL and DOL.</font>

In [17]:
# 4. MUST DO: specify the path to the input file.
# This path is passed to every plotting step below; per the notebook text the
# result folders are created in the same directory as this file.
# NOTE: absolute, machine-specific example path; adjust it to your own setup.
# NOTE(review): the doubled '/' before 'input.csv' looks unintentional -
# confirm it is harmless for how utils derives the output folder.
path_to_csv_file = "C:/Users/CH258405/Documents/ProtONT/example_input//input.csv"

In [18]:
# 5. MUST DO: load the input CSV into a pandas DataFrame.
# NOTE: the DataFrame is bound to the name `pd` (it is NOT the pandas module);
# all later steps rely on that name.
pd = utils.read_csv_get_pd(path_to_csv_file)
# Store HOL as integers so it can be compared against integer bounds later.
pd = pd.astype({'HOL': int})

In [None]:
# 6. IF NEEDED: choose the HoL span you want to work with.
# Both bounds are inclusive: rows with hol_min <= HOL <= hol_max are kept.
hol_min = 150
hol_max = 270
# .copy() makes the filtered frame independent of the original one, so the
# column assignments in steps 7 and 9 do not write into a slice of another
# DataFrame (which triggers pandas' SettingWithCopyWarning).
pd = pd.loc[pd['HOL'].between(hol_min, hol_max)].copy()

In [19]:
# 7. IF NEEDED: log2-transform the intensity data.
# Columns from position 15 onward are the intensity columns; the first 15
# columns are left untouched.
lfq_columns = pd.columns[15:]
pd[lfq_columns] = np.log2(pd[lfq_columns])

<font size="2">LOESS plots will be saved in a folder called 'results_LOESS', located in the same directory as the specified input file. Additionally, a text file will be generated listing all proteins that were excluded due to insufficient data.</font>

In [20]:
# 8.0 Generate LOESS plots from the (optionally filtered/transformed) data.
# path_to_csv_file is passed along with the data; per the notebook text the
# plots end up in a 'results_LOESS' folder next to the input file
# (behaviour of utils.generate_loess_plots - verify in utils).
utils.generate_loess_plots(pd, path_to_csv_file)

In [None]:
# 8.1 Generate LOESS plots on two chosen factors.
# fst_factor: factor shown on the x-axis.
# snd_factor: second factor (presumably the grouping variable - verify in utils).
# Both values are expected to be column names of the metadata (assumption
# based on 'HOL' being a required metadata column - confirm for 'Sex').
fst_factor = 'HOL'
snd_factor = 'Sex'
utils.generate_loess_plots_2factors(pd, path_to_csv_file, fst_factor, snd_factor)

In [None]:
# 8.1.1 Generate a txt file listing all proteins whose concentration differs
# significantly when the levels of snd_factor are compared.
# NOTE(review): fst_factor is 'DOL' here but 'HOL' in step 8.1 - confirm this
# difference is intended.
# alpha is presumably the significance threshold used by the test - verify
# against utils.significance_loess_2factors.
fst_factor = 'DOL'
snd_factor = 'Sex'
utils.significance_loess_2factors(pd, path_to_csv_file, fst_factor, snd_factor, alpha=0.1)

<font size="2">If the LOESS plots should be based on normalized data, run step 9 before step 8. In that case, do not run step 9 again before step 10; otherwise, the data will be normalized a second time.</font>

In [None]:
# 9. IF NEEDED: min-max normalization (scaling) of the intensity data.
# Only the columns from position 15 onward are scaled; the first 15 columns
# stay as they are.
scaler = MinMaxScaler()
lfq_block = pd.iloc[:, 15:]
pd.iloc[:, 15:] = scaler.fit(lfq_block).transform(lfq_block)

<font size="2">Volcano plots will be saved in a folder called 'results_volcano', located in the same directory as the specified input file. Additionally, a text file will be generated listing all proteins that were excluded due to insufficient data.</font>

In [None]:
# 10. Generate volcano plots from the current state of the data.
# path_to_csv_file is passed along with the data; per the notebook text the
# plots end up in a 'results_volcano' folder next to the input file
# (behaviour of utils.generate_volcano_plot - verify in utils).
utils.generate_volcano_plot(pd, path_to_csv_file)