In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import vaep
from vaep.io import data_objects

from src.config import FNAME_C_PEPTIDES, FNAME_C_EVIDENCE, FNAME_C_PG, FNAME_C_GENES

# Echo the four constants imported from src.config so the cell output records
# which inputs this notebook run used.
FNAME_C_PEPTIDES, FNAME_C_EVIDENCE, FNAME_C_PG, FNAME_C_GENES

## Aggregated Peptides

In [None]:
# Build the peptide counter from the location given by FNAME_C_PEPTIDES.
peptide_counter = data_objects.PeptideCounter(FNAME_C_PEPTIDES)
# Size of the counter's `loaded` collection; used further down as the number of samples.
# NOTE(review): presumably one entry per processed sample -- confirm in vaep.io.data_objects.
N_SAMPLES = len(peptide_counter.loaded)

In [None]:
# Display the counter object's repr as the cell output.
peptide_counter

In [None]:
# Tabulate the counter's content; the plot below reads the 'counts' column of this table.
peptide_counts = peptide_counter.get_df_counts()
# Disabled on purpose: enabling the next line would add 1 to every index label.
# peptide_counts.index += 1 
# Preview the first rows.
peptide_counts.head()

In [None]:
# Show the name carried by the index of the counts table.
peptide_counts.index.name

In [None]:
# Summary statistics of the counts table, reported at ten evenly spaced
# percentiles from 10 % up to 100 %.
peptide_counts.describe(
    percentiles=np.linspace(0.1, 1, 10),
)

In [None]:
FEAT_NAME = 'aggregated peptide'

# Draw the raw counts first; labels and title are attached to the axes afterwards.
ax = peptide_counts['counts'].plot(figsize=(15, 10), grid=True)
ax.set(
    title=f'Count and proportion of {len(peptide_counts):,d} {FEAT_NAME}s over {N_SAMPLES:,d} samples',
    xlabel=f'{FEAT_NAME} count ordered by completeness',
    ylabel='counts',
)

# pandas' own secondary axis (plotting the 'proportion' column with
# `secondary_y=True`) is nearly good enough, but a customised second y-axis
# is used instead so that the minimal and maximum proportion are visible.
ax2 = vaep.plotting.add_prop_as_second_yaxis(ax=ax, n_samples=N_SAMPLES)
ax2.set_ylabel('proportion')

# Apply the project's large-number tick formatting to the count axis.
ax = vaep.plotting.format_large_numbers(ax=ax)


## Evidence - Peptides by charge and modifications



In [None]:
# Build the evidence counter from the location given by FNAME_C_EVIDENCE.
evidence_counter = data_objects.EvidenceCounter(FNAME_C_EVIDENCE)
# Tabulate its content, using the same accessor as for the aggregated peptides.
evidence_count = evidence_counter.get_df_counts()
# Preview the first rows.
evidence_count.head()

## Protein Groups

In [None]:
# Build the protein-groups counter from the location given by FNAME_C_PG.
pg_counter = data_objects.ProteinGroupsCounter(FNAME_C_PG)
# Tabulate its content, using the same accessor as for the aggregated peptides.
pg_count = pg_counter.get_df_counts()
# Preview the first rows.
pg_count.head()

## Genes

In [None]:
# Genes use the generic FeatureCounter: no counting function is supplied
# (counting_fct=None) and 'gene' is passed as the only index name.
# NOTE(review): presumably this only reads counts that were stored earlier -- confirm.
gene_counter = data_objects.FeatureCounter(FNAME_C_GENES, counting_fct=None, idx_names=['gene'])
gene_count = gene_counter.get_df_counts()
gene_count.head() # preview only; the NaN entry seen here is dropped in the following cell


In [None]:
# Drop the first row of the gene counts, which holds the NaN entry
# (assumes the NaN entry is always the first row -- TODO confirm).
# The table is re-derived from `gene_counter` rather than slicing `gene_count`
# in place, so re-running this cell always removes exactly one row instead of
# discarding one more row per execution.
gene_count = gene_counter.get_df_counts().iloc[1:]
gene_count.head()