More linting ii #175

**Open** · wants to merge 6 commits into `more_linting` · Changes from 2 commits
**alphabase/constants/aa.py** — 6 changes: 4 additions & 2 deletions

```diff
@@ -69,15 +69,17 @@ def reset_AA_df():
 def reset_AA_Composition():
     global AA_Composition
     AA_Composition = {}
-    for aa, formula, mass in AA_DF.values:
+    for aa, formula, _mass in AA_DF.values:
         AA_Composition[aa] = dict(parse_formula(formula))
     return AA_Composition


 reset_AA_Composition()


-def reset_AA_atoms(atom_replace_dict: typing.Dict = {}):
+def reset_AA_atoms(atom_replace_dict: typing.Dict = None):
+    if atom_replace_dict is None:
+        atom_replace_dict = {}
     reset_elements()
     replace_atoms(atom_replace_dict)
     reset_AA_mass()
```
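For reference, the `None`-sentinel rewrite guards against Python's shared-mutable-default pitfall: a default like `{}` or `[]` is created once, when the `def` statement runs, and then reused across calls. A minimal standalone sketch (hypothetical names, not alphabase code):

```python
def append_item_buggy(item, bucket=[]):
    # the default list is created once, at function definition time
    bucket.append(item)
    return bucket

def append_item_fixed(item, bucket=None):
    if bucket is None:
        bucket = []  # a fresh list on every call
    bucket.append(item)
    return bucket

print(append_item_buggy("a"))  # ['a']
print(append_item_buggy("b"))  # ['a', 'b']  <- state leaked across calls
print(append_item_fixed("a"))  # ['a']
print(append_item_fixed("b"))  # ['b']
```

In `reset_AA_atoms` the dict appears to be only read, so the change looks preventive rather than a bug fix, but it satisfies the linter and stays safe if the argument is ever mutated.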
**alphabase/constants/isotope.py** — 18 changes: 10 additions & 8 deletions

```diff
@@ -155,14 +155,7 @@ def _calc_one_elem_cum_dist(element_cum_dist: np.ndarray, element_cum_mono: np.n
 class IsotopeDistribution:
     def __init__(
         self,
-        max_elem_num_dict: dict = {
-            "C": 2000,
-            "H": 5000,
-            "N": 1000,
-            "O": 1000,
-            "S": 200,
-            "P": 200,
-        },
+        max_elem_num_dict: dict = None,
     ):
         """Faster calculation of isotope abundance distribution by pre-building
         isotope distribution tables for most common elements.
```

> **@jalew188** (Collaborator) commented on May 23, 2024:
> The old default parameter style is good for Sphinx documentation: `def func(arg=generate_args())` is compiled once, so the values of `generate_args()` are displayed in the docs.

```diff
@@ -193,6 +186,15 @@ def __init__(
         {element: mono position array of cumulated isotope distribution},
         and mono position array is a 1-D int np.ndarray.
         """
+        if max_elem_num_dict is None:
+            max_elem_num_dict = {
+                "C": 2000,
+                "H": 5000,
+                "N": 1000,
+                "O": 1000,
+                "S": 200,
+                "P": 200,
+            }
         self.element_to_cum_dist_dict = {}
         self.element_to_cum_mono_idx = {}
         for elem, n in max_elem_num_dict.items():
```
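If the reviewer's Sphinx concern needs addressing, one common workaround is to state the default in the numpydoc `Parameters` section so it still appears in rendered docs. A sketch with a hypothetical standalone function, not the PR's actual docstring:

```python
def build_isotope_tables(max_elem_num_dict: dict = None) -> dict:
    """Pre-build per-element isotope tables.

    Parameters
    ----------
    max_elem_num_dict : dict, optional
        Maximum atom count per element, by default
        ``{"C": 2000, "H": 5000, "N": 1000, "O": 1000, "S": 200, "P": 200}``.
    """
    if max_elem_num_dict is None:
        max_elem_num_dict = {
            "C": 2000, "H": 5000, "N": 1000, "O": 1000, "S": 200, "P": 200
        }
    return dict(max_elem_num_dict)  # placeholder for the real table building
```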
**alphabase/constants/modification.py** — 5 changes: 1 addition & 4 deletions

```diff
@@ -276,10 +276,7 @@ def calc_modloss_mass_with_importance(
     mod_losses = np.zeros(nAA + 2)
     mod_losses[mod_sites] = [MOD_LOSS_MASS[mod] for mod in mod_names]
     _loss_importance = np.zeros(nAA + 2)
-    _loss_importance[mod_sites] = [
-        MOD_LOSS_IMPORTANCE[mod] if mod in MOD_LOSS_IMPORTANCE else 0
-        for mod in mod_names
-    ]
+    _loss_importance[mod_sites] = [MOD_LOSS_IMPORTANCE.get(mod, 0) for mod in mod_names]

     # Will not consider the modloss if the corresponding modloss_importance is 0
     mod_losses[_loss_importance == 0] = 0
```
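`dict.get(key, default)` is an exact drop-in for the `x[k] if k in x else default` pattern; a toy equivalence check with hypothetical table contents:

```python
MOD_LOSS_IMPORTANCE = {"Phospho@S": 1.0}  # toy stand-in for the real table

mod_names = ["Phospho@S", "Oxidation@M"]
old = [MOD_LOSS_IMPORTANCE[m] if m in MOD_LOSS_IMPORTANCE else 0 for m in mod_names]
new = [MOD_LOSS_IMPORTANCE.get(m, 0) for m in mod_names]
assert old == new == [1.0, 0]
```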
**alphabase/io/hdf.py** — 21 changes: 10 additions & 11 deletions

```diff
@@ -199,14 +199,16 @@ def set_truncate(self, truncate: bool = True):
     def __setattr__(self, name, value):
         try:
             super().__setattr__(name, value)
-        except NotImplementedError:
+        except NotImplementedError as e:
             if not self.truncate:
                 if name in self.group_names:
-                    raise KeyError(f"Group name '{name}' cannot be truncated")
+                    raise KeyError(f"Group name '{name}' cannot be truncated") from e
                 elif name in self.dataset_names:
-                    raise KeyError(f"Dataset name '{name}' cannot be truncated")
+                    raise KeyError(f"Dataset name '{name}' cannot be truncated") from e
                 elif name in self.dataframe_names:
-                    raise KeyError(f"Dataframe name '{name}' cannot be truncated")
+                    raise KeyError(
+                        f"Dataframe name '{name}' cannot be truncated"
+                    ) from e
             if isinstance(value, (np.ndarray, pd.core.series.Series)):
                 self.add_dataset(name, value)
             elif isinstance(value, (dict, pd.DataFrame)):
@@ -217,7 +219,7 @@ def __setattr__(self, name, value):
                 "Only (str, bool, int, float, np.ndarray, "
                 "pd.core.series.Series, dict pd.DataFrame) types are "
                 "accepted.",
-            )
+            ) from e

     def add_dataset(
         self,
@@ -252,12 +254,12 @@ def add_dataset(
                 # chunks=array.shape,
                 maxshape=tuple([None for i in array.shape]),
             )
-        except TypeError:
+        except TypeError as e:
             raise NotImplementedError(
                 f"Type {array.dtype} is not understood. "
                 "If this is a string format, try to cast it to "
                 "np.dtype('O') as possible solution."
-            )
+            ) from e
         dataset = HDF_Dataset(
             file_name=self.file_name,
             name=f"{self.name}/{name}",
@@ -496,10 +498,7 @@ def __init__(
         >>> hdf_file.dfs.df1.data_from
         "colleagues"
         """
-        if delete_existing:
-            mode = "w"
-        else:
-            mode = "a"
+        mode = "w" if delete_existing else "a"
         with h5py.File(file_name, mode): # , swmr=True):
             pass
         super().__init__(
```
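The `raise ... from e` additions preserve the caught exception as an explicit cause instead of the implicit "During handling of the above exception, another exception occurred" context. A self-contained sketch of the mechanics, with hypothetical names:

```python
def set_value(name: str):
    try:
        raise NotImplementedError("h5py cannot serialize this type")
    except NotImplementedError as e:
        raise KeyError(f"'{name}' cannot be truncated") from e

try:
    set_value("df1")
except KeyError as err:
    assert isinstance(err.__cause__, NotImplementedError)  # explicit chain
    assert err.__suppress_context__  # 'from' replaces the implicit context
```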
**alphabase/io/tempmmap.py** — 9 changes: 4 additions & 5 deletions

```diff
@@ -119,11 +119,10 @@ def create_empty_mmap(shape: tuple, dtype: np.dtype, path: str = None, overwrite
         )
     else:
         # check that if overwrite is false the file does not already exist
-        if not overwrite:
-            if os.path.exists(path):
-                raise ValueError(
-                    "The file already exists. Set overwrite to True to overwrite the file or choose a different name."
-                )
+        if not overwrite and os.path.exists(path):
+            raise ValueError(
+                "The file already exists. Set overwrite to True to overwrite the file or choose a different name."
+            )
         if not os.path.basename.endswith(".hdf"):
             raise ValueError("The chosen file name needs to end with .hdf")
         if os.path.isdir(os.path.commonpath(path)):
```
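Merging the nested `if`s is behavior-preserving because `and` short-circuits: the `os.path.exists` call still runs only when `overwrite` is false. A toy check with hypothetical names:

```python
import os

calls = []

def exists_logged(path: str) -> bool:
    calls.append(path)
    return os.path.exists(path)

overwrite = True
if not overwrite and exists_logged("temp.hdf"):
    raise ValueError("The file already exists.")
assert calls == []  # short-circuit: the existence check never ran
```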
**alphabase/peptide/fragment.py** — 59 changes: 31 additions & 28 deletions

```diff
@@ -494,10 +494,12 @@ def mask_fragments_for_charge_greater_than_precursor_charge(
     precursor_charge_array: np.ndarray,
     nAA_array: np.ndarray,
     *,
-    candidate_fragment_charges: list = [2, 3, 4],
+    candidate_fragment_charges: list = None,
 ):
     """Mask the fragment dataframe when
     the fragment charge is larger than the precursor charge"""
+    if candidate_fragment_charges is None:
+        candidate_fragment_charges = [2, 3, 4]
     precursor_charge_array = np.repeat(precursor_charge_array, nAA_array - 1)
     for col in fragment_df.columns:
         for charge in candidate_fragment_charges:
@@ -681,8 +683,8 @@ def flatten_fragments(
     fragment_intensity_df: pd.DataFrame,
     min_fragment_intensity: float = -1,
     keep_top_k_fragments: int = 1000,
-    custom_columns: list = ["type", "number", "position", "charge", "loss_type"],
-    custom_df: Dict[str, pd.DataFrame] = {},
+    custom_columns: list = None,
+    custom_df: Dict[str, pd.DataFrame] = None,
 ) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """
     Converts the tabular fragment format consisting of
@@ -750,6 +752,10 @@ def flatten_fragments(
     - charge: uint8, fragment charge
     - loss_type: int16, fragment loss type, 0=noloss, 17=NH3, 18=H2O, 98=H3PO4 (phos), ...
     """
+    if custom_df is None:
+        custom_df = {}
+    if custom_columns is None:
+        custom_columns = ["type", "number", "position", "charge", "loss_type"]
     if len(precursor_df) == 0:
         return precursor_df, pd.DataFrame()
     # new dataframes for fragments and precursors are created
@@ -1047,25 +1053,24 @@ def create_fragment_mz_dataframe(
     pd.DataFrame
         `fragment_mz_df` with given `charged_frag_types`
     """
-    if reference_fragment_df is None:
-        if "frag_start_idx" in precursor_df.columns:
-            # raise ValueError(
-            #     "`precursor_df` contains 'frag_start_idx' column, "\
-            #     "please provide `reference_fragment_df` argument"
-            # )
-            fragment_mz_df = init_fragment_by_precursor_dataframe(
-                precursor_df,
-                charged_frag_types,
-                dtype=dtype,
-            )
-            return create_fragment_mz_dataframe(
-                precursor_df=precursor_df,
-                charged_frag_types=charged_frag_types,
-                reference_fragment_df=fragment_mz_df,
-                inplace_in_reference=True,
-                batch_size=batch_size,
-                dtype=dtype,
-            )
+    if reference_fragment_df is None and "frag_start_idx" in precursor_df.columns:
+        # raise ValueError(
+        #     "`precursor_df` contains 'frag_start_idx' column, "\
+        #     "please provide `reference_fragment_df` argument"
+        # )
+        fragment_mz_df = init_fragment_by_precursor_dataframe(
+            precursor_df,
+            charged_frag_types,
+            dtype=dtype,
+        )
+        return create_fragment_mz_dataframe(
+            precursor_df=precursor_df,
+            charged_frag_types=charged_frag_types,
+            reference_fragment_df=fragment_mz_df,
+            inplace_in_reference=True,
+            batch_size=batch_size,
+            dtype=dtype,
+        )
     if "nAA" not in precursor_df.columns:
         # fast
         return create_fragment_mz_dataframe_by_sort_precursor(
@@ -1255,12 +1260,10 @@ def filter_fragment_number(
     if not set(["frag_start_idx", "frag_stop_idx"]).issubset(precursor_df.columns):
         raise KeyError("frag_start_idx and frag_stop_idx not in dataframe")

-    for i, (start_idx, stop_idx, n_allowed_lib) in enumerate(
-        zip(
-            precursor_df["frag_start_idx"].values,
-            precursor_df["frag_stop_idx"].values,
-            precursor_df[n_fragments_allowed_column_name].values,
-        )
+    for start_idx, stop_idx, n_allowed_lib in zip(
+        precursor_df["frag_start_idx"].values,
+        precursor_df["frag_stop_idx"].values,
+        precursor_df[n_fragments_allowed_column_name].values,
     ):
         _allowed = min(n_allowed_lib, n_allowed)
```
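The loop rewrite in `filter_fragment_number` drops `enumerate` because the index was never used; zipping the three column arrays directly is equivalent. A minimal sketch with toy arrays:

```python
import numpy as np

frag_start = np.array([0, 5])
frag_stop = np.array([5, 9])
n_allowed_lib_col = np.array([3, 2])

# the old loop wrapped this zip in enumerate() but never used the index
for start_idx, stop_idx, n_allowed_lib in zip(frag_start, frag_stop, n_allowed_lib_col):
    print(start_idx, stop_idx, min(n_allowed_lib, 4))
```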
**alphabase/peptide/precursor.py** — 24 changes: 11 additions & 13 deletions

```diff
@@ -25,18 +25,16 @@ def refine_precursor_df(
     """
     if ensure_data_validity:
         df.fillna("", inplace=True)
-        if "charge" in df.columns:
-            if df.charge.dtype not in [
-                "int",
-                "int8",
-                "int64",
-                "int32",
-                # np.int64, np.int32, np.int8,
-            ]:
-                df["charge"] = df["charge"].astype(np.int8)
-        if "mod_sites" in df.columns:
-            if df.mod_sites.dtype not in ["O", "U"]:
-                df["mod_sites"] = df.mod_sites.astype("U")
+        if "charge" in df.columns and df.charge.dtype not in [
+            "int",
+            "int8",
+            "int64",
+            "int32",
+            # np.int64, np.int32, np.int8,
+        ]:
+            df["charge"] = df["charge"].astype(np.int8)
+        if "mod_sites" in df.columns and df.mod_sites.dtype not in ["O", "U"]:
+            df["mod_sites"] = df.mod_sites.astype("U")

     if "nAA" not in df.columns:
         df["nAA"] = df.sequence.str.len().astype(np.int32)
@@ -107,7 +105,7 @@ def update_precursor_mz(
     # precursor_mz_idx = precursor_df.columns.get_loc(
    #     'precursor_mz'
     # )
-    for nAA, big_df_group in _grouped:
+    for _, big_df_group in _grouped:
         for i in range(0, len(big_df_group), batch_size):
             batch_end = i + batch_size
```
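Renaming the unused `groupby` key to `_` documents that only the sub-frame matters in this loop; for example:

```python
import pandas as pd

df = pd.DataFrame({"nAA": [7, 7, 9], "precursor_mz": [400.2, 401.7, 502.3]})

for _, group in df.groupby("nAA"):  # group key intentionally unused
    print(len(group), group["precursor_mz"].mean())
```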
**alphabase/protein/fasta.py** — 29 changes: 18 additions & 11 deletions

```diff
@@ -429,8 +429,8 @@ def add_single_peptide_labeling(
     nterm_label_mod: str,
     cterm_label_mod: str,
 ):
-    add_nterm_label = True if nterm_label_mod else False
-    add_cterm_label = True if cterm_label_mod else False
+    add_nterm_label = bool(nterm_label_mod)
+    add_cterm_label = bool(cterm_label_mod)
     if mod_sites:
         _sites = mod_sites.split(";")
         if "0" in _sites:
@@ -478,10 +478,7 @@ def create_labeling_peptide_df(
     if len(peptide_df) == 0:
         return peptide_df

-    if inplace:
-        df = peptide_df
-    else:
-        df = peptide_df.copy()
+    df = peptide_df if inplace else peptide_df.copy()

     (label_aas, label_mod_dict, nterm_label_mod, cterm_label_mod) = parse_labels(labels)

@@ -507,7 +504,7 @@ def protein_idxes_to_names(protein_idxes: str, protein_names: list):

 def append_special_modifications(
     df: pd.DataFrame,
-    var_mods: list = ["Phospho@S", "Phospho@T", "Phospho@Y"],
+    var_mods: list = None,
     min_mod_num: int = 0,
     max_mod_num: int = 1,
     max_peptidoform_num: int = 100,
@@ -553,6 +550,8 @@ def append_special_modifications(
     pd.DataFrame
         The precursor_df with new modification added.
     """
+    if var_mods is None:
+        var_mods = ["Phospho@S", "Phospho@T", "Phospho@Y"]
     if len(var_mods) == 0 or len(df) == 0:
         return df

@@ -644,7 +643,7 @@ class SpecLibFasta(SpecLibBase):

     def __init__(
         self,
-        charged_frag_types: list = ["b_z1", "b_z2", "y_z1", "y_z2"],
+        charged_frag_types: list = None,
         *,
         protease: str = "trypsin",
         max_missed_cleavages: int = 2,
@@ -654,12 +653,12 @@ def __init__(
         precursor_charge_max: int = 4,
         precursor_mz_min: float = 400.0,
         precursor_mz_max: float = 2000.0,
-        var_mods: list = ["Acetyl@Protein_N-term", "Oxidation@M"],
+        var_mods: list = None,
         min_var_mod_num: int = 0,
         max_var_mod_num: int = 2,
-        fix_mods: list = ["Carbamidomethyl@C"],
+        fix_mods: list = None,
         labeling_channels: dict = None,
-        special_mods: list = [],
+        special_mods: list = None,
         min_special_mod_num: int = 0,
         max_special_mod_num: int = 1,
         special_mods_cannot_modify_pep_n_term: bool = False,
@@ -758,6 +757,14 @@ def __init__(
         include_contaminants : bool, optional
             If include contaminants.fasta, by default False
         """
+        if special_mods is None:
+            special_mods = []
+        if fix_mods is None:
+            fix_mods = ["Carbamidomethyl@C"]
+        if var_mods is None:
+            var_mods = ["Acetyl@Protein_N-term", "Oxidation@M"]
+        if charged_frag_types is None:
+            charged_frag_types = ["b_z1", "b_z2", "y_z1", "y_z2"]
         super().__init__(
             charged_frag_types=charged_frag_types,
             precursor_mz_min=precursor_mz_min,
```
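Two of the simplifications above, checked on plain values (hypothetical inputs): `bool(x)` replaces `True if x else False`, and a conditional expression replaces the four-line `if/else` assignment:

```python
nterm_label_mod = "Dimethyl@Any_N-term"  # hypothetical label
add_nterm_label = bool(nterm_label_mod)  # was: True if nterm_label_mod else False
assert add_nterm_label is True
assert bool("") is False  # an empty string still maps to False

inplace = False
peptides = ["PEPTIDE", "SEQUENCE"]
df = peptides if inplace else list(peptides)  # stand-in for peptide_df.copy()
assert df == peptides and df is not peptides
```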
**alphabase/psm_reader/alphapept_reader.py** — 7 changes: 2 additions & 5 deletions

```diff
@@ -18,10 +18,7 @@ def parse_ap(precursor):
     Parser to parse peptide strings
     """
     items = precursor.split("_")
-    if len(items) == 3:
-        decoy = 1
-    else:
-        decoy = 0
+    decoy = 1 if len(items) == 3 else 0
     modseq = items[0]
     charge = items[-1]

@@ -81,7 +78,7 @@ def _init_modification_mapping(self):
     def _load_file(self, filename):
         with h5py.File(filename, "r") as _hdf:
             dataset = _hdf[self.hdf_dataset]
-            df = pd.DataFrame({col: dataset[col] for col in dataset.keys()})
+            df = pd.DataFrame({col: dataset[col] for col in dataset})
             df["raw_name"] = os.path.basename(filename)[: -len(".ms_data.hdf")]
             df["precursor"] = df["precursor"].str.decode("utf-8")
             # df['naked_sequence'] = df['naked_sequence'].str.decode('utf-8')
```
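Both patterns here rely on standard Python semantics: iterating a mapping yields its keys (h5py groups behave the same way), and the conditional expression matches the removed `if/else`. A quick check on plain objects:

```python
dataset = {"precursor": [b"PEPTIDE_2"], "charge": [2]}  # dict stand-in for the h5py group
assert list(dataset) == list(dataset.keys())

items = "MPEPTIDEK_decoy_2".split("_")
decoy = 1 if len(items) == 3 else 0
assert decoy == 1
```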