More linting ii #175

Merged · 9 commits · Jul 17, 2024
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -12,5 +12,7 @@ repos:
     hooks:
       - id: ruff-format
       - id: ruff
+        args:
+          - "--fix"
 
 exclude: .bumpversion.cfg
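With `args: ["--fix"]`, the ruff hook now applies safe autofixes (the `dict.get`, ternary, and `.keys()` rewrites below) instead of only reporting them. A rough command-line equivalent — assuming `ruff` is on PATH, with a hypothetical target path:

```python
import subprocess

# Report-only run: lists violations, exits non-zero if any remain.
subprocess.run(["ruff", "check", "alphabase/"])

# What the updated hook effectively does: apply safe fixes in place,
# then report whatever could not be fixed automatically.
subprocess.run(["ruff", "check", "--fix", "alphabase/"])
```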
2 changes: 1 addition & 1 deletion alphabase/constants/aa.py
@@ -69,7 +69,7 @@ def reset_AA_df():
 def reset_AA_Composition():
     global AA_Composition
     AA_Composition = {}
-    for aa, formula, mass in AA_DF.values:
+    for aa, formula, _mass in AA_DF.values:
         AA_Composition[aa] = dict(parse_formula(formula))
     return AA_Composition
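Renaming the unused tuple element to `_mass` satisfies ruff's unused-loop-variable rule (B007) without changing behavior. A self-contained sketch of the pattern, with made-up rows standing in for `AA_DF.values`:

```python
# Hypothetical (aa, formula, mass) rows; not the real AA_DF contents.
rows = [("A", "C(3)H(5)N(1)O(1)", 71.03711), ("G", "C(2)H(3)N(1)O(1)", 57.02146)]

composition = {}
for aa, formula, _mass in rows:  # leading underscore marks the value as unused
    composition[aa] = formula
```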
5 changes: 1 addition & 4 deletions alphabase/constants/modification.py
@@ -276,10 +276,7 @@ def calc_modloss_mass_with_importance(
     mod_losses = np.zeros(nAA + 2)
     mod_losses[mod_sites] = [MOD_LOSS_MASS[mod] for mod in mod_names]
     _loss_importance = np.zeros(nAA + 2)
-    _loss_importance[mod_sites] = [
-        MOD_LOSS_IMPORTANCE[mod] if mod in MOD_LOSS_IMPORTANCE else 0
-        for mod in mod_names
-    ]
+    _loss_importance[mod_sites] = [MOD_LOSS_IMPORTANCE.get(mod, 0) for mod in mod_names]
 
     # Will not consider the modloss if the corresponding modloss_importance is 0
     mod_losses[_loss_importance == 0] = 0
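`dict.get(key, default)` replaces the membership-test conditional (a flake8-simplify rewrite, SIM401) and does one lookup instead of two. With illustrative values rather than the real `MOD_LOSS_IMPORTANCE` table:

```python
importance = {"Phospho@S": 1.0}  # hypothetical subset of MOD_LOSS_IMPORTANCE
mods = ["Phospho@S", "Oxidation@M"]

# Before: importance[mod] if mod in importance else 0 -- two dict lookups.
# After: a single lookup with a default.
values = [importance.get(mod, 0) for mod in mods]
assert values == [1.0, 0]
```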
21 changes: 10 additions & 11 deletions alphabase/io/hdf.py
@@ -199,14 +199,16 @@ def set_truncate(self, truncate: bool = True):
     def __setattr__(self, name, value):
         try:
             super().__setattr__(name, value)
-        except NotImplementedError:
+        except NotImplementedError as e:
             if not self.truncate:
                 if name in self.group_names:
-                    raise KeyError(f"Group name '{name}' cannot be truncated")
+                    raise KeyError(f"Group name '{name}' cannot be truncated") from e
                 elif name in self.dataset_names:
-                    raise KeyError(f"Dataset name '{name}' cannot be truncated")
+                    raise KeyError(f"Dataset name '{name}' cannot be truncated") from e
                 elif name in self.dataframe_names:
-                    raise KeyError(f"Dataframe name '{name}' cannot be truncated")
+                    raise KeyError(
+                        f"Dataframe name '{name}' cannot be truncated"
+                    ) from e
             if isinstance(value, (np.ndarray, pd.core.series.Series)):
                 self.add_dataset(name, value)
             elif isinstance(value, (dict, pd.DataFrame)):
@@ -217,7 +219,7 @@ def __setattr__(self, name, value):
                 "Only (str, bool, int, float, np.ndarray, "
                 "pd.core.series.Series, dict pd.DataFrame) types are "
                 "accepted.",
-            )
+            ) from e
 
     def add_dataset(
         self,
@@ -252,12 +254,12 @@ def add_dataset(
                 # chunks=array.shape,
                 maxshape=tuple([None for i in array.shape]),
             )
-        except TypeError:
+        except TypeError as e:
             raise NotImplementedError(
                 f"Type {array.dtype} is not understood. "
                 "If this is a string format, try to cast it to "
                 "np.dtype('O') as possible solution."
-            )
+            ) from e
         dataset = HDF_Dataset(
             file_name=self.file_name,
             name=f"{self.name}/{name}",
@@ -496,10 +498,7 @@ def __init__(
         >>> hdf_file.dfs.df1.data_from
         "colleagues"
         """
-        if delete_existing:
-            mode = "w"
-        else:
-            mode = "a"
+        mode = "w" if delete_existing else "a"
         with h5py.File(file_name, mode):  # , swmr=True):
             pass
         super().__init__(
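Two idioms recur in this file: `raise ... from e` (bugbear B904) preserves the original exception as `__cause__` instead of masking it, and the one-line ternary replaces a four-line assignment (SIM108). Standalone sketches of both, not alphabase API:

```python
def open_mode(delete_existing: bool) -> str:
    # SIM108: conditional assignment as a single expression.
    return "w" if delete_existing else "a"


def as_int(value):
    try:
        return int(value)
    except ValueError as e:
        # B904: chaining keeps the ValueError's traceback attached as
        # __cause__, so debugging shows both errors, not just the KeyError.
        raise KeyError(f"cannot use {value!r} as an integer key") from e
```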
9 changes: 4 additions & 5 deletions alphabase/io/tempmmap.py
@@ -78,11 +78,10 @@ def _get_file_location(abs_file_path: str, overwrite=False) -> str:
         The file path if it is valid.
     """
     # check overwrite status and existence of file
-    if not overwrite:
-        if os.path.exists(abs_file_path):
-            raise ValueError(
-                "The file already exists. Set overwrite to True to overwrite the file or choose a different name."
-            )
+    if not overwrite and os.path.exists(abs_file_path):
+        raise ValueError(
+            "The file already exists. Set overwrite to True to overwrite the file or choose a different name."
+        )
 
     # ensure that the filename conforms to the naming convention
     if not os.path.basename.endswith(".hdf"):
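Collapsing the nested `if` into a single `and` condition (SIM102) removes one indentation level; short-circuiting keeps behavior identical, since `os.path.exists` only runs when `not overwrite` already holds. A standalone sketch:

```python
import os


def check_target(path: str, overwrite: bool = False) -> str:
    # One guard clause; the existence check is skipped when overwrite=True.
    if not overwrite and os.path.exists(path):
        raise ValueError("The file already exists. Set overwrite=True.")
    return path
```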
47 changes: 22 additions & 25 deletions alphabase/peptide/fragment.py
@@ -1047,25 +1047,24 @@ def create_fragment_mz_dataframe(
     pd.DataFrame
         `fragment_mz_df` with given `charged_frag_types`
     """
-    if reference_fragment_df is None:
-        if "frag_start_idx" in precursor_df.columns:
-            # raise ValueError(
-            #     "`precursor_df` contains 'frag_start_idx' column, "\
-            #     "please provide `reference_fragment_df` argument"
-            # )
-            fragment_mz_df = init_fragment_by_precursor_dataframe(
-                precursor_df,
-                charged_frag_types,
-                dtype=dtype,
-            )
-            return create_fragment_mz_dataframe(
-                precursor_df=precursor_df,
-                charged_frag_types=charged_frag_types,
-                reference_fragment_df=fragment_mz_df,
-                inplace_in_reference=True,
-                batch_size=batch_size,
-                dtype=dtype,
-            )
+    if reference_fragment_df is None and "frag_start_idx" in precursor_df.columns:
+        # raise ValueError(
+        #     "`precursor_df` contains 'frag_start_idx' column, "\
+        #     "please provide `reference_fragment_df` argument"
+        # )
+        fragment_mz_df = init_fragment_by_precursor_dataframe(
+            precursor_df,
+            charged_frag_types,
+            dtype=dtype,
+        )
+        return create_fragment_mz_dataframe(
+            precursor_df=precursor_df,
+            charged_frag_types=charged_frag_types,
+            reference_fragment_df=fragment_mz_df,
+            inplace_in_reference=True,
+            batch_size=batch_size,
+            dtype=dtype,
+        )
     if "nAA" not in precursor_df.columns:
         # fast
         return create_fragment_mz_dataframe_by_sort_precursor(
@@ -1255,12 +1254,10 @@ def filter_fragment_number(
     if not set(["frag_start_idx", "frag_stop_idx"]).issubset(precursor_df.columns):
         raise KeyError("frag_start_idx and frag_stop_idx not in dataframe")
 
-    for i, (start_idx, stop_idx, n_allowed_lib) in enumerate(
-        zip(
-            precursor_df["frag_start_idx"].values,
-            precursor_df["frag_stop_idx"].values,
-            precursor_df[n_fragments_allowed_column_name].values,
-        )
+    for start_idx, stop_idx, n_allowed_lib in zip(
+        precursor_df["frag_start_idx"].values,
+        precursor_df["frag_stop_idx"].values,
+        precursor_df[n_fragments_allowed_column_name].values,
     ):
         _allowed = min(n_allowed_lib, n_allowed)
 
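Two rewrites here: the early-return guard merges its two conditions into one `if` (SIM102), and `enumerate` disappears because its index was never used (B007). A toy version of the de-enumerated loop, with hypothetical fragment ranges:

```python
# Hypothetical per-precursor fragment ranges and library limits.
starts = [0, 3]
stops = [3, 7]
allowed = [2, 4]

# zip() alone suffices once the positional index is unused.
for start_idx, stop_idx, n_allowed_lib in zip(starts, stops, allowed):
    print(start_idx, stop_idx, min(n_allowed_lib, 3))
```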
24 changes: 11 additions & 13 deletions alphabase/peptide/precursor.py
@@ -25,18 +25,16 @@ def refine_precursor_df(
     """
     if ensure_data_validity:
         df.fillna("", inplace=True)
-        if "charge" in df.columns:
-            if df.charge.dtype not in [
-                "int",
-                "int8",
-                "int64",
-                "int32",
-                # np.int64, np.int32, np.int8,
-            ]:
-                df["charge"] = df["charge"].astype(np.int8)
-        if "mod_sites" in df.columns:
-            if df.mod_sites.dtype not in ["O", "U"]:
-                df["mod_sites"] = df.mod_sites.astype("U")
+        if "charge" in df.columns and df.charge.dtype not in [
+            "int",
+            "int8",
+            "int64",
+            "int32",
+            # np.int64, np.int32, np.int8,
+        ]:
+            df["charge"] = df["charge"].astype(np.int8)
+        if "mod_sites" in df.columns and df.mod_sites.dtype not in ["O", "U"]:
+            df["mod_sites"] = df.mod_sites.astype("U")
 
     if "nAA" not in df.columns:
         df["nAA"] = df.sequence.str.len().astype(np.int32)
@@ -107,7 +105,7 @@ def update_precursor_mz(
     # precursor_mz_idx = precursor_df.columns.get_loc(
     #     'precursor_mz'
    # )
-    for nAA, big_df_group in _grouped:
+    for _, big_df_group in _grouped:
         for i in range(0, len(big_df_group), batch_size):
             batch_end = i + batch_size
 
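Same pair of patterns in this file: nested column-presence and dtype checks collapse into single `and` conditions, and the unused `groupby` key becomes `_`. A sketch of the latter on a toy frame:

```python
import pandas as pd

df = pd.DataFrame({"nAA": [7, 7, 9], "mz": [400.2, 410.8, 502.3]})

# Only the per-group frame is needed; the key (the nAA value) is discarded.
for _, group in df.groupby("nAA"):
    print(len(group), group["mz"].mean())
```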
11 changes: 4 additions & 7 deletions alphabase/protein/fasta.py
@@ -43,7 +43,7 @@ def read_fasta_file(fasta_filename: str = ""):
         protein information,
         {protein_id:str, full_name:str, gene_name:str, description:str, sequence:str}
     """
-    with open(fasta_filename, "rt", encoding="utf-8") as handle:
+    with open(fasta_filename, encoding="utf-8") as handle:
         iterator = SeqIO.parse(handle, "fasta")
         while iterator:
             try:
@@ -429,8 +429,8 @@ def add_single_peptide_labeling(
     nterm_label_mod: str,
     cterm_label_mod: str,
 ):
-    add_nterm_label = True if nterm_label_mod else False
-    add_cterm_label = True if cterm_label_mod else False
+    add_nterm_label = bool(nterm_label_mod)
+    add_cterm_label = bool(cterm_label_mod)
     if mod_sites:
         _sites = mod_sites.split(";")
         if "0" in _sites:
@@ -478,10 +478,7 @@ def create_labeling_peptide_df(
     if len(peptide_df) == 0:
         return peptide_df
 
-    if inplace:
-        df = peptide_df
-    else:
-        df = peptide_df.copy()
+    df = peptide_df if inplace else peptide_df.copy()
 
     (label_aas, label_mod_dict, nterm_label_mod, cterm_label_mod) = parse_labels(labels)
 
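Three small cleanups: `"rt"` is already `open`'s default mode (pyupgrade UP015), `bool(x)` replaces `True if x else False` (SIM210), and the copy-vs-inplace choice becomes a ternary. A quick sketch of the truthiness idiom (the modification name is made up):

```python
def labeling_flags(nterm_label_mod: str, cterm_label_mod: str):
    # bool() of a string is False only for "", matching the old ternaries.
    return bool(nterm_label_mod), bool(cterm_label_mod)


assert labeling_flags("Dimethyl@Any N-term", "") == (True, False)  # hypothetical mod
```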
7 changes: 2 additions & 5 deletions alphabase/psm_reader/alphapept_reader.py
@@ -18,10 +18,7 @@ def parse_ap(precursor):
     Parser to parse peptide strings
     """
     items = precursor.split("_")
-    if len(items) == 3:
-        decoy = 1
-    else:
-        decoy = 0
+    decoy = 1 if len(items) == 3 else 0
     modseq = items[0]
     charge = items[-1]
 
@@ -81,7 +78,7 @@ def _init_modification_mapping(self):
     def _load_file(self, filename):
         with h5py.File(filename, "r") as _hdf:
             dataset = _hdf[self.hdf_dataset]
-            df = pd.DataFrame({col: dataset[col] for col in dataset.keys()})
+            df = pd.DataFrame({col: dataset[col] for col in dataset})
             df["raw_name"] = os.path.basename(filename)[: -len(".ms_data.hdf")]
             df["precursor"] = df["precursor"].str.decode("utf-8")
             # df['naked_sequence'] = df['naked_sequence'].str.decode('utf-8')
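Besides the one-line ternary for `decoy`, iterating the HDF5 group directly replaces `.keys()` (SIM118), since `h5py` groups are Mapping-like and yield member names. A sketch against a hypothetical file layout:

```python
import h5py
import pandas as pd

# Hypothetical file and group names, for illustration only.
with h5py.File("results.ms_data.hdf", "r") as f:
    group = f["peptide_fdr"]
    # Iterating a mapping yields its keys; .keys() adds nothing.
    df = pd.DataFrame({col: group[col][...] for col in group})
```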
9 changes: 4 additions & 5 deletions alphabase/psm_reader/maxquant_reader.py
@@ -75,10 +75,7 @@ def parse_mod_seq(
         0 for N-term; -1 for C-term; 1 to N for normal modifications.
     """
     PeptideModSeq = modseq
-    if modseq[0] == "_":
-        underscore_for_ncterm = True
-    else:
-        underscore_for_ncterm = False
+    underscore_for_ncterm = modseq[0] == "_"
     mod_list = []
     site_list = []
     site = PeptideModSeq.find(mod_sep[0])
@@ -136,7 +133,7 @@ def __init__(
         fdr=0.01,
         keep_decoy=False,
         fixed_C57=True,
-        mod_seq_columns=["Modified sequence"],
+        mod_seq_columns=None,
         **kwargs,
     ):
         """Reader for MaxQuant msms.txt and evidence.txt
@@ -168,6 +165,8 @@ def __init__(
             The columns to find modified sequences,
             by default ['Modified sequence']
         """
+        if mod_seq_columns is None:
+            mod_seq_columns = ["Modified sequence"]
         super().__init__(
             column_mapping=column_mapping,
             modification_mapping=modification_mapping,
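Swapping the mutable default `mod_seq_columns=["Modified sequence"]` for a `None` sentinel addresses bugbear B006: a list default is created once at definition time and shared across all calls. A minimal demonstration of the hazard and the fix:

```python
def buggy(columns=[]):  # B006: one shared list for every call
    columns.append("Modified sequence")
    return columns


def fixed(columns=None):  # fresh list per call
    if columns is None:
        columns = []
    columns.append("Modified sequence")
    return columns


assert buggy() == ["Modified sequence"]
assert buggy() == ["Modified sequence", "Modified sequence"]  # state leaked
assert fixed() == ["Modified sequence"]
assert fixed() == ["Modified sequence"]  # no leak
```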
5 changes: 1 addition & 4 deletions alphabase/psm_reader/msfragger_reader.py
@@ -13,10 +13,7 @@
 
 
 def _is_fragger_decoy(proteins):
-    for prot in proteins:
-        if not prot.lower().startswith("rev_"):
-            return False
-    return True
+    return all(prot.lower().startswith("rev_") for prot in proteins)
 
 
 mass_mapped_mods = psm_reader_yaml["msfragger_pepxml"]["mass_mapped_mods"]
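The explicit loop is a textbook `all()` rewrite (SIM110): a hit counts as decoy only when every mapped protein carries the `rev_` prefix. The generator short-circuits on the first non-decoy protein, exactly like the old early `return False`:

```python
def is_fragger_decoy(proteins):
    return all(prot.lower().startswith("rev_") for prot in proteins)


assert is_fragger_decoy(["rev_sp|P1", "REV_sp|P2"])
assert not is_fragger_decoy(["rev_sp|P1", "sp|P2"])
assert is_fragger_decoy([])  # vacuously True, same as the loop version
```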
29 changes: 16 additions & 13 deletions alphabase/psm_reader/psm_reader.py
@@ -240,9 +240,10 @@ def _reverse_mod_mapping(self):
         for this_mod, other_mod in self.modification_mapping.items():
             if isinstance(other_mod, (list, tuple)):
                 for _mod in other_mod:
-                    if _mod in self.rev_mod_mapping:
-                        if this_mod.endswith("Protein N-term"):
-                            continue
+                    if _mod in self.rev_mod_mapping and this_mod.endswith(
+                        "Protein N-term"
+                    ):
+                        continue
                     self.rev_mod_mapping[_mod] = this_mod
             else:
                 self.rev_mod_mapping[other_mod] = this_mod
@@ -344,7 +345,7 @@ def normalize_rt_by_raw_name(self):
         self.norm_rt()
         if "raw_name" not in self.psm_df.columns:
             return
-        for raw_name, df_group in self.psm_df.groupby("raw_name"):
+        for _, df_group in self.psm_df.groupby("raw_name"):
             self.psm_df.loc[df_group.index, "rt_norm"] = (
                 df_group.rt_norm / df_group.rt_norm.max()
             )
@@ -510,19 +511,21 @@ def _post_process(self, origin_df: pd.DataFrame):
 
     def filter_psm_by_modifications(
         self,
-        include_mod_set=set(
-            [
-                "Oxidation@M",
-                "Phospho@S",
-                "Phospho@T",
-                "Phospho@Y",
-                "Acetyl@Protein N-term",
-            ]
-        ),
+        include_mod_set=None,
     ):
         """
         Only keeps peptides with modifications in `include_mod_list`.
         """
+        if include_mod_set is None:
+            include_mod_set = set(
+                [
+                    "Oxidation@M",
+                    "Phospho@S",
+                    "Phospho@T",
+                    "Phospho@Y",
+                    "Acetyl@Protein N-term",
+                ]
+            )
         self._psm_df.mods = self._psm_df.mods.apply(
             keep_modifications, mod_set=include_mod_set
        )
6 changes: 3 additions & 3 deletions alphabase/quantification/quant_reader/config_dict_loader.py
@@ -24,7 +24,7 @@ def get_input_type_and_config_dict(input_file, input_type_to_use=None):
 
     uploaded_data_columns = set(pd.read_csv(input_file, sep=sep, nrows=1).columns)
 
-    for input_type in type2relevant_columns.keys():
+    for input_type in type2relevant_columns:
         if (input_type_to_use is not None) and (input_type != input_type_to_use):
             continue
         relevant_columns = type2relevant_columns.get(input_type)
@@ -65,7 +65,7 @@ def _load_config(config_yaml):
 
 def _get_type2relevant_cols(config_all):
     type2relcols = {}
-    for type in config_all.keys():
+    for type in config_all:
         config_typedict = config_all.get(type)
         relevant_cols = get_relevant_columns_config_dict(config_typedict)
         type2relcols[type] = relevant_cols
@@ -78,7 +78,7 @@ def get_relevant_columns_config_dict(config_typedict):
     for filtconf in config_typedict.get("filters", {}).values():
         filtcols.append(filtconf.get("param"))
 
-    if "ion_hierarchy" in config_typedict.keys():
+    if "ion_hierarchy" in config_typedict:
         for headr in config_typedict.get("ion_hierarchy").values():
             ioncols = list(itertools.chain.from_iterable(headr.get("mapping").values()))
             dict_ioncols.extend(ioncols)
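`.keys()` is redundant in both iteration and membership tests (SIM118): `for k in d` and `"k" in d` already operate on the keys. A tiny sketch with a hypothetical config mapping:

```python
config_all = {"spectronaut": {"ion_hierarchy": {}}, "diann": {}}  # hypothetical

for input_type in config_all:  # iterates keys directly
    if "ion_hierarchy" in config_all[input_type]:  # membership test on keys
        print(input_type, "defines an ion hierarchy")
```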
6 changes: 3 additions & 3 deletions alphabase/quantification/quant_reader/longformat_reader.py
@@ -153,7 +153,7 @@ def adapt_subtable(input_df_subset, config_dict, use_alphaquant_format):
     input_df_subset = quantreader_utils.filter_input(
         config_dict.get("filters", {}), input_df_subset
     )
-    if "ion_hierarchy" in config_dict.keys():
+    if "ion_hierarchy" in config_dict:
         return table_reformatter.merge_protein_cols_and_config_dict(
             input_df_subset, config_dict, use_alphaquant_format
         )
@@ -240,9 +240,9 @@ def process_with_dask(
 
 def get_hierarchy_names_from_config_dict(config_dict_for_type):
     hierarchy_names = []
-    if "ion_hierarchy" in config_dict_for_type.keys():
+    if "ion_hierarchy" in config_dict_for_type:
         ion_hierarchy = config_dict_for_type.get("ion_hierarchy")
-        for hierarchy_type in ion_hierarchy.keys():
+        for hierarchy_type in ion_hierarchy:
             hierarchy_names += ion_hierarchy.get(hierarchy_type).get("order")
         return list(set(hierarchy_names))
     else: