## Import package

In [None]:
import sys
sys.path.append('/ET_OptME/script')
import ETGEMs_function_protain as etgf
from ETGEMs_function_protain import *
from ET_optme import *
import pandas as pd
import cobra
import gurobipy
import json
import multiprocessing
import os
from multiprocessing import Pool
from sympy import subsets
import pandas as pd
import matplotlib.pyplot as plt
import re

## Find examples of advantages (validate the universality of advantages)

## 1. OptForce: Overlapped Reaction Range. ET-OptForce: Separated Enzyme Concentration Range.

In [None]:
# New version 8.4
import os
import pandas as pd

base_dir = './result/ET_Optme/result'

# 'Unnamed: N' is the name pandas gives to header-less columns when reading a
# sheet (typically an index that was written out with the data); they carry no
# information for this comparison and are removed when present.
index_artifacts = ['Unnamed: 0.1', 'Unnamed: 0']

# Manipulation labels kept for the comparison.
# NOTE(review): the capitalisation is mixed ('down' vs 'Up') and is reproduced
# exactly from the original filter; confirm it matches the labels actually
# written to results_S.xlsx / results_ET.xlsx.
kept_manipulations = ['down', 'Up']

# Visit every sub-folder of base_dir whose name does not start with 'F'.
# sorted() makes the processing (and log) order reproducible, because
# os.listdir() returns entries in arbitrary order.
for folder_name in sorted(os.listdir(base_dir)):
    folder_path = os.path.join(base_dir, folder_name)
    if not os.path.isdir(folder_path) or folder_name.startswith('F'):
        continue

    results_s_path = os.path.join(folder_path, 'results_S.xlsx')
    results_et_path = os.path.join(folder_path, 'results_ET.xlsx')
    # Folders that do not contain both result files are skipped silently.
    if not (os.path.exists(results_s_path) and os.path.exists(results_et_path)):
        continue

    # Load both result tables; errors='ignore' drops the artifact columns only
    # when they are present.
    df = pd.read_excel(results_s_path).drop(columns=index_artifacts, errors='ignore')
    et_df = pd.read_excel(results_et_path).drop(columns=index_artifacts, errors='ignore')

    # Keep only the rows whose manipulation is one of the labels above.
    s_df = df[df['manipulations'].isin(kept_manipulations)]
    etm_df = et_df[et_df['manipulations'].isin(kept_manipulations)]

    # Rows of the ET table whose gene does not occur among the filtered S rows.
    unique_genes = etm_df[~etm_df['gene'].isin(s_df['gene'])]
    genes = unique_genes['gene'].unique()

    # All rows of the (unfiltered) S table for those genes, whatever their
    # manipulation label is.
    filtered_df = df[df['gene'].isin(genes)]

    # Left merge: every ET-only row is kept, with S columns attached when the
    # gene occurs in the S table (one ET row may expand to several rows).
    merged_df = pd.merge(unique_genes, filtered_df, on='gene', how='left')

    # Genes without a match get NaN in 'reaction'; use '' so the string filter
    # below keeps those rows instead of failing on NaN.
    merged_df['reaction'] = merged_df['reaction'].fillna('')

    # Drop rows whose reaction id contains 'reverse', then tag the remainder.
    # assign() returns a new frame, so no value is written into a filtered
    # slice (which would raise SettingWithCopyWarning once rows are removed).
    merged_df = merged_df[~merged_df['reaction'].str.contains('reverse')].assign(
        type='Overlaped Reaction Range'
    )

    # Save the result next to the inputs.
    output_path = os.path.join(folder_path, 'save_solution.csv')
    merged_df.to_csv(output_path, index=False)

    # The reported number is the row count of merged_df.
    print(f"Processed folder: {folder_name}, unique genes saved to: {output_path}, number of unique genes: {len(merged_df)}")

print("All folders processed.")


Processed folder: ala, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/result/ala/save_solution.csv, number of unique genes: 98
Processed folder: ala_b, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/result/ala_b/save_solution.csv, number of unique genes: 86
Processed folder: glu, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/result/glu/save_solution.csv, number of unique genes: 95
Processed folder: lys, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/result/lys/save_solution.csv, number of unique genes: 101
Processed folder: val, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/result/val/save_solution.csv, number of unique genes: 93
All folders processed.


## 2. FSEOF: Lack of monotonicity. ET-FSEOF: Improved monotonicity.

In [None]:
import os
import pandas as pd
from cobra.io import read_sbml_model

base_dir = './result/ET_Optme/result'

# NOTE(review): this cell uses `model`, `get_genes_for_reaction`,
# `add_reactions_to_set` and `get_reaction_details`, none of which is defined
# in this cell. Presumably the helpers come from the star-imports at the top of
# the notebook and `model` is loaded in an earlier cell -- confirm that the
# notebook still works after "Restart Kernel -> Run All".

# Visit every sub-folder of base_dir whose name starts with 'F'.
# sorted() makes the processing (and log) order reproducible, because
# os.listdir() returns entries in arbitrary order.
for folder_name in sorted(os.listdir(base_dir)):
    folder_path = os.path.join(base_dir, folder_name)
    if not (os.path.isdir(folder_path) and folder_name.startswith('F')):
        continue

    results_s_path = os.path.join(folder_path, 'results_S.xlsx')
    results_et_path = os.path.join(folder_path, 'results_SET.xlsx')
    # Folders that do not contain both result files are skipped silently.
    if not (os.path.exists(results_s_path) and os.path.exists(results_et_path)):
        continue

    # Read each workbook once. sfdf_raw is kept untouched because the
    # unfiltered table is needed again further down.
    f_df = pd.read_excel(results_et_path)
    sfdf_raw = pd.read_excel(results_s_path)

    # Remove the 'Unnamed: N' columns (names pandas gives to header-less
    # columns) when present.
    f_df = f_df.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'], errors='ignore')
    sfdf = sfdf_raw.drop(columns=['Unnamed: 0'], errors='ignore')

    # Keep only rows marked 'down' or 'up'. The two tables use different
    # column names ('manipulations' vs 'manipulation').
    f_df = f_df[f_df['manipulations'].isin(['down', 'up'])]
    sfdf = sfdf[sfdf['manipulation'].isin(['down', 'up'])]

    # Drop the ten columns that precede the last one (positions -11 .. -2);
    # the last column itself is kept.
    cols_to_delete = f_df.columns[-11:-1]
    f_df = f_df.drop(columns=cols_to_delete)

    f_gene = f_df['gene'].to_list()
    reactions_from_sfdf = set(sfdf['reaction'])

    # Map every reaction selected in the S table to the gene(s) the helper
    # returns for it.
    reaction_genes_dict = {
        reaction_id: get_genes_for_reaction(model, reaction_id)
        for reaction_id in reactions_from_sfdf
    }

    # Flatten to one set; the helper result is handled both as a set of genes
    # and as a single value.
    all_genes = set()
    for genes in reaction_genes_dict.values():
        if isinstance(genes, set):
            all_genes.update(genes)
        else:
            all_genes.add(genes)

    # Entries of the SET table whose 'gene' value is not among the genes of
    # the selected S reactions, and the matching rows.
    unique_f_gene = [gene for gene in f_gene if gene not in all_genes]
    unique_f_gene_df = f_df[f_df['gene'].isin(unique_f_gene)]

    # Collect the reactions of every gene named in those entries. An entry may
    # be a rule such as "(g1 and g2) or g3": split on the whole words and/or,
    # then strip the whitespace and parentheses that the split leaves around
    # each identifier so the helper receives a clean gene id.
    reaction_set = set()
    for gene_rule in unique_f_gene:
        for gene_id in re.split(r'\band\b|\bor\b', gene_rule):
            gene_id = gene_id.strip(' \t\n()')
            if gene_id:
                add_reactions_to_set(model, gene_id, reaction_set)

    # From here on the unfiltered S table is used (same content as reading the
    # file again, without the second disk read).
    sfdf = sfdf_raw

    # .copy() gives an independent frame, so the column assignment below does
    # not write into a slice of sfdf (the source of the SettingWithCopyWarning
    # messages this cell used to emit).
    s_rxn_df = sfdf[sfdf['reaction'].isin(reaction_set)].copy()
    s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(
        lambda x: pd.Series(get_reaction_details(model, x))
    )
    # Drop the four columns at positions -5 .. -2, then expose the rule column
    # under the name 'gene' so it can serve as the merge key.
    s_rxn_df = s_rxn_df.drop(columns=s_rxn_df.columns[-5:-1])
    s_rxn_df = s_rxn_df.rename(columns={'gpr': 'gene'})

    # Left merge keeps every SET-only row; unmatched rows get '' as reaction so
    # the 'reverse' filter keeps them instead of failing on NaN.
    merged_df_f = pd.merge(unique_f_gene_df, s_rxn_df, on='gene', how='left')
    merged_df_f['reaction'] = merged_df_f['reaction'].fillna('')
    merged_df_f = merged_df_f[~merged_df_f['reaction'].str.contains('reverse')].assign(
        type='lack of monotonicity'
    )

    # Write both tables into one workbook next to the inputs.
    file_path = os.path.join(folder_path, 'save_solution.xlsx')
    with pd.ExcelWriter(file_path) as writer:
        merged_df_f.to_excel(writer, sheet_name='imperfection', index=False)
        unique_f_gene_df.to_excel(writer, sheet_name='unique_etm_genes', index=False)

    # The reported number is the row count of merged_df_f.
    print(f"Processed folder: {folder_name}, unique genes saved to: {file_path}, number of unique genes: {len(merged_df_f)}")

print("All folders processed.")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df.drop(s_rxn_df.columns[-5:-1],

Processed folder: F_ala, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/F_ala/save_solution.xlsx, number of unique genes: 83


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df.drop(s_rxn_df.columns[-5:-1], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df.rename(columns={'gpr': 'gene'}, inplace=True)


Processed folder: F_glu, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/F_glu/save_solution.xlsx, number of unique genes: 2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df.drop(s_rxn_df.columns[-5:-1],

Processed folder: F_ile, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/F_ile/save_solution.xlsx, number of unique genes: 15


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df.drop(s_rxn_df.columns[-5:-1],

Processed folder: F_lys, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/F_lys/save_solution.xlsx, number of unique genes: 14


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df.drop(s_rxn_df.columns[-5:-1],

Processed folder: F_val, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/F_val/save_solution.xlsx, number of unique genes: 19


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df.drop(s_rxn_df.columns[-5:-1],

Processed folder: F_ala_b, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/F_ala_b/save_solution.xlsx, number of unique genes: 221


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df[['gpr', 'equation']] = s_rxn_df['reaction'].apply(lambda x: pd.Series(get_reaction_details(model, x)))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s_rxn_df.drop(s_rxn_df.columns[-5:-1],

Processed folder: F_hom, unique genes saved to: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/F_hom/save_solution.xlsx, number of unique genes: 231
All folders processed.


## 3. FSEOF: Non-unique Reaction Direction. ET-FSEOF: One Reaction Direction.

In [None]:
import os
import pandas as pd

# Define the root directory
root_directory = './ET_OptME/result/ET_Optme/结果分析/result'


def is_monotonic(column):
    """Check whether a sequence is monotonically non-decreasing or non-increasing.

    Args:
        column: iterable of mutually comparable values (one table row here).

    Returns:
        True when every adjacent pair satisfies ``a <= b`` or every adjacent
        pair satisfies ``a >= b``. Sequences with fewer than two values count
        as monotonic. A NaN makes both comparisons false, so a sequence that
        contains NaN next to another value is not monotonic.
    """
    # Materialise the values so pairs are formed by position; indexing a
    # label-indexed Series with integers would depend on pandas' deprecated
    # positional fallback.
    values = list(column)
    adjacent = list(zip(values, values[1:]))
    return all(a <= b for a, b in adjacent) or all(a >= b for a, b in adjacent)


def has_opposite_signs(column):
    """Check whether any two adjacent values have opposite signs.

    Args:
        column: iterable of numbers (one table row here).

    Returns:
        True when the product of at least one adjacent pair is negative.
        Pairs that contain zero or NaN never count.
    """
    values = list(column)
    return any(a * b < 0 for a, b in zip(values, values[1:]))


# Traverse the folders and find those containing "FSEOF". sorted() makes the
# processing (and log) order reproducible, because os.listdir() returns
# entries in arbitrary order.
for folder_name in sorted(os.listdir(root_directory)):
    if 'FSEOF' not in folder_name:
        continue

    folder_path = os.path.join(root_directory, folder_name)
    result_file_path = os.path.join(folder_path, 'results_S.xlsx')
    df_et_file_path = os.path.join(folder_path, 'results_SET.xlsx')  # Adjust based on actual file name

    # Folders that do not contain both result files are skipped silently.
    if not (os.path.isfile(result_file_path) and os.path.isfile(df_et_file_path)):
        continue

    df = pd.read_excel(result_file_path)

    # Columns at positions 2..11 are the ones that get scanned.
    # NOTE(review): presumably these are the ten flux values of the scan
    # steps -- confirm against the layout of results_S.xlsx.
    target_columns = df.iloc[:, 2:12]

    # A row is flagged when its values are monotonic AND change sign between
    # two adjacent positions. Building the flags row by row yields one boolean
    # per row even when the table is empty or no row is monotonic.
    flagged = [
        is_monotonic(row) and has_opposite_signs(row)
        for row in target_columns.itertuples(index=False, name=None)
    ]
    df['explam'] = ['yes' if hit else 'no' for hit in flagged]

    # Flagged rows only; rename() returns a new frame, so nothing is written
    # into a slice of df (the source of the SettingWithCopyWarning this cell
    # used to emit). 'gpr' becomes 'gene' to serve as the merge key.
    filtered_df = df[df['explam'] == 'yes'].rename(columns={'gpr': 'gene'})
    reaction = filtered_df['reaction'].to_list()

    # In the SET table, take the word in front of the first '(' in column
    # 'gene = 0.0' as reaction name and keep the rows that name a flagged
    # reaction. expand=False returns a Series for the single capture group.
    df_et = pd.read_excel(df_et_file_path)
    df_et['reaction_name'] = df_et['gene = 0.0'].str.extract(r'(\w+)\s*\(', expand=False)
    gene_df = df_et[df_et['reaction_name'].isin(reaction)]

    # Left merge keeps every flagged S row, with SET columns attached where
    # the gene matches.
    merged_df_nrd = pd.merge(filtered_df, gene_df, on='gene', how='left')
    merged_df_nrd['type'] = 'Non-unique Reaction Direction'

    # Save the results next to the inputs.
    output_file_path = os.path.join(folder_path, 'filtered_results.xlsx')
    merged_df_nrd.to_excel(output_file_path, index=False)

    # Output the number of rows
    print(f'Processed: {result_file_path} -> {output_file_path}, Number of rows: {len(merged_df_nrd)}')


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns={'gpr': 'gene'}, inplace=True)


Processed: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_ala/results_S.xlsx -> /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_ala/filtered_results.xlsx, Number of rows: 2


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns={'gpr': 'gene'}, inplace=True)


Processed: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_ala_b/results_S.xlsx -> /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_ala_b/filtered_results.xlsx, Number of rows: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns={'gpr': 'gene'}, inplace=True)


Processed: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_glu/results_S.xlsx -> /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_glu/filtered_results.xlsx, Number of rows: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns={'gpr': 'gene'}, inplace=True)


Processed: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_hom/results_S.xlsx -> /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_hom/filtered_results.xlsx, Number of rows: 2


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns={'gpr': 'gene'}, inplace=True)


Processed: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_ile/results_S.xlsx -> /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_ile/filtered_results.xlsx, Number of rows: 2


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns={'gpr': 'gene'}, inplace=True)


Processed: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_lys/results_S.xlsx -> /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_lys/filtered_results.xlsx, Number of rows: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns={'gpr': 'gene'}, inplace=True)


Processed: /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_val/results_S.xlsx -> /home/sun/ETGEMS-10.20/ET_OptME/result/ET_Optme/结果分析/result/FSEOF_val/filtered_results.xlsx, Number of rows: 1
