# Libraries Installs and Imports

In [63]:
import os
import re
import glob
import time
from datetime import datetime
import pandas as pd
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
import matplotlib.pyplot as plt

## Potency - VaxiJen v2.0

In [64]:
def Evaluate_VaxiJen(Sequence, Type_epitope):
    """
    Submit a sequence to the VaxiJen v2.0 web form and return the reported score.

    Parameters:
    - Sequence (str): The biological sequence typed into the form's textarea.
    - Type_epitope (int): 1-based position of the <option> to select in the form's
      drop-down list.
      NOTE(review): the original documentation described this as "1 for linear,
      2 for discontinuous" - confirm the meaning of each option against the page.

    Returns:
    - str or int: The text of the result element if successful, 0 if any step fails.

    Note:
    - This function never raises: every error is printed and reported as 0.
    - The browser is always closed, including when a step fails.
    - This function uses the Microsoft Edge browser and interacts with the VaxiJen web tool (http://www.ddg-pharmfac.net/vaxijen/VaxiJen/VaxiJen.html).

    Example:
    result = Evaluate_VaxiJen("AFTFTKIPAETLHGTVTVEVQYAGTDGPCKVPAQMAVDMQTLTPVGRLITA...", 1)
    print(result)
    """
    form_xpath = "/html/body/div/table/tbody/tr[4]/td[3]/form/table/tbody"
    browser = None
    try:
        # Start a new instance of the Microsoft Edge browser
        browser = webdriver.Edge()

        # Navigate to the VaxiJen webpage
        browser.get("http://www.ddg-pharmfac.net/vaxijen/VaxiJen/VaxiJen.html")

        # Replace whatever is in the sequence textarea with the query sequence
        textarea = browser.find_element(By.XPATH, f"{form_xpath}/tr[1]/td[2]/p/textarea")
        textarea.clear()
        textarea.send_keys(Sequence)

        # Select the requested entry of the drop-down list
        option = browser.find_element(By.XPATH, f"{form_xpath}/tr[2]/td[2]/p/select/option[{Type_epitope}]")
        option.click()

        # Submit the form
        submit_button = browser.find_element(By.XPATH, f"{form_xpath}/tr[3]/td[2]/input[1]")
        submit_button.click()

        # Read the score from the result page
        result_value = browser.find_element(By.XPATH, "/html/body/div/table/tbody/tr[4]/td[3]/table/tbody/tr/td/b[3]")
        return result_value.text

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        # Return 0 if an error occurs during the extraction
        return 0

    finally:
        # Close the browser on every path so failed lookups do not leak Edge processes
        if browser is not None:
            try:
                browser.quit()
            except Exception as e:
                # Keep the "never raises" contract even if shutdown itself fails
                print(f"An error occurred: {str(e)}")

## Safety - AllerTop

In [65]:
def Evaluate_AllerTop(Sequence):
    """
    Submit a sequence to the AllerTOP web form and return the reported verdict text.

    Parameters:
    - Sequence (str): The biological sequence typed into the form's textarea.

    Returns:
    - str: The text of the result heading on the response page.

    Raises:
    - Any selenium exception raised while driving the page (e.g. an element that
      cannot be found) propagates to the caller.

    Note:
    - The browser is always closed, including when a step fails.
    - This function uses the Microsoft Edge browser and interacts with the AllerTOP web tool (https://www.ddg-pharmfac.net/AllerTOP/).

    Example:
    result = Evaluate_AllerTop("AFTFTKIPAETLHGTVTVEVQYAGTDGPCKVPAQMAVDMQTLTPVGRLITA...")
    print(result)
    """
    form_xpath = "/html/body/center/table/tbody/tr[3]/td/table/tbody/tr/td[2]/table/tbody/tr/td/form/table/tbody"

    # Start a new instance of the Microsoft Edge browser
    browser = webdriver.Edge()
    try:
        # Navigate to the AllerTOP webpage
        browser.get("https://www.ddg-pharmfac.net/AllerTOP/")

        # Replace whatever is in the sequence textarea with the query sequence
        textarea = browser.find_element(By.XPATH, f"{form_xpath}/tr[2]/td[1]/textarea")
        textarea.clear()
        textarea.send_keys(Sequence)

        # Submit the form
        submit_button = browser.find_element(By.XPATH, f"{form_xpath}/tr[3]/td[1]/input")
        submit_button.click()

        # Read the verdict from the result page
        result_value = browser.find_element(By.XPATH, f"{form_xpath}/tr/td/div/h4[2]")
        return result_value.text
    finally:
        # Close the browser even when a lookup fails, so no Edge process is leaked
        browser.quit()

## Binding Strength - IEDB MHC-I

In [66]:
def Evaluate_MHC_I(Sequence, mhc_i_filename, max_rows=100):
    """
    Run the IEDB MHC class I binding prediction for a sequence and save the parsed result.

    Parameters:
    - Sequence (str): The biological sequence typed into the form's textarea.
    - mhc_i_filename (str): Path of the CSV file the parsed table is written to.
    - max_rows (int, optional): Maximum number of result rows to parse (default 100).

    Returns:
    - None. The parsed table is printed and written to `mhc_i_filename`.

    Note:
    - This function uses the Microsoft Edge browser and interacts with the IEDB MHC class I binding prediction tool (http://tools.iedb.org/mhci/).
    - It waits a fixed 60 seconds for the prediction to finish before reading the result.
    - Result lines with fewer than 8 whitespace-separated fields (e.g. blank lines) are skipped.
    - The browser is always closed, including when a step fails.

    Example:
    Evaluate_MHC_I("AFTFTKIPAETLHGTVTVEVQYAGTDGPCKVPAQMAVDMQTLTPVGRLITA...", "IEDB_MHC-I.csv")
    """
    columns = ["allele", "seq_num", "start", "end", "length", "peptide", "score", "percentile_rank"]

    # Start a new instance of the Microsoft Edge browser
    browser = webdriver.Edge()
    try:
        # Navigate to the IEDB MHC class I binding prediction webpage
        browser.get("http://tools.iedb.org/mhci/")

        # Wait time for loading
        time.sleep(3)

        # Replace whatever is in the sequence textarea with the query sequence
        textarea = browser.find_element(By.XPATH, "/html/body/div[3]/form/table/tbody/tr[3]/td[2]/textarea")
        textarea.clear()
        textarea.send_keys(Sequence)

        # Click the allele-selection input (presumably a preset allele set - confirm on the page)
        allele_button = browser.find_element(By.XPATH, "/html/body/div[3]/form/table/tbody/tr[9]/td[1]/div/span[2]/input")
        allele_button.click()

        # Submit the form
        submit_button = browser.find_element(By.XPATH, "/html/body/div[3]/form/table/tbody/tr[14]/th/div/input[2]")
        submit_button.click()

        # Wait time for the prediction to complete
        time.sleep(60)

        # The result page is a single <pre> element holding a whitespace-separated table
        result_data = browser.find_element(By.XPATH, "/html/body/pre").text
    finally:
        # Close the browser even when a lookup fails, so no Edge process is leaked
        browser.quit()

    # Parse the table, starting from the 4th line where the data begins
    data_list = []
    for line in result_data.split('\n')[3:]:
        if len(data_list) >= max_rows:
            break
        data = line.split()
        if len(data) < len(columns):
            # Blank or truncated line: nothing to unpack
            continue
        data_list.append(dict(zip(columns, data[:len(columns)])))

    # Create a DataFrame
    df = pd.DataFrame(data_list, columns=columns)
    # NOTE(review): the first parsed row is discarded, as in the original code -
    # presumably a residual header line; confirm against the actual page output.
    df = df.iloc[1:]

    # Print the DataFrame
    print("DataFrame:")
    print(df)

    # Save the DataFrame to a CSV file (the index is written as the first column)
    df.to_csv(mhc_i_filename)


## Binding Strength - IEDB MHC-II

In [67]:
def read_latest_downloaded_csv(folder_path="C:\\Users\\Downloads\\Folder"):
    """
    Read the highest-numbered 'result (N).csv' file of a folder into a DataFrame.

    The browser names repeated downloads 'result (1).csv', 'result (2).csv', ...;
    the file with the largest N is treated as the latest one.

    Parameters:
    - folder_path (str): Path to the folder containing downloaded CSV files.

    Returns:
    - pd.DataFrame: DataFrame from the latest downloaded CSV file, or None
      (after printing a message) when no 'result (N).csv' file exists.

    Note:
    - Only the file name is inspected for the number, so digits in parentheses
      elsewhere in the path (e.g. a 'Downloads (2)' folder) are ignored.
    - Files matching 'result (*).csv' whose parenthesised part is not a number are skipped.
    """
    name_pattern = re.compile(r'result \((\d+)\)\.csv', re.IGNORECASE)

    # Escape the folder so characters such as '[' in the path are taken literally
    candidates = glob.glob(os.path.join(glob.escape(folder_path), 'result (*).csv'))

    # Pair every properly numbered file with its download number
    numbered_files = []
    for path in candidates:
        match = name_pattern.fullmatch(os.path.basename(path))
        if match:
            numbered_files.append((int(match.group(1)), path))

    if not numbered_files:
        print("No matching CSV files found.")
        return None

    # Take the file with the maximum numeric value
    _, latest_csv_file = max(numbered_files)

    # Read the latest CSV into a DataFrame
    return pd.read_csv(latest_csv_file)

def Evaluate_MHC_II(Sequence, mhc_ii_filename, downloaded_file_path, max_rows=100):
    """
    Run the IEDB MHC class II binding prediction for a sequence and save the downloaded result.

    Parameters:
    - Sequence (str): The biological sequence typed into the form's textarea.
    - mhc_ii_filename (str): Path of the CSV file the trimmed table is written to.
    - downloaded_file_path (str): Folder into which the browser downloads the result file.
    - max_rows (int, optional): Number of leading rows of the download to keep (default 100).

    Returns:
    - None. The trimmed table is printed and written to `mhc_ii_filename`.

    Raises:
    - FileNotFoundError: If no 'result (N).csv' download is found in `downloaded_file_path`.
    - Any selenium exception raised while driving the page propagates to the caller.

    Note:
    - This function uses the Microsoft Edge browser and interacts with the IEDB MHC class II binding prediction tool (http://tools.iedb.org/mhcii/).
    - It waits a fixed 60 seconds for the prediction and 10 seconds for the download.
    - The browser is always closed, including when a step fails.

    Example:
    Evaluate_MHC_II("AFTFTKIPAETLHGTVTVEVQYAGTDGPCKVPAQMAVDMQTLTPVGRLITA...", "IEDB_MHC-II.csv", "C:\\\\Users\\\\me\\\\Downloads")
    """
    # Start a new instance of the Microsoft Edge browser
    browser = webdriver.Edge()
    try:
        # Navigate to the IEDB MHC class II binding prediction webpage
        browser.get("http://tools.iedb.org/mhcii/")

        # Wait time for loading
        time.sleep(3)

        # Replace whatever is in the sequence textarea with the query sequence
        textarea = browser.find_element(By.XPATH, "/html/body/div[3]/form/table/tbody/tr[2]/td[2]/textarea")
        textarea.clear()
        textarea.send_keys(Sequence)

        # Click the allele-selection input (presumably a preset allele set - confirm on the page)
        allele_button = browser.find_element(By.XPATH, "/html/body/div[3]/form/table/tbody/tr[8]/td[1]/div/span[3]/input")
        allele_button.click()

        # Wait time for loading
        time.sleep(5)

        # Submit the form
        submit_button = browser.find_element(By.XPATH, "/html/body/div[3]/form/table/tbody/tr[14]/th/div/input[1]")
        submit_button.click()

        # Wait time for the prediction to complete
        time.sleep(60)

        # Click the link that downloads the result document
        download_link = browser.find_element(By.XPATH, "/html/body/div[3]/div[3]/a")
        download_link.click()

        # Wait for the download to complete (you may need to customize this part based on your system)
        time.sleep(10)
    finally:
        # Close the browser even when a lookup fails, so no Edge process is leaked
        browser.quit()

    # Read the downloaded CSV into a DataFrame
    df = read_latest_downloaded_csv(downloaded_file_path)
    if df is None:
        # Fail with an explicit message instead of an AttributeError on None
        raise FileNotFoundError(
            f"No downloaded 'result (N).csv' file found in {downloaded_file_path}"
        )

    # Keep only the first max_rows rows
    df = df.head(max_rows)

    # Print the DataFrame
    print("DataFrame:")
    print(df)

    # Save the trimmed DataFrame to a new CSV file (the index is written as the first column)
    df.to_csv(mhc_ii_filename)

## Binding Strength - Linear B Epitope

### Single Model Function

In [68]:
def Evaluate_B_Linear(Sequence, project_name, method_type=6):
    """
    Run one IEDB linear B-cell epitope prediction method and save its result table.

    Parameters:
    - Sequence (str): The biological sequence typed into the form's textarea.
    - project_name (str): Prefix of the output file name.
    - method_type (int, optional): Row number (tr[N]) of the form table whose input
      is clicked to choose the prediction method. Default is 6.

    Returns:
    - None. The table is printed and written to '{project_name}_IEDB_B_MT_{method_type}.csv'.

    Raises:
    - Any selenium or pandas exception raised while driving or parsing the page
      propagates to the caller.

    Note:
    - This function uses the Microsoft Edge browser and interacts with the IEDB B-cell epitope prediction tool (http://tools.iedb.org/bcell/).
    - It waits a fixed 30 seconds for the result page.
    - The browser is always closed, including when a step fails.

    Example:
    Evaluate_B_Linear("AFTFTKIPAETLHGTVTVEVQYAGTDGPCKVPAQMAVDMQTLTPVGRLITA...", "my_project", method_type=7)
    """
    # Local import: recent pandas deprecates passing literal HTML strings to read_html
    from io import StringIO

    # Start a new instance of the Microsoft Edge browser
    browser = webdriver.Edge()
    try:
        # Navigate to the IEDB B-cell epitope prediction webpage
        browser.get("http://tools.iedb.org/bcell/")

        # Wait time for loading
        time.sleep(3)

        # Locate the sequence textarea and clear any existing text
        textarea = browser.find_element(By.XPATH, "/html/body/div[3]/form/table/tbody/tr[4]/td/textarea")
        textarea.clear()

        # Select the prediction method by clicking the input of the requested table row
        method_button_xpath = f"/html/body/div[3]/form/table/tbody/tr[{method_type}]/td/input"
        method_button = browser.find_element(By.XPATH, method_button_xpath)
        method_button.click()

        # Enter the sequence into the textarea
        textarea.send_keys(Sequence)

        # Submit the form
        submit_button = browser.find_element(By.XPATH, "/html/body/div[3]/form/table/tbody/tr[13]/th/div/input[1]")
        submit_button.click()

        # Wait for the results to load (you may adjust the waiting time as needed)
        time.sleep(30)

        # Grab the HTML of the table element containing the data
        table = browser.find_element(By.XPATH, "/html/body/div[3]/table[2]")
        table_data = table.get_attribute("outerHTML")
    finally:
        # Close the browser even when a lookup fails, so no Edge process is leaked
        browser.quit()

    # Use Pandas to read the table into a DataFrame
    df = pd.read_html(StringIO(table_data))[0]

    # Print the DataFrame
    print("DataFrame:")
    print(df)

    # Save the DataFrame to a CSV file
    df.to_csv(f"{project_name}_IEDB_B_MT_{method_type}.csv", index=False)


### Multiple Model Function

In [69]:
def combine_csv_files(csv_file_names, b_cell_filename):
    """
    Combine multiple CSV files into a single CSV file, excluding files with a "Score" column.

    Parameters:
    - csv_file_names (list): A list of CSV file names to be combined.
    - b_cell_filename (str): Path of the combined CSV file to write.

    Returns:
    - None. The combined table is written to `b_cell_filename`.

    Note:
    - CSV files with a "Score" column are skipped (a message is printed for each).
    - Rows that are identical across all columns are written only once.
    - If every file is skipped (or the list is empty) an empty CSV is written.

    Example:
    combine_csv_files(["file1.csv", "file2.csv", "file3.csv"], "IEDB_B_Cell.csv")
    """
    # Collect the frames first and concatenate once, instead of re-copying the
    # accumulated data on every iteration
    frames = []
    for file_name in csv_file_names:
        # Read each CSV file into a DataFrame
        df = pd.read_csv(file_name)

        if "Score" in df.columns:
            # Skip files with a "Score" column
            print(f"Skipping file {file_name} because it has a Score column")
            continue

        frames.append(df)

    # pd.concat rejects an empty list, so fall back to an empty frame
    combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Remove rows that are exact duplicates of an earlier row
    combined_df = combined_df.drop_duplicates()

    # Save the combined DataFrame to a new CSV file
    combined_df.to_csv(b_cell_filename, index=False)

    # Report the file that was actually written
    print(f"Combined data saved to {b_cell_filename}")

In [70]:
def Evaluate_B_All(Sequence, project_name, b_cell_filename):
    """
    Run every linear B-cell prediction method (method types 6 to 11) and combine the results.

    Parameters:
    - Sequence (str): The biological sequence to be evaluated.
    - project_name (str): Prefix of the per-method CSV files written by Evaluate_B_Linear.
    - b_cell_filename (str): Path of the combined CSV file passed to combine_csv_files.

    Note:
    - All methods are evaluated first; the per-method files
      '{project_name}_IEDB_B_MT_{i}.csv' are combined only afterwards.

    Example:
    Evaluate_B_All("ATCGATCG...", "my_project", "my_project_IEDB_B_Cell.csv")
    """
    per_method_files = []

    # Evaluate using all available models (6 to 11), remembering each output file
    for method in range(6, 12):
        Evaluate_B_Linear(Sequence, project_name, method_type=method)
        per_method_files.append(f"{project_name}_IEDB_B_MT_{method}.csv")

    # Merge the per-method tables into a single file
    combine_csv_files(per_method_files, b_cell_filename)

## Loading and Cleaning files

In [71]:
def process_mhc_csv(csv_filename):
    """
    Load a prediction CSV and return a cleaned copy of it.

    The cleaning steps are, in order: discard the first data row, keep only the
    first occurrence of each peptide (column "Peptide" or "peptide", if present),
    discard the first column, and renumber the rows from 0.

    NOTE(review): the first row and first column are dropped unconditionally -
    presumably a leftover header row and a saved index column; confirm this is
    also intended for files written with index=False.

    Parameters:
    - csv_filename (str): Name of the CSV file to process.

    Returns:
    - pd.DataFrame: Modified DataFrame.
    """
    # Load the file and skip its first data row
    frame = pd.read_csv(csv_filename).iloc[1:]

    # De-duplicate on the peptide column, preferring the capitalised spelling
    peptide_col = next((name for name in ("Peptide", "peptide") if name in frame.columns), None)
    if peptide_col is not None:
        frame = frame.drop_duplicates(subset=peptide_col)

    # Discard the first column, then renumber the remaining rows
    frame = frame.drop(frame.columns[[0]], axis=1)
    return frame.reset_index(drop=True)

## Epitope Potency and Safety Filtering

In [72]:
def process_and_save_sequences(sequences, column_name, output_filename, min_antigenicity_score=0.6, Type_epitope=2):
    """
    Keep the sequences that are antigenic enough and non-allergenic, and save them to a CSV file.

    Each sequence is scored with Evaluate_VaxiJen; sequences scoring at least
    `min_antigenicity_score` are then checked with Evaluate_AllerTop and kept only
    when the verdict is exactly 'PROBABLE NON-ALLERGEN'.

    Parameters:
    - sequences (list): List of sequences to process.
    - column_name (str): Kept for backward compatibility; it no longer affects
      processing. (Previously any value other than 'Peptide' made every sequence
      fail with a KeyError.)
    - output_filename (str): Name of the output CSV file.
    - min_antigenicity_score (float, optional): Minimum antigenicity score a peptide
      needs before its allergenicity is checked (default is 0.6).
    - Type_epitope (int, optional): Value forwarded to Evaluate_VaxiJen (default is 2).

    Returns:
    - pd.DataFrame: Accepted peptides with columns 'Peptide', 'Antigenicity_score'
      and 'peptide_length'.

    Note:
    - A failure on one sequence is printed and that sequence is skipped; it does
      not abort the remaining sequences.
    """
    # Rows of accepted peptides: [sequence, antigenicity score, length]
    non_allergenic_peptides = []

    for index, raw_sequence in enumerate(sequences):
        sequence = str(raw_sequence)  # Convert sequence to string

        try:
            # Calculate antigenicity score (Evaluate_VaxiJen reports failures as 0)
            antigenicity_score = float(Evaluate_VaxiJen(sequence, Type_epitope))

            # Only sufficiently antigenic peptides are worth the allergenicity query
            if antigenicity_score < min_antigenicity_score:
                continue

            allergenicity = Evaluate_AllerTop(sequence)

            # Check if Allergenicity is 'PROBABLE NON-ALLERGEN'
            if allergenicity == 'PROBABLE NON-ALLERGEN':
                non_allergenic_peptides.append([sequence, antigenicity_score, len(sequence)])
                print("Epitope Index: " + str(index) + " Epitope Sequence: " + sequence + " Epitope Antigenicity: " + str(antigenicity_score))

        except ValueError as ve:
            # The score text could not be converted to a number
            print(f"ValueError: {ve}")
        except Exception as e:
            # Best effort: a failing web query must not stop the whole batch
            print(f"An error occurred: {str(e)}")

    # Create a DataFrame for non-allergenic peptides
    non_allergenic_df = pd.DataFrame(non_allergenic_peptides, columns=['Peptide', 'Antigenicity_score', 'peptide_length'])

    # Save non-allergenic peptides to CSV
    non_allergenic_df.to_csv(output_filename, index=False)

    return non_allergenic_df

## Merging potent and safe epitopes with overlapping

In [73]:
def merge_sequences(sequences, min_overlap):
    """
    Merge sequences whose end overlaps the start of another sequence.

    Sequences are processed in input order. Each one is repeatedly extended to the
    right with the first remaining sequence whose prefix overlaps its suffix by at
    least `min_overlap` characters, until no remaining sequence fits; the result is
    then added to the output.

    Parameters:
    - sequences (list): List of input sequences. The list is not modified.
    - min_overlap (int): Minimum required overlap for merging.

    Returns:
    - list: List of merged sequences.
    """
    # Work on a copy so the caller's list is left untouched
    remaining = list(sequences)
    merged_sequences = []

    while remaining:
        current_sequence = remaining.pop(0)

        # Keep extending until a full pass over the remaining sequences finds no match
        extended = True
        while extended:
            extended = False
            for position, other_sequence in enumerate(remaining):
                overlap = find_overlap(current_sequence, other_sequence, min_overlap)
                if overlap:
                    # Append only the part of the other sequence beyond the overlap
                    current_sequence = current_sequence + other_sequence[overlap:]
                    del remaining[position]
                    extended = True
                    break

        # The (possibly extended) sequence is always part of the result
        merged_sequences.append(current_sequence)

    return merged_sequences


def find_overlap(seq1, seq2, min_overlap):
    """
    Find the longest overlap between the end of seq1 and the start of seq2.

    Parameters:
    - seq1 (str): First sequence.
    - seq2 (str): Second sequence.
    - min_overlap (int): Minimum required overlap.

    Returns:
    - int: Length of the longest overlap that is at least `min_overlap` (and at
      least 1) characters long, or 0 if there is none.
    """
    longest_possible = min(len(seq1), len(seq2))

    # Try the longest candidate first so repeated residues are never duplicated
    # by settling for a shorter overlap; a zero-length "overlap" is not a match.
    for size in range(longest_possible, max(min_overlap, 1) - 1, -1):
        if seq1[-size:] == seq2[:size]:
            return size

    return 0

## Merge peptide regions with linkers

In [74]:
def merge_with_separator(items, separator):
    """
    Join the string form of every item, placing the separator between neighbours.

    Parameters:
    - items (list): List of items to be merged.
    - separator (str): Separator to be used between items.

    Returns:
    - str: Merged string ("" when `items` is empty).
    """
    if not items:
        return ""

    # First item as-is, every following item prefixed with the separator
    head, tail = str(items[0]), items[1:]
    return head + "".join(separator + str(piece) for piece in tail)

## Join all used MHC-I, MHC-II, B Cell epitopes

In [75]:
def combine_epitopes_files(csv_file_names):
    """
    Stack several CSV files into 'Full_Epitopes.csv', writing each distinct row once.

    Parameters:
    - csv_file_names (list): A list of CSV file names to be combined.

    Note:
    - The files are read in the given order and stacked under a fresh 0..n-1 index.
    - Rows that are identical across all columns are kept only once.
    - The result is saved to 'Full_Epitopes.csv' in the current working directory.

    Example:
    combine_epitopes_files(["file1.csv", "file2.csv", "file3.csv"])
    """
    # Load every input file
    frames = [pd.read_csv(path) for path in csv_file_names]

    # Stack them on top of an empty seed frame (so an empty list still works)
    stacked = pd.concat([pd.DataFrame(), *frames], ignore_index=True)

    # Drop exact duplicate rows and write the result without the index
    unique_rows = stacked.drop_duplicates()
    unique_rows.to_csv("Full_Epitopes.csv", index=False)

    # Print a message indicating successful completion
    print(f"Combined data saved to Full_Epitopes.csv")

# Pipeline

In [76]:
from datetime import datetime

def _peptide_list(frame):
    """Return the peptide column of *frame* as a list, accepting 'Peptide' or 'peptide'."""
    for name in ("Peptide", "peptide"):
        if name in frame.columns:
            return frame[name].to_list()
    raise KeyError("no 'Peptide' or 'peptide' column in the evaluation results")


def vaccine(
    Sequence,
    mhc_i_output="IEDB_MHC-I.csv",
    mhc_ii_output="IEDB_MHC-II.csv",
    downloaded_file_path="C:\\Users\\Download\\Folder", 
    b_cell_output="IEDB_B_Cell.csv",
    m_s_p_mhc_i_output="M-S-P_IEDB_MHC-I.csv",
    m_s_p_mhc_ii_output="M-S-P_IEDB_MHC-II.csv",
    m_s_p_b_cell_output="M-S-P_IEDB_B_Cell.csv",
    full_epitopes_output="Full_Epitopes.csv",
    min_required_overlap=3,
    mhc_i_separator="AAY",
    mhc_ii_separator="GPGPG",
    b_cell_separator="KK",
    min_antigenicity_score=0.6,
    run_mhc_i=False,
    run_mhc_ii=False,
):
    """
    Process a given sequence through multiple steps to generate a vaccine design.

    For every enabled epitope class the pipeline predicts epitopes, cleans the
    result table, keeps the antigenic non-allergenic peptides, merges overlapping
    peptides and joins them with the class linker. The per-class strings are then
    concatenated (MHC-I, MHC-II, B cell) into the final design. A disabled class
    contributes an empty string.

    Parameters:
    - Sequence (str): Input sequence to be processed.
    - mhc_i_output (str): Filename for MHC-I evaluation results.
    - mhc_ii_output (str): Filename for MHC-II evaluation results.
    - downloaded_file_path (str): Downloads file folder location.
    - b_cell_output (str): Filename for B Cell evaluation results.
    - m_s_p_mhc_i_output (str): Output name for M-S-P MHC-I processed file.
    - m_s_p_mhc_ii_output (str): Output name for M-S-P MHC-II processed file.
    - m_s_p_b_cell_output (str): Output name for M-S-P B Cell processed file.
    - full_epitopes_output (str): Output name for the combined full epitopes file.
    - min_required_overlap (int): Minimum required overlap for merging sequences.
    - mhc_i_separator (str): Separator for merging MHC-I sequences.
    - mhc_ii_separator (str): Separator for merging MHC-II sequences.
    - b_cell_separator (str): Separator for merging B Cell sequences.
    - min_antigenicity_score (float): Minimum antigenicity score a peptide needs to be kept.
    - run_mhc_i (bool): Run the MHC-I stages. Defaults to False because these
      stages were disabled (commented out) in the previous version.
    - run_mhc_ii (bool): Run the MHC-II stages. Defaults to False for the same reason.

    Returns:
    - str: Merged and processed vaccine design, or None if any step fails
      (the error is printed).

    Note:
    - The project name is the current timestamp; all files are written with that
      prefix and finally moved into '{project_name}_vaccine_design'.
    - Only files that were actually produced are moved.
    """
    try:  
        # Step 0: Check the title and create folder
        project_name = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

        folder_name = f"{project_name}_vaccine_design"
        os.makedirs(folder_name, exist_ok=True)

        # One entry per epitope class: (label, enabled, raw file, filtered file, linker)
        epitope_classes = [
            ("MHC-I", run_mhc_i, f"{project_name}_{mhc_i_output}", f"{project_name}_{m_s_p_mhc_i_output}", mhc_i_separator),
            ("MHC-II", run_mhc_ii, f"{project_name}_{mhc_ii_output}", f"{project_name}_{m_s_p_mhc_ii_output}", mhc_ii_separator),
            ("B Cell", True, f"{project_name}_{b_cell_output}", f"{project_name}_{m_s_p_b_cell_output}", b_cell_separator),
        ]

        # Step 1: Evaluate MHC-I
        print(f"Step 1: Evaluating MHC-I for {project_name}")
        if run_mhc_i:
            Evaluate_MHC_I(Sequence, f"{project_name}_{mhc_i_output}")
        else:
            print("Skipped (run_mhc_i=False)")
        print("                        ")

        # Step 2: Evaluate MHC-II
        print(f"Step 2: Evaluating MHC-II for {project_name}")
        if run_mhc_ii:
            Evaluate_MHC_II(Sequence, f"{project_name}_{mhc_ii_output}", downloaded_file_path)
        else:
            print("Skipped (run_mhc_ii=False)")
        print("                        ")

        # Step 3: Evaluate Linear B
        print(f"Step 3: Evaluating Linear B for {project_name}")
        Evaluate_B_All(Sequence, project_name, f"{project_name}_{b_cell_output}")
        print("                        ")

        # Steps 4-6: Extract (clean) the raw result table of every enabled class
        cleaned = {}
        for step, (label, enabled, raw_file, _, _) in enumerate(epitope_classes, start=4):
            print(f"Step {step}: Extracting {label} from {raw_file}")
            if enabled:
                cleaned[label] = process_mhc_csv(raw_file)
            print("                        ")

        # Steps 7-9: Keep the antigenic, non-allergenic peptides of every enabled class
        filtered = {}
        for step, (label, enabled, _, filtered_file, _) in enumerate(epitope_classes, start=7):
            print(f"Step {step}: Evaluating Overlapped {label} and saving to", filtered_file)
            if enabled:
                # The peptide column is 'peptide' or 'Peptide' depending on the source tool
                sequences = _peptide_list(cleaned[label])
                filtered[label] = process_and_save_sequences(sequences, "Peptide", filtered_file, min_antigenicity_score, Type_epitope=2)
            print("                        ")

        # Step 10: Combine the Evaluated Epitopes Files
        csv_files = [filtered_file for _, enabled, _, filtered_file, _ in epitope_classes if enabled]
        combine_epitopes_files(csv_files)
        # combine_epitopes_files always writes 'Full_Epitopes.csv'; honour a custom name
        if full_epitopes_output != "Full_Epitopes.csv":
            os.replace("Full_Epitopes.csv", full_epitopes_output)
        print("                        ")

        # Steps 11-16: Merge overlapping peptides, then join each class with its linker
        merged_parts = []
        for label, enabled, _, _, separator in epitope_classes:
            sequences_to_merge = filtered[label]["Peptide"].to_list() if enabled else []
            overlapped = merge_sequences(sequences_to_merge, min_required_overlap)
            merged_parts.append(merge_with_separator(overlapped, separator))
        print("                        ")

        # Step 17: Merge Epitopes (MHC-I, MHC-II, B cell, in that order)
        Vaccine_design = merge_with_separator(merged_parts, "")
        print("                        ")

        # Step 18: Move generated files to the new folder
        generated_files = [
            f"{project_name}_{mhc_i_output}",
            f"{project_name}_{mhc_ii_output}",
            f"{project_name}_{b_cell_output}",
            *[f"{project_name}_IEDB_B_MT_{i}.csv" for i in range(6, 12)],
            f"{project_name}_{m_s_p_mhc_i_output}",
            f"{project_name}_{m_s_p_mhc_ii_output}",
            f"{project_name}_{m_s_p_b_cell_output}",
            full_epitopes_output,
        ]

        for file_name in generated_files:
            # Files of disabled stages do not exist; moving them would abort the run
            if os.path.exists(file_name):
                os.rename(file_name, os.path.join(folder_name, file_name))

        print(f"Generated files moved to folder: {folder_name}")

        return Vaccine_design

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None

In [77]:
# Query protein sequence (one-letter amino-acid codes). The literal must not carry
# trailing whitespace: it is typed verbatim into every web form.
Sequence = "FHLTTRDGEPHMIVAKQERGKSLLFKTAAGINMCTLIAMDLGELCEDTVTYKCPLIAEAEPEDIDCWCNLTSTWVTYGTCTQTGEHRRDKRSVALAPHVGMGLETRTETWMSSEGAWKHIQRVETWALRHPGFTILALFLAYAIGTSLTQRTVIFILLMLVAPSMTMRCVGIGNRDFVEGVSGATWVDVVLEHGSCVTTMAKNKPTLDIELIKTEAKQPATLRKYCIEAKISNITTDSRCPTQGEPNLNEEQDQNYVCRRTNVDRGWGNGCGLFGKGSLVTCAKFSCKKKIEGKIVQYENLKYTVIVTVHTGDQHAVGNDTSNHGVTATITPQAPTVEAQLPDYGTLTLDCSPRTGLDFNEMILLTMKNKAWLVHRQWFLDLPLPWTSGADTSNNTWNRKELLVTFKNPHAKKQDVVVLGSQEGAMHTALTGATEIQSSGGNNIFAGHLKCRLRMDKLRLKGMSYAMCTGKFKLDKEVAETQHGTILIKVKYEGADAPCKIPFSIRDVKKRAVNGRLITANPIVTDKEKPVNIEAEPPFGDSYIVIGVGPKALKLNWFKKGSSIGKMFEATARGARRMAILGDTAWDFGSVGGVFTSLGKAVHQVFGSAYTALFSGVSWTMKILIGVLLTWIGLNSRNTSMSMSCIAVGNITLYLGAMVQA"
# Run the pipeline; the result files are downloaded to the given folder
vaccine(Sequence, downloaded_file_path="C:\\Users\\raula\\Downloads", min_required_overlap=3, mhc_i_separator="AAY", mhc_ii_separator="GPGPG", b_cell_separator="KK", min_antigenicity_score=0.6)

Step 1: Evaluating MHC-I for 2023_11_14_20_05_14
                        
Step 2: Evaluating MHC-II for 2023_11_14_20_05_14
                        
Step 3: Evaluating Linear B for 2023_11_14_20_05_14


DataFrame:
    No.  Start  End                                     Peptide  Length
0     1     17   24                                    QERGKSLL       8
1     2     28   28                                           A       1
2     3     46   51                                      EDTVTY       6
3     4     55   63                                   LIAEAEPED       9
4     5     81  122  TQTGEHRRDKRSVALAPHVGMGLETRTETWMSSEGAWKHIQR      42
5     6    147  148                                          SL       2
6     7    214  221                                    TEAKQPAT       8
7     8    234  269        ITTDSRCPTQGEPNLNEEQDQNYVCRRTNVDRGWGN      36
8     9    312  325                              GDQHAVGNDTSNHG      14
9    10    356  358                                         GLD       3
10   11    379  400                      FLDLPLPWTSGADTSNNTWNRK      22
11   12    408  412                                       NPHAK       5
12   13    435  443                                  