In [1]:
import os
import pandas as pd

In [2]:
# Path to the benign and malware directories
benign_dir = r"C:\Users\KUNG_LOBSTER69\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_CYBER_SECURITY\RESULT\03.sgFCMed\benign"
malware_dir = r"C:\Users\KUNG_LOBSTER69\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_CYBER_SECURITY\RESULT\03.sgFCMed\malware"

def load_prototypes_separately(directory):
    """
    Load each JSON file in the specified directory into separate DataFrames.
    """
    data_frames = {}
    for file_name in os.listdir(directory):
        if file_name.startswith("prototypes_") and file_name.endswith(".json"):
            file_path = os.path.join(directory, file_name)
            try:
                df = pd.read_json(file_path)
                data_frames[file_name] = df  # Store each DataFrame by its file name
            except Exception as e:
                print(f"An error occurred while loading {file_path}: {e}")
    return data_frames

# Load benign prototypes separately
print("Loading benign prototypes...")
benign_prototypes = load_prototypes_separately(benign_dir)
if benign_prototypes:
    print("Loaded benign prototypes successfully!")
    for file_name, df in benign_prototypes.items():
        print(f"Data from {file_name}:")
        print(df.head())

# Load malware prototypes separately
print("\nLoading malware prototypes...")
malware_prototypes = load_prototypes_separately(malware_dir)
if malware_prototypes:
    print("Loaded malware prototypes successfully!")
    for file_name, df in malware_prototypes.items():
        print(f"Data from {file_name}:")
        print(df.head())

Loading benign prototypes...
Loaded benign prototypes successfully!
Data from prototypes_10.json:
                                                data  label  Prototype_Index
0  ÐÏà¡±á                >  þÿ\t            ...      0              872
1  µ0     ÿþ p                            ...      0              135
2  µ0     ÿþ p                            ...      0              689
3  µ0     ÿþ p                            ...      0              932
4  ÐÏà¡±á                >  þÿ\t            ...      0              330
Data from prototypes_100.json:
                                                data  label  Prototype_Index
0  µ0     ÿþ p                            ...      0              733
1  ÐÏà¡±á                >  þÿ\t            ...      0              699
2  µ0     ÿþ p                            ...      0              742
3  µ0     ÿþ p                            ...      0              386
4  µ0     ÿþ p                 

In [3]:
def combine_all_benign_malware_pairs(benign_prototypes, malware_prototypes):
    """
    Combine all possible pairs of benign and malware data into a new DataFrame.
    """
    combined_data = []  # To store all combinations

    # Get the common file names between benign and malware
    common_files = set(benign_prototypes.keys()).intersection(set(malware_prototypes.keys()))
    
    for file_name in common_files:
        benign_df = benign_prototypes[file_name]
        malware_df = malware_prototypes[file_name]

        # Create all possible combinations between benign and malware rows
        for _, benign_row in benign_df.iterrows():
            for _, malware_row in malware_df.iterrows():
                combined_row = {
                    "Benign_Data": benign_row.to_dict(),
                    "Malware_Data": malware_row.to_dict()
                }
                combined_data.append(combined_row)
    
    # Convert the combined data into a new DataFrame
    combined_df = pd.DataFrame(combined_data)
    return combined_df

# Combine all pairs of benign and malware prototypes
print("Combining all pairs of benign and malware prototypes...")
all_pairs_combined_df = combine_all_benign_malware_pairs(benign_prototypes, malware_prototypes)

# Display the combined data
if not all_pairs_combined_df.empty:
    print("All pairs combined successfully!")
    print(all_pairs_combined_df.head())


Combining all pairs of benign and malware prototypes...
All pairs combined successfully!
                                         Benign_Data  \
0  {'data': 'µ0     ÿþ p                  ...   
1  {'data': 'µ0     ÿþ p                  ...   
2  {'data': 'µ0     ÿþ p                  ...   
3  {'data': 'µ0     ÿþ p                  ...   
4  {'data': 'µ0     ÿþ p                  ...   

                                        Malware_Data  
0  {'data': 'ÐÏà¡±á                >  þÿ	    ...  
1  {'data': 'µ0     ÿþ p                  ...  
2  {'data': 'µ0     ÿþ p                  ...  
3  {'data': 'µ0     ÿþ p                  ...  
4  {'data': 'µ0     ÿþ p                  ...  
