### Preprocessing IoT Description files


In [None]:
import json
import pandas as pd
import glob
import os

def extract_after_underscore(app_id):
    """Return the second ``'_'``-separated field of ``app_id``.

    Values that contain no underscore are returned unchanged. Only the
    segment between the first and the second underscore is kept, so
    ``'a_b_c'`` yields ``'b'`` (not ``'b_c'``).
    """
    if '_' not in app_id:
        return app_id
    # maxsplit=2 is enough: index 1 is the same field as with an unbounded split.
    return app_id.split('_', 2)[1]

# Builds `metrics_df`: one row per (topic, app) pair and scenario, combining the
# measured latency from the CSV with the system, application and device
# settings described in that scenario's JSON file.

# One DataFrame per successfully processed scenario (JSON file).
all_metrics_dfs = []

# Directory where the scenario JSON files are located.
directory = './Jsonfolder'

# Response-time table. The loop reads its 'topic' and 'app' columns plus one
# column per scenario, named after the JSON file's base name.
csv_file = 'response-times-dataset.csv'
df = pd.read_csv(csv_file)

# Top-level JSON keys copied unchanged onto every output row.
SYSTEM_CONFIG_KEYS = (
    'systemBandwidth',
    'bandwidthPolicy',
    'priorityPolicy',
    'commChannelLossAN',
    'commChannelLossRT',
    'commChannelLossTS',
    'commChannelLossVS',
    'brokerCapacity',
)

# Constant written to the 'scenario_case' column of every row of this run.
SCENARIO_CASE = 100

# glob returns paths in arbitrary order; sorting keeps the row order of the
# resulting dataset reproducible between runs and machines.
for json_file_path in sorted(glob.glob(os.path.join(directory, '*.json'))):
    # The base name (file name without extension) identifies the scenario and
    # is also the name of its latency column in the CSV.
    base_name = os.path.splitext(os.path.basename(json_file_path))[0]
    print(base_name)

    # All three columns are needed: 'topic' and 'app' are merge keys, and
    # without the scenario column the rows would carry no latency at all.
    required_columns = ['topic', 'app', base_name]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Columns {missing_columns} not found in {csv_file}. Skipping this file.")
        continue

    with open(json_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # IoT devices: flatten the list of published topics into one string.
    # NOTE(review): the list is joined with ';' before the prefix is stripped
    # further down, so a device with more than one topic ends up with a value
    # that matches no CSV topic - confirm each device publishes to one topic.
    iot_devices = pd.DataFrame(data['IoTdevices'])
    iot_devices['publishesTo'] = iot_devices['publishesTo'].apply(lambda topics: ';'.join(topics))

    # Applications: flatten the list of subscribed topics the same way.
    applications = pd.DataFrame(data['applications'])
    applications['subscribesTo'] = applications['subscribesTo'].apply(lambda topics: ';'.join(topics))

    # Single-row frame holding the scenario-wide settings.
    system_config_df = pd.DataFrame({key: [data[key]] for key in SYSTEM_CONFIG_KEYS})

    # Cross join: attach the single settings row to every (topic, app) row.
    # The helper key lives only on temporary copies, so neither `df` nor
    # `system_config_df` is modified.
    merged_df = pd.merge(
        df[required_columns].assign(key=1),
        system_config_df.assign(key=1),
        on='key',
    ).drop('key', axis=1)

    # Strip the prefix from the application ids so they can be matched
    # against the CSV's 'app' values, then attach the application attributes.
    applications['applicationId'] = applications['applicationId'].apply(extract_after_underscore)
    result_df = pd.merge(merged_df, applications, left_on='app', right_on='applicationId', how='left')

    # Same for the devices: match the stripped topic against the CSV's 'topic'.
    iot_devices['publishesTo'] = iot_devices['publishesTo'].apply(extract_after_underscore)
    final_df = pd.merge(result_df, iot_devices, left_on='topic', right_on='publishesTo', how='left')

    # Tag every row with the scenario it came from.
    final_df['priorityID'] = base_name
    final_df['scenario_case'] = SCENARIO_CASE

    # Output columns, in output order. Attributes that the JSON file does not
    # provide are simply left out for this scenario.
    columns_to_select = ['topic', 'app', 'applicationCategory', base_name, 'systemBandwidth', 'commChannelLossAN',
                         'commChannelLossRT', 'commChannelLossTS', 'commChannelLossVS', 'brokerCapacity',
                         'priority', 'priorityID', 'processingRate', 'publishFrequency', 'messageSize', 'scenario_case']
    columns_to_select = [col for col in columns_to_select if col in final_df.columns]

    final_df_filtered = final_df[columns_to_select]
    if final_df_filtered.empty:
        print("No columns to select in the final DataFrame. Skipping this iteration.")
        continue

    # The scenario-specific column becomes the common 'latency' column so the
    # per-scenario frames line up when concatenated.
    final_df_filtered = final_df_filtered.rename(columns={base_name: 'latency'})
    all_metrics_dfs.append(final_df_filtered)

# Stack all scenarios into one DataFrame.
if all_metrics_dfs:
    metrics_df = pd.concat(all_metrics_dfs, ignore_index=True)
    print(metrics_df.head(2))
else:
    print("No dataframes to concatenate.")



dropVS15AN15
dropVS10
prioRTVSTSAN
prioRT
dropVS10AN10RT10
dropRT10
prioTS
prioRTVS
baseline
prioritizeTopics
maxmin
prioVS
prioAN
dropVS10AN10
dropAN10
plannerConfiguration
        topic    app applicationCategory   latency  systemBandwidth  \
0  amazonecho  app10                  AN  5.406390              650   
1  amazonecho  app14                  TS  5.252253              650   

   commChannelLossAN  commChannelLossRT  commChannelLossTS  commChannelLossVS  \
0               0.15                0.0                  0               0.15   
1               0.15                0.0                  0               0.15   

   brokerCapacity  priority    priorityID  processingRate  publishFrequency  \
0             100         0  dropVS15AN15            1000                 1   
1             100         0  dropVS15AN15            1000                 1   

   messageSize  scenario_case  
0    105869.28            100  
1    105869.28            100  


In [None]:
# Report how many rows were assembled, then persist them for this run.
row_total = len(metrics_df)
print(row_total)

metrics_df.to_csv('100Subs_metrics_df.csv', index=False)

1600


### Concat for Scenario 3 - Scalability


In [None]:

# concat_files = ['100Subs_metrics_df.csv', '80Subs_metrics_df.csv', '60Subs_metrics_df.csv', '40Subs_metrics_df.csv', '20Subs_metrics_df.csv']

import pandas as pd

# Per-run metric files to stack, named after the number in their prefix.
concat_files = [f'{count}Subs_metrics_df.csv' for count in (100, 80, 60, 40, 20)]

# Load every file and stack them into a single frame with a fresh index.
dfs = list(map(pd.read_csv, concat_files))
combined_df = pd.concat(dfs, ignore_index=True)

# Persist the combined dataset.
combined_df.to_csv('combined_metrics_df.csv', index=False)

# Show the first row as a quick sanity check.
print(combined_df.head(1))



        topic    app applicationCategory   latency  systemBandwidth  \
0  amazonecho  app10                  AN  5.406390              650   
1  amazonecho  app14                  TS  5.252253              650   
2  amazonecho  app19                  TS  5.247047              650   
3  amazonecho  app21                  RT  5.344094              650   
4  amazonecho  app27                  RT  5.378431              650   

   commChannelLossAN  commChannelLossRT  commChannelLossTS  commChannelLossVS  \
0               0.15                0.0                  0               0.15   
1               0.15                0.0                  0               0.15   
2               0.15                0.0                  0               0.15   
3               0.15                0.0                  0               0.15   
4               0.15                0.0                  0               0.15   

   brokerCapacity  priority    priorityID  processingRate  publishFrequency  \
0      

### Delete folders


In [None]:
import shutil
import os

def delete_folder_contents(folder_path):
    """Delete everything inside ``folder_path`` while keeping the folder itself.

    Real sub-directories are removed recursively. Every other entry (regular
    files, symbolic links of any kind, including dangling ones) is unlinked.
    Symbolic links are never followed, so a link's target is left untouched.

    If ``folder_path`` does not exist, a message is printed and nothing else
    happens.

    Args:
        folder_path: Path of the directory to empty.

    Raises:
        NotADirectoryError: If ``folder_path`` exists but is not a directory.
        OSError: If an entry cannot be removed (e.g. missing permissions).
    """
    if not os.path.exists(folder_path):
        print(f"Folder '{folder_path}' does not exist.")
        return

    # Take a snapshot first so entries are not deleted while the directory
    # is still being iterated.
    with os.scandir(folder_path) as scan:
        entries = list(scan)

    for entry in entries:
        # follow_symlinks=False: a link to a directory must be unlinked, not
        # handed to shutil.rmtree, which refuses to operate on symlinks.
        if entry.is_dir(follow_symlinks=False):
            shutil.rmtree(entry.path)
        else:
            os.remove(entry.path)

# Empty the folder that holds the scenario JSON files; the folder itself is kept.
folder_path = './Jsonfolder'
# folder_path = './'  # caution: would delete everything in the current working directory
delete_folder_contents(folder_path)
