In [67]:
#Importing required libraries
import pandas as pd
import numpy as np
import os
import gc
import glob
import zipfile
import shutil


# gc.collect()


### Specify directories
## Run initial Pre-processing for each Bandwidth output

In [92]:
# Specify the path to your output zip file
zip_file_path = 'BWidth3.zip'

# Specify the bandwidth
bw = "bw3"

bandwidth = 3

# Specify the path to the directory for visitors mapping
visitors_map = 'nbpeople_mappings_bw3.csv'

In [93]:
# Specify the directory where you want to extract the contents
extracted_dir_path = 'extracted/'

# Check if the directory exists and is not empty
if os.path.exists(extracted_dir_path) and os.listdir(extracted_dir_path):
    print("Directory is not empty. Clearing contents...")
    # Forcefully remove the directory and its contents
    shutil.rmtree(extracted_dir_path)
    print("Directory contents cleared.")

# Create the directory if it doesn't exist
os.makedirs(extracted_dir_path, exist_ok=True)

# Extract the zip file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_dir_path)

print("Extraction complete.")


Directory is not empty. Clearing contents...
Directory contents cleared.
Extraction complete.


In [94]:
# List of folder names
folders = ["entrance", "hall1", "hall2", "hall3", "shop", "rest", "edge", "metaverse"]

# Iterate through each folder
for folder_name in folders:
    # Define the path to the folder
    folder_path = os.path.join('extracted', folder_name)

    # Check if the folder exists
    if os.path.exists(folder_path):
        # Define the path to the results.csv file
        results_file_path = os.path.join(folder_path, 'results.csv')

        # Check if the results.csv file exists
        if os.path.exists(results_file_path):
            # Define the new filename
            new_filename = f"results_{folder_name}_{bw}_pr.csv"

            # Rename the results.csv file to the new filename
            os.rename(results_file_path, os.path.join(folder_path, new_filename))
            print(f"Renamed results.csv in '{folder_name}' folder to '{new_filename}'")
        else:
            print(f"No results.csv file found in '{folder_name}' folder.")
    else:
        print(f"'{folder_name}' folder does not exist.")

Renamed results.csv in 'entrance' folder to 'results_entrance_bw3_pr.csv'
Renamed results.csv in 'hall1' folder to 'results_hall1_bw3_pr.csv'
Renamed results.csv in 'hall2' folder to 'results_hall2_bw3_pr.csv'
Renamed results.csv in 'hall3' folder to 'results_hall3_bw3_pr.csv'
Renamed results.csv in 'shop' folder to 'results_shop_bw3_pr.csv'
Renamed results.csv in 'rest' folder to 'results_rest_bw3_pr.csv'
Renamed results.csv in 'edge' folder to 'results_edge_bw3_pr.csv'
Renamed results.csv in 'metaverse' folder to 'results_metaverse_bw3_pr.csv'


In [95]:
# Read the DataFrame from the CSV file
df_read = pd.read_csv(visitors_map)

# Convert the DataFrame back to the dictionary format
nbpeople_mappings_read = df_read.set_index('location').T.to_dict()

print("\nMappings read from DataFrame:")
print(nbpeople_mappings_read)



Mappings read from DataFrame:
{'entrance': {'visitor_A': 3, 'visitor_B': 2, 'visitor_C': 1}, 'hall1': {'visitor_A': 5, 'visitor_B': 5, 'visitor_C': 5}, 'hall2': {'visitor_A': 4, 'visitor_B': 0, 'visitor_C': 2}, 'hall3': {'visitor_A': 1, 'visitor_B': 7, 'visitor_C': 5}, 'shop': {'visitor_A': 9, 'visitor_B': 4, 'visitor_C': 2}, 'rest': {'visitor_A': 6, 'visitor_B': 5, 'visitor_C': 4}, 'edge': {'visitor_A': 3, 'visitor_B': 2, 'visitor_C': 10}, 'metaverse': {'visitor_A': 11, 'visitor_B': 14, 'visitor_C': 10}}


In [96]:

period = 60

# Define column names
columns = ["timestamp", "sensorid", "observation", "location", "bandwidth", "payload", "responsetime"]

# Iterate through each folder
for folder_name, mappings in nbpeople_mappings_read.items():
    # Define the path to the result.csv file
    result_file_path = os.path.join('extracted', folder_name, f'results_{folder_name}_{bw}_pr.csv')

    # Check if the file exists
    if os.path.exists(result_file_path):
        # Read the CSV file into a DataFrame with specified column names
        df = pd.read_csv(result_file_path, names=columns)

        # Assuming df is your DataFrame

        # Convert 'timestamp' column to datetime type
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
        # print(df.head())

        # Sort the DataFrame by 'timestamp'
        df.sort_values(by='timestamp', inplace=True)

        # Calculate the time range
        time_range = df['timestamp'].max() - df['timestamp'].min()
        print(time_range)

        # Define time intervals
        ### Time interval as set on the \system-prototype\commons\src\devices - temp/motion/rfid was 30secs to reduce file size.
        interval_duration = pd.Timedelta(seconds=period)

        # Define start time for each interval
        start_time_A = df['timestamp'].min()
        start_time_B = start_time_A + interval_duration
        start_time_C = start_time_B + interval_duration

        # Assign values based on time intervals
        df['visitors'] = mappings['visitor_C']  # Default value C
        df.loc[(df['timestamp'] >= start_time_A) & (df['timestamp'] < start_time_B), 'visitors'] = mappings['visitor_A']
        df.loc[(df['timestamp'] >= start_time_B) & (df['timestamp'] < start_time_C), 'visitors'] = mappings['visitor_B']


        # Change the value of 'bandwidth' column to 6
        df['bandwidth'] = bandwidth


        # Define the sampling rate for supersampling
        # supersampling_rate = '5ms'  # Adjust the sampling rate as needed

        # Set the timestamp column as the index
        # df.set_index('timestamp', inplace=True)

        # Resample the DataFrame to supersample the data
        # df = df.resample(supersampling_rate).ffill()

        # Define the systematic sampling rate
        # n = 30  # Select every 10th data point

        # # Choose a random starting point
        # random_start = 0  # Start from the first datapoint

        # # Perform systematic sampling
        # systematic_sample = df.iloc[random_start::n]

        # Save the resulting DataFrame to a CSV file
        # output_file_path = os.path.join('extracted', folder_name, f'results_{folder_name}_modified.csv')
        output_file_path = os.path.join(f'results_{folder_name}_{bw}_mod.csv')
        df.to_csv(output_file_path, index=False)
        print(f"Processed file saved to {output_file_path}")
    else:
        print(f"File not found: {result_file_path}")


0 days 00:01:33.519000
Processed file saved to results_entrance_bw3_mod.csv
0 days 00:01:29.882000
Processed file saved to results_hall1_bw3_mod.csv
0 days 00:01:33.131000
Processed file saved to results_hall2_bw3_mod.csv
0 days 00:01:39.349000
Processed file saved to results_hall3_bw3_mod.csv
0 days 00:01:36.161000
Processed file saved to results_shop_bw3_mod.csv
0 days 00:01:40.203000
Processed file saved to results_rest_bw3_mod.csv


  df = pd.read_csv(result_file_path, names=columns)


0 days 00:02:01.110000
Processed file saved to results_edge_bw3_mod.csv
0 days 00:01:00.222000
Processed file saved to results_metaverse_bw3_mod.csv


In [97]:
# Get a list of all files ending with '_mod.csv'
output_files = glob.glob(f"*{bw}_mod.csv")

# Check if there are any files to concatenate
if output_files:
    # Read each CSV file and concatenate into a single DataFrame
    combined_df = pd.concat([pd.read_csv(file) for file in output_files], ignore_index=True)

    # Save the combined DataFrame to a new CSV file
    # Create the directory if it doesn't exist
    output_directory = './data/'
    os.makedirs(output_directory, exist_ok=True)

    combined_csv_path = f'./data/{bw}_combined_results.csv'
    combined_df.to_csv(combined_csv_path, index=False)

    print(f'Combined CSV file saved at: {combined_csv_path}')
else:
    print('No files ending with "_mod.csv" found in the root directory.')

  combined_df = pd.concat([pd.read_csv(file) for file in output_files], ignore_index=True)


Combined CSV file saved at: ./data/bw3_combined_results.csv


### Run After Obtaining Initial Bandwith combined csv for all locations

In [98]:
# Define a function to map location values
locations = ["entrance", "hall1", "hall2", "hall3", "shop", "rest", "edge", "metaverse"]

def map_location(location):
    location_lower = location.lower()  # Convert location to lowercase
    if location_lower == "entrance":
        return 2
    elif location_lower == "hall1":
        return 1
    elif location_lower == "hall2":
        return 3
    elif location_lower == "hall3":
        return 4
    elif location_lower == "shop":
        return 5
    elif location_lower == "rest":
        return 6
    elif location_lower == "edge":
        return 7
    elif location_lower == "metaverse":
        return 8
    else:
        return None  # Return None for other cases

# Apply the function to the 'location' column
# df['location_encoded'] = df['location'].apply(map_location)

# print(df)


In [99]:
# Define a function to parse and assign values
def assign_value(observation):
    split = observation.split('-')
    last_part = split[0]
    if last_part == 'smartlighting':
        return 1
    elif last_part == 'visitorguiding':
        return 2
    elif last_part == 'maintenance':
        return 3
    elif last_part == 'security':
        return 4
    else:
        return 5  # Return None for cases not specified

# Apply the function to the 'observation' column
# df['sensor'] = df['sensorid'].apply(assign_value)



In [100]:
# Get a list of all files ending with 'combined_results.csv'
output_files = glob.glob('./data/*combined_results.csv')

# Initialize an empty DataFrame to store cleaned data
cleaned_df = pd.DataFrame()

# Loop through each output file
for output in output_files:
    df = pd.read_csv(output)

    # Apply mapping functions
    df['location_encoded'] = df['location'].apply(map_location)
    df['sensor'] = df['sensorid'].apply(assign_value)

    # Select columns to keep
    columns_to_keep = ["bandwidth", "responsetime", "visitors", "sensor", 'location_encoded']
    df_selected = df[columns_to_keep]

    # Append the selected DataFrame to the cleaned DataFrame
    cleaned_df = pd.concat([cleaned_df, df_selected], ignore_index=True)

# Save the combined DataFrame to a new CSV file
output_directory = './data/'
os.makedirs(output_directory, exist_ok=True)
combined_csv_path = f'./data/final_results.csv'
cleaned_df.to_csv(combined_csv_path, index=False)

print(f'Combined CSV file saved at: {combined_csv_path}')



  df = pd.read_csv(output)
  df = pd.read_csv(output)


Combined CSV file saved at: ./data/final_results.csv
