In [1]:
import os
import pandas as pd
import re

def numerical_key(filename):
    """Sort key that orders file names by the integer just before ``.txt``.

    Args:
        filename: File name such as ``"Model12.txt"``.

    Returns:
        The trailing number as an ``int``, or ``float('inf')`` when the name
        does not end in ``<digits>.txt`` (so such names sort last).
    """
    trailing_number = re.search(r'(\d+)\.txt$', filename)
    if not trailing_number:
        return float('inf')
    return int(trailing_number.group(1))

def find_shortest_files_in_batches(directory, model_types, batch_size=30):
    """Pick, per model type and per batch of runs, the data file with the fewest rows.

    Files named ``<model_type><number>.txt`` are collected from ``directory``,
    ordered by ``<number>`` and split into consecutive batches of
    ``batch_size`` files. Each file is read as tab-separated text and the one
    with the fewest data rows in each batch is kept (the first one wins ties).

    Args:
        directory: Folder containing the tab-separated ``.txt`` files.
        model_types: Iterable of file-name prefixes. Each prefix is matched
            literally and against the whole file name, so ``"A"`` does not
            pick up ``"XA3.txt"`` and regex metacharacters in a prefix are
            not interpreted.
        batch_size: Number of consecutive files per batch; must be >= 1.

    Returns:
        A list of file names (not paths), grouped by model type in the order
        given and, within a model type, in batch order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        OSError: If ``directory`` cannot be listed.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # The directory content is the same for every model type: list it once.
    all_names = os.listdir(directory)
    shortest_files = []

    for model_type in model_types:
        # Escape the prefix and match the full name so only this model's files qualify.
        name_pattern = re.compile(re.escape(model_type) + r'(\d+)\.txt')

        numbered = []
        for name in all_names:
            matched = name_pattern.fullmatch(name)
            if matched:
                numbered.append((int(matched.group(1)), name))
        # Stable sort on the run number only, keeping listing order for equal numbers.
        numbered.sort(key=lambda pair: pair[0])
        sorted_files = [name for _, name in numbered]

        for start in range(0, len(sorted_files), batch_size):
            min_length = float('inf')
            shortest_file = None
            for filename in sorted_files[start:start + batch_size]:
                file_path = os.path.join(directory, filename)
                try:
                    # Row count of the parsed table (the header line is not counted)
                    data_length = len(pd.read_csv(file_path, delimiter='\t'))
                except Exception as e:
                    # Best effort: report the unreadable file and keep going
                    print(f"Error reading {filename}: {e}")
                    continue
                if data_length < min_length:
                    min_length = data_length
                    shortest_file = filename

            # A batch contributes nothing when none of its files could be read
            if shortest_file is not None:
                shortest_files.append(shortest_file)

    return shortest_files

# Model families analysed for the 2 CPU core runs. Each entry is used as a
# file-name prefix, so the spellings must stay exactly as written.
model_type_2cpu = [
    'DesicionTree',
    'GuassianNaiveBayes',
    'KNearestNeighbors',
    'MultilayerPerceptron',
    'RandomForestClassifier',
    'SupportVectorMachine',
]

directory = './reanalyzed_data_2cpu/'
shortest_files_list_2cpu = find_shortest_files_in_batches(directory, model_type_2cpu)
print(shortest_files_list_2cpu)


# The 8 CPU core analysis covers the same six models plus the deep learning models.
model_type_8cpu = [
    *model_type_2cpu,
    'Resnet18forDepthEstimation',
    'Resnet18forImageSemanticSegmentation',
    'VGGforImageSemanticSegmentation',
    'VGGforObjectDetection',
    'ViTforDepthEstimation',
    'ViTforImageSemanticSegmentation',
    'yolov8forImageSemanticSegmentation',
    'yolov8forObjectDetection',
    'BARTforTextClassification',
    'BERTforTextClassification',
]

directory = './reanalyzed_data_8cpu/'
shortest_files_list_8cpu = find_shortest_files_in_batches(directory, model_type_8cpu, batch_size=10)
print(shortest_files_list_8cpu)


['DesicionTree2.txt', 'DesicionTree47.txt', 'DesicionTree63.txt', 'GuassianNaiveBayes27.txt', 'GuassianNaiveBayes33.txt', 'GuassianNaiveBayes68.txt', 'KNearestNeighbors6.txt', 'KNearestNeighbors44.txt', 'KNearestNeighbors74.txt', 'MultilayerPerceptron16.txt', 'MultilayerPerceptron31.txt', 'MultilayerPerceptron74.txt', 'RandomForestClassifier13.txt', 'RandomForestClassifier44.txt', 'RandomForestClassifier85.txt', 'SupportVectorMachine14.txt', 'SupportVectorMachine53.txt', 'SupportVectorMachine76.txt']
['DesicionTree8.txt', 'DesicionTree19.txt', 'DesicionTree25.txt', 'GuassianNaiveBayes5.txt', 'GuassianNaiveBayes12.txt', 'GuassianNaiveBayes28.txt', 'KNearestNeighbors9.txt', 'KNearestNeighbors13.txt', 'KNearestNeighbors29.txt', 'MultilayerPerceptron3.txt', 'MultilayerPerceptron11.txt', 'MultilayerPerceptron24.txt', 'RandomForestClassifier5.txt', 'RandomForestClassifier15.txt', 'RandomForestClassifier22.txt', 'SupportVectorMachine3.txt', 'SupportVectorMachine19.txt', 'SupportVectorMachine2

In [2]:
# Disk I/O columns, grouped one figure per inner list. The plotting cells do
# not chart these values directly: they plot the sample-to-sample difference
# (`.diff()`) of each column on a log-scaled axis.
# NOTE(review): 'I/Os_currently_in_progress' looks like an instantaneous gauge
# rather than a running total - confirm that plotting its delta is intended.
diff_columns = [
    ['sectors_read', 'sectors_written', 'sectors_discarded'],
    ['time_spent_reading_(ms)', 'time_spent_writing_(ms)', 'time_spent_discarding', 'time_spent_flushing'],
    ['reads_completed_successfully', 'writes_completed', 'discards_completed_successfully', 'flush_requests_completed_successfully'],
    ['reads_merged', 'writes_merged', 'discards_merged'],
    ['I/Os_currently_in_progress', 'time_spent_doing_I/Os_(ms)', 'weighted_time_spent_doing_I/Os_(ms)']
]

# Column groups for the 2-vCPU runs; each inner list is drawn as one figure.
standard_columns_2cpu = [
    # Process-level counters
    ['CPUUtilization'],
    ['Branches', 'Branches_CPU_Using', 'All_Branches_Using'],
    ['Branch_Misses', 'Branch_Misses_CPU_Using'],
    ['Cache_References', 'Cache_References_CPU_Using', 'All_Cache_Reference_Using'],
    ['Cache_Misses', 'Cache_Misses_CPU_Using'],
    ['Cycles', 'Cycles_CPU_Using'],
    ['Instructions', 'Instructions_CPU_Using', 'Instructions_per_CPU_Cycle'],
    ['Last_Level_Cache_Accesses', 'Last_Level_Cache_Accesses_CPU_Using'],
    ['Load_Dispatches', 'Load_Dispatches_CPU_Using'],
    ['Storage_Dispatches', 'Storage_Dispatches_CPU_Using'],
    # Per-core metrics: the "-" column followed by cores 0-3, one group per metric
    *[[f'{prefix}{core}' for core in ('-', 0, 1, 2, 3)]
      for prefix in ('AvgMHz', 'Busy', 'BzyMHz', 'C1', 'C2', 'CorWatt')],
    ['PkgWatt-', 'PkgWatt0'],
    ['POLL-'],
    [f'IRQ{core}' for core in ('-', 0, 1, 2, 3)],
    # Network throughput and socket counts
    ['rxkB/s', 'rxpck/s', 'txkB/s', 'txpck/s', '%util'],
    ['TCP', 'UDP', 'UNIX', 'RAW', 'SCTP', 'DCCP'],
]

# Column groups for the 8-vCPU runs; each inner list is drawn as one figure.
standard_columns_8cpu = [
    # Process-level counters
    ['CPUUtilization'],
    ['Branches', 'Branches_CPU_Using', 'All_Branches_Using'],
    ['Branch_Misses', 'Branch_Misses_CPU_Using'],
    ['Cache_References', 'Cache_References_CPU_Using', 'All_Cache_Reference_Using'],
    ['Cache_Misses', 'Cache_Misses_CPU_Using'],
    ['Cycles', 'Cycles_CPU_Using'],
    ['Instructions', 'Instructions_CPU_Using', 'Instructions_per_CPU_Cycle'],
    ['Last_Level_Cache_Accesses', 'Last_Level_Cache_Accesses_CPU_Using'],
    ['Load_Dispatches', 'Load_Dispatches_CPU_Using'],
    ['Storage_Dispatches', 'Storage_Dispatches_CPU_Using'],
    # Per-core metrics, three figures per metric: ("-", 0, 1), (6-9) and (10-13)
    *[[f'{prefix}{core}' for core in cores]
      for prefix in ('AvgMHz', 'Busy', 'BzyMHz', 'C1', 'C2', 'CorWatt')
      for cores in (('-', 0, 1), range(6, 10), range(10, 14))],
    ['POLL-'],
    *[[f'IRQ{core}' for core in cores]
      for cores in (('-', 0, 1), range(6, 10), range(10, 14))],
    # Network throughput and socket counts
    ['rxkB/s', 'rxpck/s', 'txkB/s', 'txpck/s', '%util'],
    ['TCP', 'UDP', 'UNIX', 'RAW', 'SCTP', 'DCCP'],
]

In [3]:
# Human-readable description for every column plotted in the 2-vCPU report;
# used for legend labels and figure titles.
column_explanations_2cpu = {
    # Disk operation metrics
    'sectors_read': 'Number of sectors read from disk',
    'sectors_written': 'Number of sectors written to disk',
    'sectors_discarded': 'Number of sectors discarded from operations',
    'time_spent_reading_(ms)': 'Time(ms) spent on reading operations',
    'time_spent_writing_(ms)': 'Time(ms) spent on writing operations',
    'time_spent_discarding': 'Time(ms) spent on discarding data',
    'time_spent_flushing': 'Time(ms) spent on flushing data to storage',
    'reads_completed_successfully': 'Number of read operations completed successfully',
    'writes_completed': 'Number of write operations completed',
    'discards_completed_successfully': 'Number of discard operations completed successfully',
    'flush_requests_completed_successfully': 'Number of flush requests completed successfully',
    'reads_merged': 'Number of read operations merged',
    'writes_merged': 'Number of write operations merged',
    'discards_merged': 'Number of discard operations merged',
    'I/Os_currently_in_progress': 'Number of I/O operations currently in progress',
    'time_spent_doing_I/Os_(ms)': 'Time(ms) spent on I/O operations',
    'weighted_time_spent_doing_I/Os_(ms)': 'Weighted time(ms) spent on I/O operations',

    # CPU utilization and performance metrics
    'CPUUtilization': 'Percentage of CPU utilization',
    'Branches': 'Total number of CPU branch instructions processed',
    'Branches_CPU_Using': 'Branch instructions processed by the CPU',
    'All_Branches_Using': 'All branches being used across CPUs',
    'Branch_Misses': 'Number of branch instructions that the CPU failed to predict',
    'Branch_Misses_CPU_Using': 'Branch misses on the CPU',
    'Cache_References': 'Number of times the CPU cache was accessed',
    'Cache_References_CPU_Using': 'Cache references by the CPU',
    'All_Cache_Reference_Using': 'All cache references across CPUs',
    'Cache_Misses': 'Number of failed cache attempts',
    'Cache_Misses_CPU_Using': 'Cache misses by the CPU',
    'Cycles': 'Total number of cycles',
    'Cycles_CPU_Using': 'Cycles used by the CPU',
    'Instructions': 'Total number of instructions executed',
    'Instructions_CPU_Using': 'Instructions executed by the CPU',
    'Instructions_per_CPU_Cycle': 'Number of instructions per CPU cycle',
    'Last_Level_Cache_Accesses': 'Accesses to the last level cache',
    'Last_Level_Cache_Accesses_CPU_Using': 'Last level cache accesses by the CPU',
    'Load_Dispatches': 'Number of load dispatches',
    'Load_Dispatches_CPU_Using': 'Load dispatches by the CPU',
    'Storage_Dispatches': 'Number of storage dispatch operations',
    'Storage_Dispatches_CPU_Using': 'Storage dispatch operations by the CPU',

    # Per-core metrics: the "-" entry first, then CPUs 0-3
    'AvgMHz-': 'Average MHz across all CPUs',
    **{f'AvgMHz{cpu}': f'Average MHz for CPU {cpu}' for cpu in range(4)},
    'Busy-': 'Total busy time for all CPUs',
    **{f'Busy{cpu}': f'Busy time for CPU {cpu}' for cpu in range(4)},
    'BzyMHz-': 'Busy MHz across all CPUs',
    **{f'BzyMHz{cpu}': f'Busy MHz for CPU {cpu}' for cpu in range(4)},
    'C1-': 'Time spent in CPU power state C1 across all CPUs',
    **{f'C1{cpu}': f'Time spent in power state C1 for CPU {cpu}' for cpu in range(4)},
    'C2-': 'Time spent in CPU power state C2 across all CPUs',
    **{f'C2{cpu}': f'Time spent in power state C2 for CPU {cpu}' for cpu in range(4)},
    'CorWatt-': 'Core wattage usage across all CPUs',
    **{f'CorWatt{cpu}': f'Core wattage for CPU {cpu}' for cpu in range(4)},
    'PkgWatt-': 'Package wattage across all CPUs',
    'PkgWatt0': 'Package wattage for CPU 0',
    'POLL-': 'Polling time across all CPUs',
    'IRQ-': 'Interrupt request time across all CPUs',
    **{f'IRQ{cpu}': f'Interrupt request time for CPU {cpu}' for cpu in range(4)},

    # Network throughput and socket counts
    'rxkB/s': 'Received kilobytes per second',
    'rxpck/s': 'Received packets per second',
    'txkB/s': 'Transmitted kilobytes per second',
    'txpck/s': 'Transmitted packets per second',
    '%util': 'Percentage of network utilization',
    'TCP': 'Transmission Control Protocol connections',
    'UDP': 'User Datagram Protocol connections',
    'UNIX': 'UNIX socket connections',
    'RAW': 'Raw socket connections',
    'SCTP': 'Stream Control Transmission Protocol connections',
    'DCCP': 'Datagram Congestion Control Protocol connections',
}



In [4]:
# Human-readable description for every column plotted in the 8-vCPU report;
# used for legend labels and figure titles. Columns without an entry fall back
# to their raw name at the lookup sites (`.get(column, column)`).
column_explanations_8cpu = {
    # Disk operation metrics (looked up by the delta plots built from diff_columns)
    'sectors_read': 'Number of sectors read from disk',
    'sectors_written': 'Number of sectors written to disk',
    'sectors_discarded': 'Number of sectors discarded from operations',
    'time_spent_reading_(ms)': 'Time(ms) spent on reading operations',
    'time_spent_writing_(ms)': 'Time(ms) spent on writing operations',
    'time_spent_discarding': 'Time(ms) spent on discarding data',
    'time_spent_flushing': 'Time(ms) spent on flushing data to storage',
    'reads_completed_successfully': 'Number of read operations completed successfully',
    'writes_completed': 'Number of write operations completed',
    'discards_completed_successfully': 'Number of discard operations completed successfully',
    'flush_requests_completed_successfully': 'Number of flush requests completed successfully',
    'reads_merged': 'Number of read operations merged',
    'writes_merged': 'Number of write operations merged',
    'discards_merged': 'Number of discard operations merged',
    'I/Os_currently_in_progress': 'Number of I/O operations currently in progress',
    'time_spent_doing_I/Os_(ms)': 'Time(ms) spent on I/O operations',
    'weighted_time_spent_doing_I/Os_(ms)': 'Weighted time(ms) spent on I/O operations',

    # Common CPU performance metrics
    'CPUUtilization': 'Percentage of CPU utilization',
    'Branches': 'Total number of CPU branch instructions processed',
    'Branches_CPU_Using': 'Branch instructions processed by the CPU',
    'All_Branches_Using': 'All branches being used across CPUs',
    'Branch_Misses': 'Number of branch instructions that the CPU failed to predict',
    'Branch_Misses_CPU_Using': 'Branch misses on the CPU',
    'Cache_References': 'Number of times the CPU cache was accessed',
    'Cache_References_CPU_Using': 'Cache references by the CPU',
    'All_Cache_Reference_Using': 'All cache references across CPUs',
    'Cache_Misses': 'Number of failed cache attempts',
    'Cache_Misses_CPU_Using': 'Cache misses by the CPU',
    'Cycles': 'Total number of cycles',
    'Cycles_CPU_Using': 'Cycles used by the CPU',
    'Instructions': 'Total number of instructions executed',
    'Instructions_CPU_Using': 'Instructions executed by the CPU',
    'Instructions_per_CPU_Cycle': 'Number of instructions per CPU cycle',
    'Last_Level_Cache_Accesses': 'Accesses to the last level cache',
    'Last_Level_Cache_Accesses_CPU_Using': 'Last level cache accesses by the CPU',
    'Load_Dispatches': 'Number of load dispatches',
    'Load_Dispatches_CPU_Using': 'Load dispatches by the CPU',
    'Storage_Dispatches': 'Number of storage dispatch operations',
    'Storage_Dispatches_CPU_Using': 'Storage dispatch operations by the CPU',

    # Enhanced metrics for multi-core CPUs
    'AvgMHz-': 'Average MHz across all CPUs',
    'Busy-': 'Total busy time for all CPUs',
    'BzyMHz-': 'Busy MHz across all CPUs',
    # 'C1-' / 'C2-' are plotted by standard_columns_8cpu, so they need labels too
    'C1-': 'Time spent in CPU power state C1 across all CPUs',
    'C2-': 'Time spent in CPU power state C2 across all CPUs',
    'CorWatt-': 'Core wattage usage across all CPUs',
    'PkgWatt-': 'Package wattage across all CPUs',
    'PkgWatt0': 'Package wattage for CPU 0',
    'POLL-': 'Polling time across all CPUs',
    'IRQ-': 'Interrupt request time across all CPUs',
    'rxkB/s': 'Received kilobytes per second',
    'rxpck/s': 'Received packets per second',
    'txkB/s': 'Transmitted kilobytes per second',
    'txpck/s': 'Transmitted packets per second',
    '%util': 'Percentage of network utilization',
    'TCP': 'Transmission Control Protocol connections',
    'UDP': 'User Datagram Protocol connections',
    'UNIX': 'UNIX socket connections',
    'RAW': 'Raw socket connections',
    'SCTP': 'Stream Control Transmission Protocol connections',
    'DCCP': 'Datagram Congestion Control Protocol connections',

    # Specific CPU core metrics (general pattern): CPUs 0, 1 and 6-13
    **{f'AvgMHz{i}': f'Average MHz for CPU {i}' for i in range(14) if i not in range(2, 6)},
    **{f'Busy{i}': f'Busy time for CPU {i}' for i in range(14) if i not in range(2, 6)},
    **{f'BzyMHz{i}': f'Busy MHz for CPU {i}' for i in range(14) if i not in range(2, 6)},
    **{f'IRQ{i}': f'Interrupt request time for CPU {i}' for i in range(14) if i not in range(2, 6)},
    **{f'CorWatt{i}': f'Core wattage for CPU {i}' for i in range(14) if i not in range(2, 6)},
    # NOTE(review): the C-state entries skip only CPUs 4-5, so they also cover
    # CPUs 2 and 3, unlike the metrics above - confirm whether range(2, 6) was meant.
    **{f'C1{i}': f'Time spent in power state C1 for CPU {i}' for i in range(14) if i not in range(4, 6)},
    **{f'C2{i}': f'Time spent in power state C2 for CPU {i}' for i in range(14) if i not in range(4, 6)},
}


In [18]:
# Folder that receives the rendered .jpg plots of the 2-vCPU runs. The plotting
# cell builds paths by plain string concatenation, so keep the trailing slash.
save_directory = './saved_plots_2cpu/'

In [20]:
from pptx import Presentation
from pptx.util import Inches

# Create a new PowerPoint presentation object; the plotting cell that follows
# appends its title and figure slides to this object.
prs = Presentation()

def insert_figures_into_pptx(plot_filename, slide, top):
    """Add the image file to ``slide``, centred horizontally.

    Args:
        plot_filename: Path of the image to insert.
        slide: Slide object that receives the picture.
        top: Vertical offset of the picture's top edge on the slide.

    The picture is inserted 9.6 in wide; only the width is passed to
    ``add_picture``. Centring assumes the slide is 10 in wide.
    """
    picture_width = Inches(9.6)
    assumed_slide_width = Inches(10)  # Typical width for a standard slide
    left_margin = (assumed_slide_width - picture_width) / 2

    slide.shapes.add_picture(plot_filename, left_margin, top, width=picture_width)


In [21]:
from matplotlib import cycler
import pandas as pd
import matplotlib.pyplot as plt
import os
import textwrap

# Color cycles: tab10 for the primary y-axis, Set2 for the secondary (percentage) y-axis
default_cycler = cycler('color', plt.cm.tab10.colors)
secondary_cycler = cycler('color', plt.cm.Set2.colors)

directory = './reanalyzed_data_2cpu/'

# Font size applied to titles, axis labels, tick labels and legends
fontsize = 14

# Slide layout: up to three figures are stacked per slide, 2.4 in apart,
# starting 0.3 in below the top edge of the slide
figures_per_slide = 3
figure_height = Inches(2.4)
top_margin = Inches(0.3)

# savefig() does not create missing folders, so make sure the target exists
os.makedirs(save_directory, exist_ok=True)


def _on_secondary_axis(column):
    """Return True for columns drawn on the 0-100 secondary axis ('using' or '%' in the name)."""
    return 'using' in column.lower() or '%' in column


def _plot_path(group, base_filename):
    """Build the .jpg path for a column group: joined column names (spaces and '/' removed) plus the file stem."""
    return save_directory + ''.join(group).replace(' ', '').replace('/', '') + '_' + base_filename + '.jpg'


def _place_on_slide(image_path, slide, index):
    """Insert figure number ``index`` (0-based, per data file) and return the slide it was put on.

    A new slide is started whenever the current one already holds
    ``figures_per_slide`` figures.
    """
    slot = index % figures_per_slide
    if slot == 0 and index != 0:
        slide = prs.slides.add_slide(prs.slide_layouts[5])
    insert_figures_into_pptx(image_path, slide, top_margin + figure_height * slot)
    return slide


for filename in shortest_files_list_2cpu:
    if not filename.endswith('.txt'):
        continue

    file_path = os.path.join(directory, filename)
    # Forward fill to handle missing data
    data = pd.read_csv(file_path, delimiter='\t').ffill()

    # File stem used in titles and plot file names
    base_filename = filename.replace('.txt', '')

    # Convert timestamps and calculate seconds elapsed since the first sample
    data['Timestamp'] = pd.to_datetime(data['Timestamp'])
    data['Seconds'] = (data['Timestamp'] - data['Timestamp'].iloc[0]).dt.total_seconds()

    # Title slide for this file (assumes layout 0 is the title slide layout)
    slide = prs.slides.add_slide(prs.slide_layouts[0])
    slide.shapes.title.text = f"Report for {base_filename} (run in a VM with 2 vCPU)"

    # First figure slide; more are added as they fill up
    slide = prs.slides.add_slide(prs.slide_layouts[5])
    figure_count = 0

    # One figure per column group, raw values over time
    for group in standard_columns_2cpu:
        fig, ax1 = plt.subplots(figsize=(20, 5))
        ax1.set_prop_cycle(default_cycler)

        # A secondary axis is only created when the group has a column that needs it
        secondary_y_needed = any(_on_secondary_axis(col) for col in group)
        if secondary_y_needed:
            ax2 = ax1.twinx()
            ax2.set_ylim(0, 100)
            ax2.set_ylabel('Percentage', fontsize=fontsize)
            ax2.set_prop_cycle(secondary_cycler)

        for column in group:
            if column not in data.columns:
                continue  # columns absent from this file are silently skipped
            target_ax = ax2 if _on_secondary_axis(column) else ax1
            target_ax.plot(data['Seconds'], data[column],
                           label=column_explanations_2cpu.get(column, column), marker='.')

        # Single legend holding the entries of both axes
        lines, labels = ax1.get_legend_handles_labels()
        if secondary_y_needed:
            lines2, labels2 = ax2.get_legend_handles_labels()
            ax2.legend(lines + lines2, labels + labels2, loc='upper left', fontsize=fontsize)
        else:
            ax1.legend(fontsize=fontsize)

        # Title built from the column descriptions
        title_text = ('Performance Metrics: ' + ', '.join([column_explanations_2cpu.get(col, col) for col in group]) + ' in ' + base_filename).replace('_', ' ')
        plt.title(textwrap.fill(title_text, width=140), fontsize=fontsize)
        ax1.set_xlabel('Time (s)', fontsize=fontsize)
        ax1.set_ylabel('Value', fontsize=fontsize)
        ax1.grid(True)

        ax1.tick_params(axis='both', labelsize=fontsize)
        if secondary_y_needed:
            ax2.tick_params(axis='both', labelsize=fontsize)

        fig.tight_layout()
        save_path = _plot_path(group, base_filename)
        fig.savefig(save_path)
        plt.close(fig)  # release the figure; many are created in this loop

        slide = _place_on_slide(save_path, slide, figure_count)
        figure_count += 1

    # One figure per disk-counter group, sample-to-sample deltas on a log axis
    for group in diff_columns:
        fig, ax = plt.subplots(figsize=(20, 5))
        ax.set_prop_cycle(default_cycler)

        for column in group:
            if column not in data.columns:
                continue
            # Deltas are shifted by +1 (presumably so that a delta of 0 stays
            # drawable on the log axis); the first element is the NaN produced
            # by diff() and is skipped.
            diff = data[column].diff() + 1
            ax.plot(data['Seconds'][1:], diff[1:],
                    label=f'Delta {column_explanations_2cpu.get(column, column)}', marker='.', linestyle='-')

        ax.set_yscale('log')
        ax.legend(fontsize=fontsize)
        title_text = ('Performance Metrics: Delta ' + ', '.join([column_explanations_2cpu.get(col, col) for col in group]) + ' in ' + base_filename).replace('_', ' ')
        plt.title(textwrap.fill(title_text, width=140), fontsize=fontsize)
        ax.set_xlabel('Time (s)', fontsize=fontsize)
        ax.set_ylabel('Delta Value', fontsize=fontsize)
        ax.grid(True)
        ax.tick_params(axis='both', labelsize=fontsize)

        fig.tight_layout()
        save_path = _plot_path(group, base_filename)
        fig.savefig(save_path)
        plt.close(fig)

        slide = _place_on_slide(save_path, slide, figure_count)
        figure_count += 1


In [22]:
# Save the presentation: writes the 2-vCPU deck assembled by the previous cell
# to a .pptx file at this relative path.
prs.save('performance_metrics_presentation_2cpu.pptx')

In [29]:
# Folder that receives the rendered .jpg plots of the 8-vCPU runs. The plotting
# cell builds paths by plain string concatenation, so keep the trailing slash.
save_directory = './saved_plots_8cpu/'

In [30]:
from pptx import Presentation
from pptx.util import Inches

# Create a new PowerPoint presentation object. This rebinds `prs`, so the
# slides added by the following cell go into a fresh, separate deck.
prs = Presentation()

def insert_figures_into_pptx(plot_filename, slide, top):
    """Add the image file to ``slide``, centred horizontally.

    Args:
        plot_filename: Path of the image to insert.
        slide: Slide object that receives the picture.
        top: Vertical offset of the picture's top edge on the slide.

    The picture is inserted 8 in wide; only the width is passed to
    ``add_picture``. Centring assumes the slide is 10 in wide.
    """
    picture_width = Inches(8)
    assumed_slide_width = Inches(10)  # Typical width for a standard slide
    left_margin = (assumed_slide_width - picture_width) / 2

    slide.shapes.add_picture(plot_filename, left_margin, top, width=picture_width)


In [31]:
from matplotlib import cycler
import pandas as pd
import matplotlib.pyplot as plt
import os
import textwrap

# Color cycles: tab10 for the primary y-axis, Set2 for the secondary (percentage) y-axis
default_cycler = cycler('color', plt.cm.tab10.colors)
secondary_cycler = cycler('color', plt.cm.Set2.colors)

directory = './reanalyzed_data_8cpu/'

# Font size applied to titles, axis labels, tick labels and legends
# (large because the 40 in wide figures are scaled down to 8 in on the slide)
fontsize = 34

# Slide layout: up to seven figures are stacked per slide, 1 in apart,
# starting 0.1 in below the top edge of the slide
figures_per_slide = 7
figure_height = Inches(1)
top_margin = Inches(0.1)

# savefig() does not create missing folders, so make sure the target exists
os.makedirs(save_directory, exist_ok=True)


def _on_secondary_axis(column):
    """Return True for columns drawn on the 0-100 secondary axis ('using' or '%' in the name)."""
    return 'using' in column.lower() or '%' in column


def _plot_path(group, base_filename):
    """Build the .jpg path for a column group: joined column names (spaces and '/' removed) plus the file stem."""
    return save_directory + ''.join(group).replace(' ', '').replace('/', '') + '_' + base_filename + '.jpg'


def _place_on_slide(image_path, slide, index):
    """Insert figure number ``index`` (0-based, per data file) and return the slide it was put on.

    A new slide is started whenever the current one already holds
    ``figures_per_slide`` figures. Every figure uses the same top margin, so
    raw-value and delta figures line up on a shared slide.
    """
    slot = index % figures_per_slide
    if slot == 0 and index != 0:
        slide = prs.slides.add_slide(prs.slide_layouts[5])
    insert_figures_into_pptx(image_path, slide, top_margin + figure_height * slot)
    return slide


# First part of the 8-vCPU report: entries 0-17 of the list. A list cannot be
# indexed with a tuple (`[0, 17]` raises TypeError); the slice below covers the
# same first 18 entries.
for filename in shortest_files_list_8cpu[:18]:
    if not filename.endswith('.txt'):
        continue

    file_path = os.path.join(directory, filename)
    # Forward fill to handle missing data
    data = pd.read_csv(file_path, delimiter='\t').ffill()

    # File stem used in titles and plot file names
    base_filename = filename.replace('.txt', '')

    # Convert timestamps and calculate seconds elapsed since the first sample
    data['Timestamp'] = pd.to_datetime(data['Timestamp'])
    data['Seconds'] = (data['Timestamp'] - data['Timestamp'].iloc[0]).dt.total_seconds()

    # Title slide for this file (assumes layout 0 is the title slide layout)
    slide = prs.slides.add_slide(prs.slide_layouts[0])
    slide.shapes.title.text = f"Report for {base_filename} (run in a VM with 8 vCPU)"

    # First figure slide; more are added as they fill up
    slide = prs.slides.add_slide(prs.slide_layouts[5])
    figure_count = 0

    # One figure per column group, raw values over time
    for group in standard_columns_8cpu:
        fig, ax1 = plt.subplots(figsize=(40, 5))
        ax1.set_prop_cycle(default_cycler)

        # A secondary axis is only created when the group has a column that needs it
        secondary_y_needed = any(_on_secondary_axis(col) for col in group)
        if secondary_y_needed:
            ax2 = ax1.twinx()
            ax2.set_ylim(0, 100)
            ax2.set_ylabel('Percentage', fontsize=fontsize)
            ax2.set_prop_cycle(secondary_cycler)

        for column in group:
            if column not in data.columns:
                continue  # columns absent from this file are silently skipped
            target_ax = ax2 if _on_secondary_axis(column) else ax1
            target_ax.plot(data['Seconds'], data[column],
                           label=column_explanations_8cpu.get(column, column), marker='.')

        # Single legend holding the entries of both axes
        lines, labels = ax1.get_legend_handles_labels()
        if secondary_y_needed:
            lines2, labels2 = ax2.get_legend_handles_labels()
            ax2.legend(lines + lines2, labels + labels2, loc='upper left', fontsize=fontsize)
        else:
            ax1.legend(fontsize=fontsize)

        # Title built from the column descriptions
        title_text = ('Performance Metrics: ' + ', '.join([column_explanations_8cpu.get(col, col) for col in group]) + ' in ' + base_filename).replace('_', ' ')
        plt.title(textwrap.fill(title_text, width=140), fontsize=fontsize)
        ax1.set_xlabel('Time (s)', fontsize=fontsize)
        ax1.set_ylabel('Value', fontsize=fontsize)
        ax1.grid(True)

        ax1.tick_params(axis='both', labelsize=fontsize)
        if secondary_y_needed:
            ax2.tick_params(axis='both', labelsize=fontsize)

        fig.tight_layout()
        save_path = _plot_path(group, base_filename)
        fig.savefig(save_path)
        plt.close(fig)  # release the figure; many are created in this loop

        slide = _place_on_slide(save_path, slide, figure_count)
        figure_count += 1

    # One figure per disk-counter group, sample-to-sample deltas on a log axis
    for group in diff_columns:
        fig, ax = plt.subplots(figsize=(40, 5))
        ax.set_prop_cycle(default_cycler)

        for column in group:
            if column not in data.columns:
                continue
            # Deltas are shifted by +1 (presumably so that a delta of 0 stays
            # drawable on the log axis); the first element is the NaN produced
            # by diff() and is skipped.
            diff = data[column].diff() + 1
            ax.plot(data['Seconds'][1:], diff[1:],
                    label=f'Delta {column_explanations_8cpu.get(column, column)}', marker='.', linestyle='-')

        ax.set_yscale('log')
        ax.legend(fontsize=fontsize)
        title_text = ('Performance Metrics: Delta ' + ', '.join([column_explanations_8cpu.get(col, col) for col in group]) + ' in ' + base_filename).replace('_', ' ')
        plt.title(textwrap.fill(title_text, width=140), fontsize=fontsize)
        ax.set_xlabel('Time (s)', fontsize=fontsize)
        ax.set_ylabel('Delta Value', fontsize=fontsize)
        ax.grid(True)
        ax.tick_params(axis='both', labelsize=fontsize)

        fig.tight_layout()
        save_path = _plot_path(group, base_filename)
        fig.savefig(save_path)
        plt.close(fig)

        slide = _place_on_slide(save_path, slide, figure_count)
        figure_count += 1


In [32]:
# Save the presentation: writes the first 8-vCPU deck assembled by the previous
# cell to a .pptx file at this relative path.
prs.save('performance_metrics_presentation_8cpu_1.pptx')

In [None]:
# Folder for the rendered 8-vCPU plots (same value as set before the first
# 8-vCPU plotting cell); presumably consumed by the plotting cell that follows.
save_directory = './saved_plots_8cpu/'

In [None]:
from pptx import Presentation
from pptx.util import Inches

# Create a new PowerPoint presentation object. This rebinds `prs`, so the
# slides added by the following cell go into a fresh, separate deck.
prs = Presentation()

def insert_figures_into_pptx(plot_filename, slide, top):
    """Add the image file to ``slide``, centred horizontally.

    Args:
        plot_filename: Path of the image to insert.
        slide: Slide object that receives the picture.
        top: Vertical offset of the picture's top edge on the slide.

    The picture is inserted 8 in wide; only the width is passed to
    ``add_picture``. Centring assumes the slide is 10 in wide.
    """
    picture_width = Inches(8)
    assumed_slide_width = Inches(10)  # Typical width for a standard slide
    left_margin = (assumed_slide_width - picture_width) / 2

    slide.shapes.add_picture(plot_filename, left_margin, top, width=picture_width)


In [None]:
from matplotlib import cycler
import pandas as pd
import matplotlib.pyplot as plt
import os
import textwrap

# Color cycles: tab10 for the primary y-axis, Set2 for the secondary (percentage) y-axis
default_cycler = cycler('color', plt.cm.tab10.colors)
secondary_cycler = cycler('color', plt.cm.Set2.colors)

directory = './reanalyzed_data_8cpu/'

# Large font: the figures are created 40 inches wide and inserted much narrower on the slide
fontsize = 34  # You can adjust this value as needed

# Slide layout shared by every figure placed in this cell
figures_per_slide = 7
slide_height = Inches(7.5)  # Typical height of a standard slide (not referenced in this cell)
figure_height = Inches(1)  # Vertical pitch between consecutive figures on a slide
slide_top_margin = Inches(0.1)  # Gap above the first figure of every slide


def _describe(column):
    """Return the display label for a column, falling back to the raw column name."""
    return column_explanations_8cpu.get(column, column)


def _on_percentage_axis(column):
    """Return True when the column name routes it to the 0-100 secondary y-axis."""
    return 'using' in column.lower() or '%' in column


def _wrapped_title(prefix, group, base_filename):
    """Build the figure title for a column group, wrapped at 140 characters."""
    title_text = (prefix + ', '.join([_describe(col) for col in group]) + ' in ' + base_filename).replace('_', ' ')
    return textwrap.fill(title_text, width=140)


def _save_and_close(fig, group, base_filename):
    """Write the figure to save_directory as a .jpg, close it, and return the path."""
    fig.tight_layout()
    # NOTE(review): the standard and delta plots use the same naming scheme, so a group
    # that appears in both column lists overwrites its own file on disk. The slides are
    # unaffected because each image is inserted right after it is saved.
    save_path = save_directory + ''.join(group).replace(' ', '').replace('/', '') + '_' + base_filename + '.jpg'
    fig.savefig(save_path)
    plt.close(fig)  # figures are created in a loop; close each one to release memory
    return save_path


def _plot_standard_group(data, group, base_filename):
    """Plot the raw values of one column group against elapsed seconds; return the saved path."""
    fig, ax1 = plt.subplots(figsize=(40, 5))
    ax1.set_prop_cycle(default_cycler)  # Set color cycle for primary axis

    # A secondary axis fixed to 0-100 is only created when the group has a percentage column
    secondary_y_needed = any(_on_percentage_axis(col) for col in group)
    ax2 = None
    if secondary_y_needed:
        ax2 = ax1.twinx()
        ax2.set_ylim(0, 100)
        ax2.set_ylabel('Percentage', fontsize=fontsize)
        ax2.set_prop_cycle(secondary_cycler)  # Set color cycle for secondary axis

    for column in group:
        if column not in data.columns:
            continue
        # ax2 always exists here for a percentage column, because that column itself
        # made secondary_y_needed true
        target_ax = ax2 if _on_percentage_axis(column) else ax1
        target_ax.plot(data['Seconds'], data[column], label=_describe(column), marker='.')

    # One combined legend so the lines of both axes are listed together
    lines, labels = ax1.get_legend_handles_labels()
    if secondary_y_needed:
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax2.legend(lines + lines2, labels + labels2, loc='upper left', fontsize=fontsize)
    else:
        ax1.legend(fontsize=fontsize)

    plt.title(_wrapped_title('Performance Metrics: ', group, base_filename), fontsize=fontsize)
    ax1.set_xlabel('Time (s)', fontsize=fontsize)
    ax1.set_ylabel('Value', fontsize=fontsize)
    ax1.grid(True)

    ax1.tick_params(axis='both', labelsize=fontsize)
    if secondary_y_needed:
        ax2.tick_params(axis='both', labelsize=fontsize)

    return _save_and_close(fig, group, base_filename)


def _plot_delta_group(data, group, base_filename):
    """Plot the sample-to-sample change of one column group on a log axis; return the saved path."""
    fig, ax = plt.subplots(figsize=(40, 5))
    ax.set_prop_cycle(default_cycler)

    for column in group:
        if column in data.columns:
            # diff() leaves NaN in the first row, hence plotting from the second sample.
            # Every delta is offset by +1 before plotting, presumably so that zero deltas
            # stay drawable on the log-scaled axis. The curve therefore shows delta + 1
            # while the legend says "Delta".
            # NOTE(review): confirm the offset is intended.
            shifted_delta = data[column].diff() + 1
            ax.plot(data['Seconds'].iloc[1:], shifted_delta.iloc[1:], label=f'Delta {_describe(column)}', marker='.', linestyle='-')

    ax.set_yscale('log')
    ax.legend(fontsize=fontsize)
    plt.title(_wrapped_title('Performance Metrics: Delta ', group, base_filename), fontsize=fontsize)
    ax.set_xlabel('Time (s)', fontsize=fontsize)
    ax.set_ylabel('Delta Value', fontsize=fontsize)
    ax.grid(True)
    ax.tick_params(axis='both', labelsize=fontsize)

    return _save_and_close(fig, group, base_filename)


def _place_on_slide(save_path, slide, figure_count):
    """Insert a saved figure into the next free slot and return the slide it landed on.

    A new slide is started every `figures_per_slide` figures. Both the standard and the
    delta plots go through this helper, so every figure gets the same top margin and the
    same one-slot pitch; different base offsets would make neighbouring figures overlap.
    """
    slot = figure_count % figures_per_slide
    if slot == 0 and figure_count != 0:  # the current slide is full
        slide = prs.slides.add_slide(prs.slide_layouts[5])
    insert_figures_into_pptx(save_path, slide, slide_top_margin + figure_height * slot)
    return slide


# savefig does not create missing folders, so make sure the output folder exists
os.makedirs(save_directory, exist_ok=True)

# NOTE(review): the slice skips the first 18 files, presumably because they belong to
# another presentation; confirm against the cell that builds that deck.
for filename in shortest_files_list_8cpu[18:]:
    if not filename.endswith('.txt'):
        continue

    # Prepare filename for display in titles
    base_filename = filename.replace('.txt', '')

    file_path = os.path.join(directory, filename)
    data = pd.read_csv(file_path, delimiter='\t').ffill()  # Forward fill to handle missing data
    if data.empty:
        # Nothing to plot, and the elapsed-time computation below needs a first row
        print(f"Skipping {filename}: no rows to plot")
        continue

    # Convert timestamps and calculate elapsed seconds since the first sample
    data['Timestamp'] = pd.to_datetime(data['Timestamp'])
    data['Seconds'] = (data['Timestamp'] - data['Timestamp'].iloc[0]).dt.total_seconds()

    # Title slide for the file (assuming layout 0 is the title slide layout)
    title_slide = prs.slides.add_slide(prs.slide_layouts[0])
    title_slide.shapes.title.text = f"Report for {base_filename} (run in a VM with 8 vCPU)"

    # First figure slide; further slides are added by _place_on_slide as they fill up
    slide = prs.slides.add_slide(prs.slide_layouts[5])
    figure_count = 0

    for group in standard_columns_8cpu:
        save_path = _plot_standard_group(data, group, base_filename)
        slide = _place_on_slide(save_path, slide, figure_count)
        figure_count += 1

    for group in diff_columns:
        save_path = _plot_delta_group(data, group, base_filename)
        slide = _place_on_slide(save_path, slide, figure_count)
        figure_count += 1


In [None]:
# Write the deck currently held in `prs` (filled by the preceding plotting cell) to disk.
# The path is relative, so the file is created in the notebook's working directory.
prs.save('performance_metrics_presentation_8cpu_2.pptx')