In [12]:
import pandas as pd

def txt_to_pd(input_txt): 
    mpi_processes = []
    real_time = []
    user_time = []
    sys_time = []

    # Open the text file
    with open(input_txt, 'r') as file:
        lines = file.readlines()

        # Initialize variables to hold current run and process numbers
        current_processes = None
        real = None
        user = None
        sys = None

        # Iterate through each line in the file
        for line in lines:
            if line.startswith(("MPI processes:", "OpenMP threads")):
                current_processes = int(line.split(":")[1].strip())
                mpi_processes.append(current_processes)
                
            elif line.startswith("real"):
                real = line.split()[-1].strip()
                minutes, seconds = real.split('m')
                seconds = seconds[:-1]
                # Convert minutes and seconds to seconds
                total_seconds = int(minutes) * 60 + float(seconds)
                real_time.append(total_seconds)
            
            elif line.startswith("user"):
                user = line.split()[-1].strip()
                minutes, seconds = user.split('m')
                seconds = seconds[:-1]
                # Convert minutes and seconds to seconds
                total_seconds = int(minutes) * 60 + float(seconds)
                user_time.append(total_seconds)
                
            
            elif line.startswith("sys"):
                sys = line.split()[-1].strip()
                minutes, seconds = sys.split('m')
                seconds = seconds[:-1]
                # Convert minutes and seconds to seconds
                total_seconds = int(minutes) * 60 + float(seconds)
                sys_time.append(total_seconds)

    data = {
    'MPI processes': mpi_processes,
    'Real time': real_time,
    'User time': user_time,
    'System time': sys_time}

    data = pd.DataFrame(data)

    return data


# MPI scaling: 


In [13]:
df = txt_to_pd('results/mpi_scaling.txt')
baseline : float = 1565.986
df['Speedup'] = baseline / df['Real time']
df['theoretical_speedup'] = 1/ ((0.001) + (0.999 / (df['MPI processes'] - 1)))
df['theoretical time'] = baseline / df['theoretical_speedup']
df

Unnamed: 0,MPI processes,Real time,User time,System time,Speedup,theoretical_speedup,theoretical time
0,2,1565.986,3117.178,1.431,1.000000,1.000000,1565.986000
1,4,733.536,2899.800,2.358,2.134845,2.994012,523.039324
2,6,403.249,2384.972,3.700,3.883422,4.980080,314.449989
3,8,477.725,3766.183,5.679,3.278007,6.958250,225.054559
4,10,327.489,3216.891,7.314,4.781797,8.928571,175.390432
...,...,...,...,...,...,...,...
123,248,37.150,3713.272,272.548,42.153055,198.234350,7.899670
124,250,36.377,3617.018,279.145,43.048795,199.519231,7.848797
125,252,37.071,3688.596,266.279,42.242885,200.800000,7.798735
126,254,36.365,3613.696,271.832,43.063000,202.076677,7.749464


### Plotting the execution time 

In [14]:
import plotly.graph_objects as go
import numpy as np
import plotly.express as px

# Create a line for Real time
fig = go.Figure()
fig.add_trace(go.Scatter(x=df['MPI processes'], y=df['Real time'], mode='lines', name='Measured time'))

# Create a line for Theoretical time
fig.add_trace(go.Scatter(x=df['MPI processes'], y=df['theoretical time'], mode='lines', name='Theoretical time'))

total_processes = df['MPI processes'].max()
quarter_points = [int(total_processes / 4) * i for i in range(5)]  # [0, 1/4, 2/4, 3/4, 1]

# Set the x-axis tick marks and labels
fig.update_xaxes(
    tickvals=quarter_points,
    ticktext=[f'{int(p)}' for p in quarter_points]  # You can customize the format of the labels
)

# Annotate the quarter points with y-axis labels for both Real time and Theoretical time
for x in quarter_points:
    # Find the nearest MPI processes value to the quarter point for Real time
    nearest_idx_real = np.abs(df['MPI processes'] - x).idxmin()
    nearest_mpi_processes_real = df.loc[nearest_idx_real, 'MPI processes']
    nearest_real_time = df.loc[nearest_idx_real, 'Real time']
    fig.add_annotation(x=nearest_mpi_processes_real, y=nearest_real_time + 100, text=str(nearest_real_time), showarrow=True, arrowhead=1)

    # Find the nearest MPI processes value to the quarter point for Theoretical time
    nearest_idx_theoretical = np.abs(df['MPI processes'] - x).idxmin()
    nearest_mpi_processes_theoretical = df.loc[nearest_idx_theoretical, 'MPI processes']
    nearest_theoretical_time = df.loc[nearest_idx_theoretical, 'theoretical time']
    fig.add_annotation(x=nearest_mpi_processes_theoretical, y=nearest_theoretical_time, text=str(nearest_theoretical_time), showarrow=True, arrowhead=1)

fig.update_layout(
    title="MPI Scaling in seconds",
    xaxis_title="MPI processes",
    yaxis_title="Time in seconds"
)
fig.show()

### Determining the speed up 

In [15]:
# Create a line for Real time
fig = go.Figure()
fig.add_trace(go.Scatter(x=df['MPI processes'], y=df['Speedup'], mode='lines', name='Measured speedup'))

# Create a line for Theoretical time
fig.add_trace(go.Scatter(x=df['MPI processes'], y=df['theoretical_speedup'], mode='lines', name='Theoretical speedup'))

total_processes = df['MPI processes'].max()
quarter_points = [int(total_processes / 4) * i for i in range(5)]  # [0, 1/4, 2/4, 3/4, 1]

# Set the x-axis tick marks and labels
fig.update_xaxes(
    tickvals=quarter_points,
    ticktext=[f'{int(p)}' for p in quarter_points]  # You can customize the format of the labels
)

# Annotate the quarter points with y-axis labels for both Real time and Theoretical time
for x in quarter_points:
    # Find the nearest MPI processes value to the quarter point for Real time
    nearest_idx_real = np.abs(df['MPI processes'] - x).idxmin()
    nearest_mpi_processes_real = df.loc[nearest_idx_real, 'MPI processes']
    nearest_real_time = df.loc[nearest_idx_real, 'Speedup']
    fig.add_annotation(x=nearest_mpi_processes_real, y=nearest_real_time, text=str(nearest_real_time), showarrow=True, arrowhead=1)

    # Find the nearest MPI processes value to the quarter point for Theoretical time
    nearest_idx_theoretical = np.abs(df['MPI processes'] - x).idxmin()
    nearest_mpi_processes_theoretical = df.loc[nearest_idx_theoretical, 'MPI processes']
    nearest_theoretical_time = df.loc[nearest_idx_theoretical, 'theoretical_speedup']
    fig.add_annotation(x=nearest_mpi_processes_theoretical, y=nearest_theoretical_time, text=str(nearest_theoretical_time), showarrow=True, arrowhead=1)

fig.update_layout(
    title="MPI Speedup",
    xaxis_title="MPI processes",
    yaxis_title="Speedup"
)
# Show the plot
fig.show()

# OpenMP scaling

In [16]:
df_omp = txt_to_pd('results/omp_scaling.txt')
omp_df = df_omp.rename(columns={'MPI processes': 'OpenMP threads'})
baseline : float = 926.609
omp_df['Speedup'] = baseline / df['Real time']
omp_df

Unnamed: 0,OpenMP threads,Real time,User time,System time,Speedup
0,2,926.609,1834.458,3.466,0.59171
1,4,856.403,3384.166,3.898,1.263209
2,6,462.5,2735.666,2.575,2.297858
3,8,553.89,4359.398,3.365,1.939628
4,10,367.81,3611.656,2.393,2.829435
5,12,459.878,5404.4,3.15,2.249646
6,14,339.712,4645.499,2.659,3.058731
7,16,476.156,7436.214,3.601,2.259592
8,18,310.705,5445.415,2.602,3.268866
9,20,302.655,5891.768,2.612,3.452665


### Plotting the execution time

In [17]:
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

# Plot the execution time 

fig = px.line(omp_df, x='OpenMP threads', y='Real time', title="OpenMP Scaling in seconds")
total_processes = omp_df['OpenMP threads'].max()
quarter_points = [int(total_processes / 4) * i for i in range(5)]  # [0, 1/4, 2/4, 3/4, 1]

# Set the x-axis tick marks and labels
fig.update_xaxes(
    tickvals=quarter_points,
    ticktext=[f'{int(p)}' for p in quarter_points]  # You can customize the format of the labels
)

# Annotate the quarter points with y-axis labels
for x in quarter_points:
    # Find the nearest MPI processes value to the quarter point
    nearest_idx = np.abs(omp_df['OpenMP threads'] - x).idxmin()
    nearest_mpi_processes = omp_df.loc[nearest_idx, 'OpenMP threads']
    nearest_real_time = omp_df.loc[nearest_idx, 'Real time']
    fig.add_annotation(x=nearest_mpi_processes, y=nearest_real_time, text=str(nearest_real_time), showarrow=True, arrowhead=1)

# Show the plot
fig.show()

### Determining the speedup 

In [18]:
# Plot the execution time 

fig = px.line(omp_df, x='OpenMP threads', y='Speedup', title="Speedup factor")
total_processes = omp_df['OpenMP threads'].max()
quarter_points = [int(total_processes / 4) * i for i in range(5)]  # [0, 1/4, 2/4, 3/4, 1]

# Set the x-axis tick marks and labels
fig.update_xaxes(
    tickvals=quarter_points,
    ticktext=[f'{int(p)}' for p in quarter_points]  # You can customize the format of the labels
)

# Annotate the quarter points with y-axis labels
for x in quarter_points:
    # Find the nearest MPI processes value to the quarter point
    nearest_idx = np.abs(omp_df['OpenMP threads'] - x).idxmin()
    nearest_mpi_processes = omp_df.loc[nearest_idx, 'OpenMP threads']
    nearest_real_time = omp_df.loc[nearest_idx, 'Speedup']
    fig.add_annotation(x=nearest_mpi_processes, y=nearest_real_time, text=str(nearest_real_time), showarrow=True, arrowhead=1)

# Show the plot
fig.show()