In [None]:
import pandas as pd
from plotnine import *
import os

In [None]:
# Directory names used by this notebook. They are relative paths, so they are
# resolved against the kernel's current working directory.
# NOTE(review): entry_directory and prepared_directory are not referenced in the
# visible cells; presumably they belong to earlier pipeline stages — confirm.
entry_directory = "Raw"
prepared_directory = "Prepared"
# Root folder of the per-type CSV files that the plotting cells read.
organised_directory = "Organised"

## Example graphs for each UDF type

In [None]:
%matplotlib inline
udf_types = ['aggregation', 'filtration', 'filtration-aggregation', 'filtration-aggregation-join', 'filtration-join']
for udf_type in udf_types:
  joined_file_all = pd.read_csv(f"{organised_directory}/{udf_type}/joined_{udf_type}.csv")
  joined_file_snapshot = joined_file_all[joined_file_all["snapshot"] == 1]

  cpu_plot = (ggplot(joined_file_snapshot)         # defining what data to use
    + aes(x='epoch', y = "CPU")    # defining what variable to use
    + geom_line(size=0.5, color = "red") # defining the type of plot to use
    + labs(title=f"Przebieg zużycia CPU - {udf_type}", y="Zużycie procesora (%)", x = "Czas (s)")
  )

  ram_plot = (ggplot(joined_file_snapshot)         # defining what data to use
    + aes(x='epoch', y = "RAM")    # defining what variable to use
    + geom_line(size=0.5, color = "red") # defining the type of plot to use
    + labs(title=f"Przebieg zużycia RAM - {udf_type}", y="Zużycie RAM (%)", x = "Czas (s)")
  )
  if not os.path.exists(f"Plots/{udf_type}"):
    os.makedirs(f"Plots/{udf_type}")
    
  cpu_plot.save(f"Plots/{udf_type}/{udf_type}_example_cpu_snapshot_1.png", dpi=600)
  ram_plot.save(f"Plots/{udf_type}/{udf_type}_example_ram_snapshot_1.png", dpi=600)


## Example smooth graphs for aggregation

In [None]:
%matplotlib inline
udf_type = "aggregation"
samples = [6, 12, 18]
for sample in samples:
  joined_aggregation_all = pd.read_csv(f"{organised_directory}/{udf_type}/{sample}_smooth_{udf_type}.csv")
  joined_aggregation = joined_aggregation_all[joined_aggregation_all["snapshot"] == 1]

  cpu_plot = (ggplot(joined_aggregation)         # defining what data to use
    + aes(x='epoch', y = "CPU")    # defining what variable to use
    + geom_line(size=0.5, color = "red") # defining the type of plot to use
    + labs(title=f"Przebieg zużycia CPU - {udf_type}", y="Zużycie procesora (%)", x = "Czas (s)")
  )

  ram_plot = (ggplot(joined_aggregation)         # defining what data to use
    + aes(x='epoch', y = "RAM")    # defining what variable to use
    + geom_line(size=0.5, color = "red") # defining the type of plot to use
    + labs(title=f"Przebieg zużycia RAM - {udf_type}", y="Zużycie RAM (%)", x = "Czas (s)")
  )
  if not os.path.exists(f"Plots/{udf_type}"):
    os.makedirs(f"Plots/{udf_type}")
    
  cpu_plot.save(f"Plots/{udf_type}/{udf_type}_smooth_{sample}_cpu_snapshot_1.png", dpi=600)
  ram_plot.save(f"Plots/{udf_type}/{udf_type}_smooth_{sample}_ram_snapshot_1.png", dpi=600)

## Example standardised graphs for aggregation

In [None]:
%matplotlib inline
udf_type = "aggregation"
samples = [1, 30, 60]
joined_aggregation_all = pd.read_csv(f"{organised_directory}/{udf_type}/standardised_{udf_type}.csv")
for sample in samples:
  joined_aggregation = joined_aggregation_all[joined_aggregation_all["snapshot"] == sample]

  cpu_plot = (ggplot(joined_aggregation)         # defining what data to use
    + aes(x='epoch', y = "CPU")    # defining what variable to use
    + geom_line(size=0.5, color = "red") # defining the type of plot to use
    + labs(title=f"Przebieg zużycia CPU - {udf_type}", y="Zużycie procesora (%)", x = "Czas (s)")
  )

  ram_plot = (ggplot(joined_aggregation)         # defining what data to use
    + aes(x='epoch', y = "RAM")    # defining what variable to use
    + geom_line(size=0.5, color = "red") # defining the type of plot to use
    + labs(title=f"Przebieg zużycia RAM - {udf_type}", y="Zużycie RAM (%)", x = "Czas (s)")
  )
  if not os.path.exists(f"Plots/{udf_type}"):
    os.makedirs(f"Plots/{udf_type}")
    
  cpu_plot.save(f"Plots/{udf_type}/{udf_type}_standardised_cpu_snapshot_{sample}.png", dpi=600)
  ram_plot.save(f"Plots/{udf_type}/{udf_type}_standardised_ram_snapshot_{sample}.png", dpi=600)