# Grafy
Notatnik tworzący wszelakiej maści grafy dotyczące przetwarzania i opisu danych, które są wykorzystywane w pracy magisterskiej.

In [31]:
import pandas as pd
import plotnine as plt
import os

# Folder (one level above the working directory) from which the CSV datasets are read.
organised_directory = "Organised"
# Folder (one level above the working directory) into which the generated plots are saved.
plots_directory = "Plots"

### Przykładowy graf surowego szeregu czasowego dla każdego typu.

In [32]:
# Draw one example CPU and one example RAM line plot per UDF type from the
# Time_series_udf_dataset.csv file and save them as PNG files.
# NOTE: `plt` is the plotnine alias imported at the top of the notebook.
udf_types = ["aggregation", "filtration", "filtration-aggregation", "filtration-aggregation-join", "filtration-join"]
dataset_df = pd.read_csv(f"./../{organised_directory}/Time_series_udf_dataset.csv")
snapshot = 1  # value of the "snapshot" column used as the example
for udf_type in udf_types:
    # Keep only the rows of the selected snapshot for the current UDF type.
    snapshot_df = dataset_df[(dataset_df["label"] == udf_type) & (dataset_df["snapshot"] == snapshot)]

    cpu_plot = (
        plt.ggplot(snapshot_df)
        + plt.aes(x="epoch", y="CPU")
        + plt.geom_line(size=0.5, color="red")
        + plt.labs(title=f"Przebieg zużycia CPU - {udf_type}", y="Zużycie procesora (%)", x="Czas (s)")
    )

    ram_plot = (
        plt.ggplot(snapshot_df)
        + plt.aes(x="epoch", y="RAM")
        + plt.geom_line(size=0.5, color="red")
        + plt.labs(title=f"Przebieg zużycia RAM - {udf_type}", y="Zużycie RAM (%)", x="Czas (s)")
    )

    # Every UDF type gets its own sub-folder; exist_ok=True creates it only
    # when missing, so no separate existence check is needed.
    output_directory = f"./../{plots_directory}/{udf_type}"
    os.makedirs(output_directory, exist_ok=True)

    cpu_plot.save(f"{output_directory}/{udf_type}_example_cpu_snapshot_{snapshot}.png", dpi=600, verbose=False)
    ram_plot.save(f"{output_directory}/{udf_type}_example_ram_snapshot_{snapshot}.png", dpi=600, verbose=False)

### Przykładowy graf znormalizowanego szeregu czasowego dla każdego typu.

In [33]:
# Draw one example CPU and one example RAM line plot per UDF type from the
# Time_series_udf_dataset_normalized.csv file and save them as PNG files.
# NOTE: `plt` is the plotnine alias imported at the top of the notebook.
udf_types = ["aggregation", "filtration", "filtration-aggregation", "filtration-aggregation-join", "filtration-join"]
dataset_df = pd.read_csv(f"./../{organised_directory}/Time_series_udf_dataset_normalized.csv")
snapshot = 1  # value of the "snapshot" column used as the example
for udf_type in udf_types:
    # Keep only the rows of the selected snapshot for the current UDF type.
    snapshot_df = dataset_df[(dataset_df["label"] == udf_type) & (dataset_df["snapshot"] == snapshot)]

    # NOTE(review): titles and axis labels are the same as for the
    # non-normalized plots, including the "(%)" unit - confirm that this is
    # still the right unit for the normalized data.
    cpu_plot = (
        plt.ggplot(snapshot_df)
        + plt.aes(x="epoch", y="CPU")
        + plt.geom_line(size=0.5, color="red")
        + plt.labs(title=f"Przebieg zużycia CPU - {udf_type}", y="Zużycie procesora (%)", x="Czas (s)")
    )

    ram_plot = (
        plt.ggplot(snapshot_df)
        + plt.aes(x="epoch", y="RAM")
        + plt.geom_line(size=0.5, color="red")
        + plt.labs(title=f"Przebieg zużycia RAM - {udf_type}", y="Zużycie RAM (%)", x="Czas (s)")
    )

    # Every UDF type gets its own sub-folder; exist_ok=True creates it only
    # when missing, so no separate existence check is needed.
    output_directory = f"./../{plots_directory}/{udf_type}"
    os.makedirs(output_directory, exist_ok=True)

    cpu_plot.save(f"{output_directory}/{udf_type}_example_normalized_cpu_snapshot_{snapshot}.png", dpi=600, verbose=False)
    ram_plot.save(f"{output_directory}/{udf_type}_example_normalized_ram_snapshot_{snapshot}.png", dpi=600, verbose=False)

### Przykładowe wygładzone surowe grafy.

In [37]:
# Draw example CPU and RAM line plots per UDF type for every
# Time_series_udf_dataset_smooth_<sample>.csv file and save them as PNG files.
# NOTE: `plt` is the plotnine alias imported at the top of the notebook.
udf_types = ["aggregation", "filtration", "filtration-aggregation", "filtration-aggregation-join", "filtration-join"]
snapshot = 1  # value of the "snapshot" column used as the example
samples = [6, 12, 18]  # suffixes of the smoothed dataset files to plot
# The CSV file depends only on `sample`, so it is the outer loop: each file is
# parsed once instead of once per UDF type. The set of saved plots is the same.
for sample in samples:
    dataset_df = pd.read_csv(f"./../{organised_directory}/Time_series_udf_dataset_smooth_{sample}.csv")
    for udf_type in udf_types:
        # Keep only the rows of the selected snapshot for the current UDF type.
        snapshot_df = dataset_df[(dataset_df["label"] == udf_type) & (dataset_df["snapshot"] == snapshot)]

        cpu_plot = (
            plt.ggplot(snapshot_df)
            + plt.aes(x="epoch", y="CPU")
            + plt.geom_line(size=0.5, color="red")
            + plt.labs(title=f"Przebieg zużycia CPU - {udf_type}", y="Zużycie procesora (%)", x="Czas (s)")
        )

        ram_plot = (
            plt.ggplot(snapshot_df)
            + plt.aes(x="epoch", y="RAM")
            + plt.geom_line(size=0.5, color="red")
            + plt.labs(title=f"Przebieg zużycia RAM - {udf_type}", y="Zużycie RAM (%)", x="Czas (s)")
        )

        # Every UDF type gets its own sub-folder; exist_ok=True creates it
        # only when missing, so no separate existence check is needed.
        output_directory = f"./../{plots_directory}/{udf_type}"
        os.makedirs(output_directory, exist_ok=True)

        cpu_plot.save(f"{output_directory}/{udf_type}_example_cpu_snapshot_{snapshot}_smooth_{sample}.png", dpi=600, verbose=False)
        ram_plot.save(f"{output_directory}/{udf_type}_example_ram_snapshot_{snapshot}_smooth_{sample}.png", dpi=600, verbose=False)

### Przykładowe wygładzone znormalizowane grafy

In [35]:
# Draw example CPU and RAM line plots per UDF type for every
# Time_series_udf_dataset_normalized_smooth_<sample>.csv file and save them as
# PNG files.
# NOTE: `plt` is the plotnine alias imported at the top of the notebook.
udf_types = ["aggregation", "filtration", "filtration-aggregation", "filtration-aggregation-join", "filtration-join"]
snapshot = 1  # value of the "snapshot" column used as the example
samples = [6, 12, 18]  # suffixes of the smoothed dataset files to plot
# The CSV file depends only on `sample`, so it is the outer loop: each file is
# parsed once instead of once per UDF type. The set of saved plots is the same.
for sample in samples:
    dataset_df = pd.read_csv(f"./../{organised_directory}/Time_series_udf_dataset_normalized_smooth_{sample}.csv")
    for udf_type in udf_types:
        # Keep only the rows of the selected snapshot for the current UDF type.
        snapshot_df = dataset_df[(dataset_df["label"] == udf_type) & (dataset_df["snapshot"] == snapshot)]

        # NOTE(review): titles and axis labels are the same as for the
        # non-normalized plots, including the "(%)" unit - confirm that this
        # is still the right unit for the normalized data.
        cpu_plot = (
            plt.ggplot(snapshot_df)
            + plt.aes(x="epoch", y="CPU")
            + plt.geom_line(size=0.5, color="red")
            + plt.labs(title=f"Przebieg zużycia CPU - {udf_type}", y="Zużycie procesora (%)", x="Czas (s)")
        )

        ram_plot = (
            plt.ggplot(snapshot_df)
            + plt.aes(x="epoch", y="RAM")
            + plt.geom_line(size=0.5, color="red")
            + plt.labs(title=f"Przebieg zużycia RAM - {udf_type}", y="Zużycie RAM (%)", x="Czas (s)")
        )

        # Every UDF type gets its own sub-folder; exist_ok=True creates it
        # only when missing, so no separate existence check is needed.
        output_directory = f"./../{plots_directory}/{udf_type}"
        os.makedirs(output_directory, exist_ok=True)

        cpu_plot.save(f"{output_directory}/{udf_type}_example_normalized_cpu_snapshot_{snapshot}_smooth_{sample}.png", dpi=600, verbose=False)
        ram_plot.save(f"{output_directory}/{udf_type}_example_normalized_ram_snapshot_{snapshot}_smooth_{sample}.png", dpi=600, verbose=False)

### Histogramy dla CPU oraz RAM

In [36]:
# Draw histograms of the "CPU" and "RAM" columns over all rows of
# Time_series_udf_dataset.csv and save them as PNG files.
# NOTE: `plt` is the plotnine alias imported at the top of the notebook.
df = pd.read_csv(f"./../{organised_directory}/Time_series_udf_dataset.csv")

cpu_plot = (
    plt.ggplot(df, plt.aes(x="CPU"))
    + plt.geom_histogram(size=0.5, binwidth=20)
    + plt.labs(title="Histogram", y="Liczebność", x="CPU")
)

ram_plot = (
    plt.ggplot(df, plt.aes(x="RAM"))
    + plt.geom_histogram(size=0.5, binwidth=0.6)
    + plt.labs(title="Histogram", y="Liczebność", x="RAM")
)

# Use the shared `plots_directory` constant (as the other cells do) instead of
# a hard-coded folder name; exist_ok=True creates the folder only when missing.
histogram_directory = f"./../{plots_directory}"
os.makedirs(histogram_directory, exist_ok=True)

cpu_plot.save(f"{histogram_directory}/hist_cpu.png", dpi=600, verbose=False)
ram_plot.save(f"{histogram_directory}/hist_ram.png", dpi=600, verbose=False)