# Skeleton Notebook

This Jupyter Notebook serves as a starting point for your future Notebooks. You can copy this Notebook to your directory, rename it, and extend it. 

In [None]:
# imports
import smfexplorer
from smfexplorer.fields import SMF70S1, SMF72S3  # change or add fields of interest here
from smfexplorer import names

from plotly import express as px  # for visualization

# for data processing
import pandas as pd
import numpy as np

In [None]:
# define Context
# "YOUR.SMF.DATA" is a placeholder: replace it with the name of your own data set
DATASET = "YOUR.SMF.DATA"
# ctx is the object the following cells call request() / samples on to fetch data
ctx = smfexplorer.new_context(DATASET)

In [None]:
# ------------------------------------------------#
# Fetch the data  (select one of 3 methods)
# ------------------------------------------------#
# NOTE: every statement below assigns to `df`, so each one overwrites the
# result of the previous one. Keep the variant you need and delete the others.
# RECORD, FIELD1, FIELD2, SAMPLENAME, CONDITION1 and CONDITION2 are
# placeholders that are not defined in this Notebook; replace them before running.

# 1. using request() method: pass the list of fields to fetch
df = ctx.request([RECORD.FIELD1, RECORD.FIELD2]).run()  # add fields
# example: df = ctx.request([SMF70S1.timestamp, SMF70S1.sid]).run()

# 2. using samples(): call one of the samples available on ctx.samples
df = ctx.samples.SAMPLENAME().run()  # add sample name
# example: df = ctx.samples.lpar_information().run()

# 3. using samples() with display: pass a list of fields as display= to run()
df = ctx.samples.SAMPLENAME().run(
    display=[RECORD.FIELD1]
)  # add sample name and field name in display
# example: df = ctx.samples.lpar_information().run(display=[SMF70S1.capacity_group_member])
# (verify the exact field name against the SMF70S1 field list)


# if you want to reduce the data before fetching, you can use where();
# conditions are combined with & and each one is wrapped in parentheses:
df = ctx.samples.SAMPLENAME().where((CONDITION1) & (CONDITION2)).run()
# example: df = ctx.samples.lpar_information().where((SMF70S1.lpar_name == SMF70S1.system_name) & (SMF70S1.lpar_cpu_count > 5)).run()

In [None]:
# display the data
# NOTE: display() is not imported in this Notebook; presumably it is the
# function the Jupyter/IPython environment makes available in every cell.
display(df)

In [None]:
# -----------#
# Filter
# -----------#

# Keep only the rows inside a date-time range (both bounds are exclusive).
# The timestamp column is looked up through names(), like the other column
# lookups in this cell, instead of a hard-coded "timestamp" string.
df = df.loc[
    (df[names(RECORD.timestamp)] > "YYYY-MM-DD HH:MM:SS")  # range start
    & (df[names(RECORD.timestamp)] < "YYYY-MM-DD HH:MM:SS")  # range end
]

# Get all Systems used in the dump
# Instead of sid you can use another field to get an overview
# (e.g., lpar_name, cpu_type, etc.)
systems = df[names(RECORD.sid)].unique()
print(systems)

# Select one system: keep only the rows whose sid equals the chosen name
df = df[df[names(RECORD.sid)] == "SYSTEM_NAME"]

# Calculate sum or average
# get the average of FIELD1
avg = df[names(RECORD.FIELD1)].mean()
print(f"Average FIELD1 is {avg}")

# get the sum of FIELD1
sum_f = df[names(RECORD.FIELD1)].sum()
print(f"Sum FIELD1 is {sum_f}")

In [None]:
# -----------#
# Plot
# -----------#

# Line chart of FIELD1 over time
plot = px.line(
    data_frame=df,
    title="TITLE",
    x=names(RECORD.timestamp),  # the x axis usually depicts time
    y=names(RECORD.FIELD1),
    # maps column names to the text shown for them in the figure
    labels={
        "initial_value": "Renamed value",
        "initial_value2": "Second renamed value",
        names(RECORD.timestamp): "Time",
    },
)
display(plot)

In [None]:
# create bar-chart
# The x and y columns are resolved through names(), the same lookup the
# `labels` mapping in this call uses, so the "Time" label matches the x column.
bar = px.bar(
    df,
    x=names(RECORD.timestamp),
    y=[names(RECORD.FIELD1), names(RECORD.FIELD2)],  # one bar series per field
    title="Ratio of FIELD1 and FIELD2 over time",
    # maps column names to the text shown for them in the figure
    labels={
        "initial_value": "Renamed value",
        "initial_value2": "Second renamed value",
        names(RECORD.timestamp): "Time",
    },
)
display(bar)