# Feature Transformation with Scikit-Learn In This Notebook
## Saving Features into the SageMaker Feature Store



In [1]:
!pip install --disable-pip-version-check -q tensorflow==2.8.1
!pip install --disable-pip-version-check -q transformers==4.46.0
!pip install protobuf==3.20.*
import sagemaker
import boto3

sess = sagemaker.Session()
bucket = "cardiovale-solutions-datascience-pipeline"
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

sm = boto3.Session().client(service_name="sagemaker", region_name=region)
s3 = boto3.Session().client(service_name="s3", region_name=region)

[31mERROR: Could not find a version that satisfies the requirement tensorflow==2.8.1 (from versions: 2.12.0rc0, 2.12.0rc1, 2.12.0, 2.12.1, 2.13.0rc0, 2.13.0rc1, 2.13.0rc2, 2.13.0, 2.13.1, 2.14.0rc0, 2.14.0rc1, 2.14.0, 2.14.1, 2.15.0rc0, 2.15.0rc1, 2.15.0, 2.15.0.post1, 2.15.1, 2.16.0rc0, 2.16.1, 2.16.2, 2.17.0rc0, 2.17.0rc1, 2.17.0, 2.17.1, 2.18.0rc0, 2.18.0rc1, 2.18.0rc2, 2.18.0, 2.18.1, 2.19.0rc0, 2.19.0)[0m[31m
[0m[31mERROR: No matching distribution found for tensorflow==2.8.1[0m[31m
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


# Prepare the data

In [5]:
import pandas as pd
import boto3
import sagemaker
from sagemaker import get_execution_role
from pyathena import connect

# Setup AWS session
region = boto3.Session().region_name
sagemaker_session = sagemaker.Session()
bucket = "cardiovale-solutions-datascience-pipeline"  # Your actual S3 bucket
role = get_execution_role()

# Connect to Athena
conn = connect(s3_staging_dir=f"s3://{bucket}/athena/staging/", region_name=region)

# Fetch `cardio_train_cleaned` dataset
query = "SELECT * FROM cardiovale_db.cardio_train_cleaned"
df_cardio = pd.read_sql(query, conn)

print("Successfully loaded cardio_train_cleaned from Athena")
print(df_cardio.head())


  df_cardio = pd.read_sql(query, conn)


Successfully loaded cardio_train_cleaned from Athena
   id    age  gender  height  weight  ap_hi  ap_lo  cholesterol  gluc  smoke  \
0   0  18393       2     168    62.0    110     80            1     1      0   
1   1  20228       1     156    85.0    140     90            3     1      0   
2   2  18857       1     165    64.0    130     70            3     1      0   
3   3  17623       2     169    82.0    150    100            1     1      0   
4   4  17474       1     156    56.0    100     60            1     1      0   

   alco  active  cardio  
0     0       1       0  
1     0       1       1  
2     0       0       1  
3     0       1       1  
4     0       0       0  


In [9]:
#fetch quitline_fixed_.csv
query = "SELECT * FROM cardiovale_db.quitline_fixed LIMIT 5"
df_quitline = pd.read_sql(query, conn)

print("Successfully loaded quitline_fixed from Athena")
print(df_quitline.head())


  df_quitline = pd.read_sql(query, conn)


Successfully loaded quitline_fixed from Athena
   year  date date_ref locationabbr locationdesc topictype  \
0  2020    12  Jul-Dec           GA      Georgia  Quitline   
1  2020    12  Jul-Dec           MO     Missouri  Quitline   
2  2020    12  Jul-Dec           MT      Montana  Quitline   
3  2020    12  Jul-Dec           NJ   New Jersey  Quitline   
4  2020    12  Jul-Dec           OK     Oklahoma  Quitline   

            topicdesc  measuredesc         sub_measure              variable  \
0  Services Available  Medications    Nicotine Lozenge                         
1  Services Available  Medications    Nicotine Lozenge                         
2  Services Available  Medications  Bupropion (Zyban®)                         
3  Services Available  Medications    Nicotine Lozenge                         
4  Services Available  Medications        Nicotine Gum  All Eligible Callers   

   ... number_of_weeks_offered    limit_per_year comments  \
0  ...                     NaN        

In [18]:
# --------------------------------------
# The cardio dataset does not have a 'State' column, but the quitline dataset is state-based.
# To merge both datasets, we randomly assign a state to each row in the cardio dataset.
# This ensures we can associate each patient with state-level quitline data.
# However, this is an artificial assignment and does not reflect real patient locations.
import random

# Use correct column name from quitline dataset
state_column = "locationabbr"

# Extract unique states from quitline dataset
states = df_quitline[state_column].dropna().unique().tolist()

# Randomly assign a state to each row in the cardio dataset
df_cardio["State"] = [random.choice(states) for _ in range(len(df_cardio))]

# Aggregating Quitline Data
# ----------------------------
# We summarize state-level quitline data by:
# - Calculating the percentage of states where free services are offered
# - Calculating the percentage of states with medical restrictions
# - Taking the mean of the number of weeks offered for treatment

df_quitline_summary = df_quitline.groupby(state_column).agg({
    "offered_for_free": lambda x: (x == "Yes").mean(),  # Convert Yes/No to numerical (percentage)
    "medical_restrictions": lambda x: (x == "Yes").mean(),
    "number_of_weeks_offered": "mean"
}).reset_index()

# Rename column for consistency
df_quitline_summary.rename(columns={state_column: "State"}, inplace=True)

# Merging Data
# ----------------
# We now merge the cardio dataset with the quitline summary based on 'State'.
# This allows each row in df_cardio to inherit state-level information from quitline.

df_merged = df_cardio.merge(df_quitline_summary, on="State", how="left")
df_merged["number_of_weeks_offered"].fillna(0, inplace=True)
df_merged["State"] = df_merged["State"].astype("category")



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_merged["number_of_weeks_offered"].fillna(0, inplace=True)


In [19]:
df_merged.head()

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio,State,offered_for_free,medical_restrictions,number_of_weeks_offered
0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0,NJ,1.0,0.0,0.0
1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1,NJ,1.0,0.0,0.0
2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1,GA,0.0,0.0,0.0
3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1,MO,0.0,0.0,0.0
4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0,OK,1.0,1.0,2.0


## Feature Store requires an Event Time feature


In [20]:
from datetime import datetime
from time import strftime

# timestamp = datetime.now().replace(microsecond=0).isoformat()
timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
print(timestamp)

2025-03-21T05:52:50Z


# Add Features to SageMaker Feature Store

## SageMaker Feature Store Runtime


In [21]:
featurestore_runtime = boto3.Session().client(service_name="sagemaker-featurestore-runtime", region_name=region)

## Create FeatureGroup

In [22]:
from time import gmtime, strftime, sleep

feature_group_name = "reviews-feature-group-" + strftime("%d-%H-%M-%S", gmtime())
print(feature_group_name)

reviews-feature-group-21-05-56-35


In [23]:
from sagemaker.feature_store.feature_definition import (
    FeatureDefinition,
    FeatureTypeEnum,
)

# Define feature definitions for the merged cardio + quitline dataset
feature_definitions = [
    FeatureDefinition(feature_name="id", feature_type=FeatureTypeEnum.INTEGRAL),
    FeatureDefinition(feature_name="age", feature_type=FeatureTypeEnum.INTEGRAL),
    FeatureDefinition(feature_name="gender", feature_type=FeatureTypeEnum.INTEGRAL),
    FeatureDefinition(feature_name="height", feature_type=FeatureTypeEnum.INTEGRAL),
    FeatureDefinition(feature_name="weight", feature_type=FeatureTypeEnum.FRACTIONAL),
    FeatureDefinition(feature_name="ap_hi", feature_type=FeatureTypeEnum.INTEGRAL),
    FeatureDefinition(feature_name="ap_lo", feature_type=FeatureTypeEnum.INTEGRAL),
    FeatureDefinition(feature_name="cholesterol", feature_type=FeatureTypeEnum.INTEGRAL),
    FeatureDefinition(feature_name="gluc", feature_type=FeatureTypeEnum.INTEGRAL),
    FeatureDefinition(feature_name="smoke", feature_type=FeatureTypeEnum.INTEGRAL),
    FeatureDefinition(feature_name="alco", feature_type=FeatureTypeEnum.INTEGRAL),
    FeatureDefinition(feature_name="active", feature_type=FeatureTypeEnum.INTEGRAL),
    FeatureDefinition(feature_name="cardio", feature_type=FeatureTypeEnum.INTEGRAL),
    
    # Quitline features
    FeatureDefinition(feature_name="State", feature_type=FeatureTypeEnum.STRING),
    FeatureDefinition(feature_name="offered_for_free", feature_type=FeatureTypeEnum.FRACTIONAL),
    FeatureDefinition(feature_name="medical_restrictions", feature_type=FeatureTypeEnum.FRACTIONAL),
    FeatureDefinition(feature_name="number_of_weeks_offered", feature_type=FeatureTypeEnum.FRACTIONAL),

    # Required timestamp for SageMaker Feature Store
    FeatureDefinition(feature_name="event_time", feature_type=FeatureTypeEnum.STRING)
]


In [10]:
from sagemaker.feature_store.feature_group import FeatureGroup

feature_group = FeatureGroup(name=feature_group_name, feature_definitions=feature_definitions, sagemaker_session=sess)
print(feature_group)

FeatureGroup(name='reviews-feature-group-21-04-51-42', sagemaker_session=<sagemaker.session.Session object at 0x7f63bff91250>, feature_definitions=[FeatureDefinition(feature_name='input_ids', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None), FeatureDefinition(feature_name='input_mask', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None), FeatureDefinition(feature_name='segment_ids', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None), FeatureDefinition(feature_name='label_id', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>, collection_type=None), FeatureDefinition(feature_name='review_id', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None), FeatureDefinition(feature_name='date', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None), FeatureDefinition(feature_name='label', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>, collection_type=None), FeatureDefinition(feature_name='split_t

## Specify `record identifier` and `event time` features

In [11]:
record_identifier_feature_name = "review_id"
event_time_feature_name = "date"

## Set S3 Prefix for Offline Feature Store

In [12]:
prefix = "reviews-feature-store-" + timestamp
print(prefix)

reviews-feature-store-2025-03-21T04:51:42Z


## Create Feature Group

The last step for creating the feature group is to use the `create` function. The online store is not created by default, so we must set this as `True` if we want to enable it. The `s3_uri` is the location of our offline store.

In [13]:
feature_group.create(
    s3_uri=f"s3://{bucket}/{prefix}",
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name=event_time_feature_name,
    role_arn=role,
    enable_online_store=False,
)

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:786782285170:feature-group/reviews-feature-group-21-04-51-42',
 'ResponseMetadata': {'RequestId': '96e8b07c-cad4-4bbf-98ca-6641bb45053c',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '96e8b07c-cad4-4bbf-98ca-6641bb45053c',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '110',
   'date': 'Fri, 21 Mar 2025 04:51:43 GMT'},
  'RetryAttempts': 0}}

## Describe the Feature Group

In [14]:
feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:786782285170:feature-group/reviews-feature-group-21-04-51-42',
 'FeatureGroupName': 'reviews-feature-group-21-04-51-42',
 'RecordIdentifierFeatureName': 'review_id',
 'EventTimeFeatureName': 'date',
 'FeatureDefinitions': [{'FeatureName': 'input_ids', 'FeatureType': 'String'},
  {'FeatureName': 'input_mask', 'FeatureType': 'String'},
  {'FeatureName': 'segment_ids', 'FeatureType': 'String'},
  {'FeatureName': 'label_id', 'FeatureType': 'Integral'},
  {'FeatureName': 'review_id', 'FeatureType': 'String'},
  {'FeatureName': 'date', 'FeatureType': 'String'},
  {'FeatureName': 'label', 'FeatureType': 'Integral'},
  {'FeatureName': 'split_type', 'FeatureType': 'String'}],
 'CreationTime': datetime.datetime(2025, 3, 21, 4, 51, 42, 783000, tzinfo=tzlocal()),
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://sagemaker-us-east-1-786782285170/reviews-feature-store-2025-03-21T04:51:42Z',
   'ResolvedOutputS3Uri': 's3://sagemaker-us-east-1-

## List All Feature Groups

We use the boto3 SageMaker client to list all FeatureGroups.

In [15]:
# sm.list_feature_groups()

## Wait For The Feature Group Creation Complete

Creating a feature group takes time as the data is loaded. We will need to wait until it is created before you can use it. You can check status using the following method.

In [16]:
import time


def wait_for_feature_group_creation_complete(feature_group):
    status = feature_group.describe().get("FeatureGroupStatus")
    while status == "Creating":
        print("Waiting for Feature Group Creation")
        time.sleep(5)
        status = feature_group.describe().get("FeatureGroupStatus")
    if status != "Created":
        raise RuntimeError(f"Failed to create feature group {feature_group.name}")
    print(f"FeatureGroup {feature_group.name} successfully created.")

In [17]:
wait_for_feature_group_creation_complete(feature_group=feature_group)

Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
FeatureGroup reviews-feature-group-21-04-51-42 successfully created.


## Review The Records To Ingest Into Feature Store

In [18]:
max_seq_length = 64
records = transform_inputs_to_tfrecord(inputs, output_file, max_seq_length)

Writing input 0 of 3

**37 tokens**
['[CLS]', 'i', 'needed', 'an', '"', 'anti', '##virus', '"', 'application', 'and', 'know', 'the', 'quality', 'of', 'norton', 'products', '.', 'this', 'was', 'a', 'no', 'brain', '##er', 'for', 'me', 'and', 'i', 'am', 'glad', 'it', 'was', 'so', 'simple', 'to', 'get', '.', '[SEP]']

**input_ids**
[101, 1045, 2734, 2019, 1000, 3424, 23350, 1000, 4646, 1998, 2113, 1996, 3737, 1997, 10770, 3688, 1012, 2023, 2001, 1037, 2053, 4167, 2121, 2005, 2033, 1998, 1045, 2572, 5580, 2009, 2001, 2061, 3722, 2000, 2131, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

**input_mask**
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

**segment_ids**
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0



# Ingest Records into Feature Store

After the FeatureGroups have been created, we can put data into the FeatureGroups by using the `PutRecord` API. 

This API can handle high TPS and is designed to be called by different streams. The data from all of these Put requests is buffered and written to S3 in chunks. 

The files will be written to the offline store within a few minutes of ingestion. To accelerate the ingestion process, we can specify multiple workers to do the job simultaneously. 

Use `put_record(...)` to put a single record in the FeatureGroup.

Use `ingest(...)` to ingest the content of a pandas DataFrame to Feature Store. You can set the `max_worker` to the number of threads to be created to work on different partitions of the `data_frame` in parallel.

In [19]:
import pandas as pd

df_records = pd.DataFrame.from_dict(records)
df_records["split_type"] = "train"
df_records

Unnamed: 0,input_ids,input_mask,segment_ids,label_id,review_id,date,label,split_type
0,"[101, 1045, 2734, 2019, 1000, 3424, 23350, 100...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",4,ABCD12345,2025-03-21T04:51:42Z,5,train
1,"[101, 1996, 3291, 2007, 10777, 23663, 2003, 20...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2,EFGH12345,2025-03-21T04:51:42Z,3,train
2,"[101, 6659, 1010, 3904, 1997, 2026, 9537, 2499...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,IJKL2345,2025-03-21T04:51:42Z,1,train


# Cast DataFrame `Object` to Supported Feature Store Data Type `String`

In [20]:
def cast_object_to_string(data_frame):
    for label in data_frame.columns:
        if data_frame.dtypes[label] == "object":
            data_frame[label] = data_frame[label].astype("str").astype("string")

In [21]:
cast_object_to_string(df_records)

In [22]:
df_records

Unnamed: 0,input_ids,input_mask,segment_ids,label_id,review_id,date,label,split_type
0,"[101, 1045, 2734, 2019, 1000, 3424, 23350, 100...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",4,ABCD12345,2025-03-21T04:51:42Z,5,train
1,"[101, 1996, 3291, 2007, 10777, 23663, 2003, 20...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2,EFGH12345,2025-03-21T04:51:42Z,3,train
2,"[101, 6659, 1010, 3904, 1997, 2026, 9537, 2499...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,IJKL2345,2025-03-21T04:51:42Z,1,train


In [23]:
feature_group.ingest(data_frame=df_records, max_workers=3, wait=True)

IngestionManagerPandas(feature_group_name='reviews-feature-group-21-04-51-42', feature_definitions={'input_ids': {'FeatureName': 'input_ids', 'FeatureType': 'String'}, 'input_mask': {'FeatureName': 'input_mask', 'FeatureType': 'String'}, 'segment_ids': {'FeatureName': 'segment_ids', 'FeatureType': 'String'}, 'label_id': {'FeatureName': 'label_id', 'FeatureType': 'Integral'}, 'review_id': {'FeatureName': 'review_id', 'FeatureType': 'String'}, 'date': {'FeatureName': 'date', 'FeatureType': 'String'}, 'label': {'FeatureName': 'label', 'FeatureType': 'Integral'}, 'split_type': {'FeatureName': 'split_type', 'FeatureType': 'String'}}, sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f637f4a5bd0>, sagemaker_session=<sagemaker.session.Session object at 0x7f63bff91250>, max_workers=3, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7f633ee15d50>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

## Wait For Data In Offline Feature Store To Become Available

Creating a feature group takes time as the data is loaded. We will need to wait until it is created before we can use it. 

In [None]:
offline_store_contents = None

while offline_store_contents is None:
    objects_in_bucket = s3.list_objects(Bucket=bucket, Prefix=prefix)
    if "Contents" in objects_in_bucket and len(objects_in_bucket["Contents"]) > 1:
        offline_store_contents = objects_in_bucket["Contents"]
    else:
        print("Waiting for data in offline store...\n")
        sleep(60)

print("Data available.")

Waiting for data in offline store...



## _Wait For The Cell Above To Complete and show `Data available`._

## Get Record From Online Feature Store

Use for OnlineStore serving from a FeatureStore. Only the latest records stored in the OnlineStore can be retrieved. If no Record with `RecordIdentifierValue` is found, then an empty result is returned.

In [None]:
# record_identifier_value = "IJKL2345"

# featurestore_runtime.get_record(
#     FeatureGroupName=feature_group_name, RecordIdentifierValueAsString=record_identifier_value
# )

# Build Training Dataset

SageMaker FeatureStore automatically builds the Glue Data Catalog for FeatureGroups (we can optionally turn it on/off while creating the FeatureGroup). We can create a training dataset by querying the data in the feature store. This is done by utilizing the auto-built Catalog and run an Athena query. 

# Create An Athena Query

In [None]:
feature_store_query = feature_group.athena_query()

# Get The Feature Group Table Name

In [None]:
feature_store_table = feature_store_query.table_name

# Build an Athena SQL Query

Show Hive DDL commands to define or change structure of tables or databases in Hive. The schema of the table is generated based on the feature definitions. Columns are named after feature name and data-type are inferred based on feature type. 

Integral feature type is mapped to INT data-type. Fractional feature type is mapped to FLOAT data-type. String feature type is mapped to STRING data-type.

In [None]:
print(feature_group.as_hive_ddl())

In [None]:
query_string = """
SELECT input_ids, input_mask, segment_ids, label_id, split_type  FROM "{}" WHERE split_type='train' LIMIT 5
""".format(
    feature_store_table
)

print("Running " + query_string)

## Run Athena Query
The query results are stored in a S3 bucket.

In [None]:
feature_store_query.run(query_string=query_string, output_location="s3://" + bucket + "/" + prefix + "/query_results/")

feature_store_query.wait()

## View Query Results

Load query results in a Pandas DataFrame.

In [None]:
dataset = pd.DataFrame()

dataset = feature_store_query.as_dataframe()

dataset

# Review the Feature Store

![Feature Store](img/feature_store_sm_extension.png)

# Release Resources

In [None]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>

In [None]:
%%javascript

try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp
}