# Generating a Synthetic Dataset for Anomaly Detection Experiments

<img align="left" width="130" src="https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Extra/cover-small-padded.png"/>

This notebook contains the code to help readers work through one of the recipes in the book [Machine Learning with Amazon SageMaker Cookbook: 80 proven recipes for data scientists and developers to perform ML experiments and deployments](https://www.amazon.com/Machine-Learning-Amazon-SageMaker-Cookbook/dp/1800567030).

### How to do it...

In [None]:
import random

from string import ascii_uppercase
from random import randint, choice

In [None]:
def generate_normal_point(low=0, high=10):
    """Return a random integer representing a "normal" observation.

    Args:
        low: Smallest value that can be returned (inclusive).
        high: Largest value that can be returned (inclusive).

    Returns:
        An integer N with ``low <= N <= high``. With the default arguments
        this is the 0-10 range used throughout the notebook.
    """
    return randint(low, high)

def generate_abnormal_point(low=70, high=80):
    """Return a random integer representing an "abnormal" observation.

    Args:
        low: Smallest value that can be returned (inclusive).
        high: Largest value that can be returned (inclusive).

    Returns:
        An integer N with ``low <= N <= high``. With the default arguments
        this is the 70-80 range used throughout the notebook.
    """
    return randint(low, high)

def normal_or_abnormal():
    """Randomly pick the category of the next point.

    An integer is drawn from 0 through 20 (both inclusive). Only a draw of
    exactly 20 yields "abnormal"; every other draw yields "normal".

    Returns:
        The string "abnormal" or "normal".
    """
    drew_outlier = randint(0, 20) == 20
    return "abnormal" if drew_outlier else "normal"
    
def generate_random_string(length=10):
    """Return a random string of uppercase ASCII letters.

    Args:
        length: Number of characters to generate. Defaults to 10, the
            size previously hard-coded here.

    Returns:
        A string of ``length`` characters, each picked from ``A``-``Z``.
    """
    return ''.join(choice(ascii_uppercase) for _ in range(length))

In [None]:
# Collects one {"label": ..., "value": ...} record per generated point.
list_of_points = []

for _ in range(1000):
    # Draw order is kept as: category, then label, then value.
    point_type = normal_or_abnormal()
    string_value = generate_random_string()

    point_value = (
        generate_normal_point()
        if point_type == "normal"
        else generate_abnormal_point()
    )

    point = {"label": string_value, "value": point_value}
    list_of_points.append(point)

In [None]:
# Bare expression: the notebook shows the generated records as this cell's output.
list_of_points

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Set matplotlib's default figure size to a wide 15 x 5 layout.
plt.rcParams['figure.figsize'] = [15, 5]
# Load the records into a DataFrame and plot them; abnormal values (70-80)
# should stand out as spikes above the normal values (0-10).
pd.DataFrame(list_of_points).plot()

In [None]:
# Start from an empty local staging folder: delete any previous s3_files
# directory, then recreate it.
!rm -rf s3_files
!mkdir -p s3_files

In [None]:
import json

def save_json_file(point, directory="s3_files"):
    """Write one data point to ``<directory>/<label>.json`` as JSON.

    Args:
        point: Dict containing at least a ``'label'`` key. The label is
            used as the file name and the whole dict is serialized.
        directory: Folder that receives the file. It is created if it does
            not exist yet. Defaults to ``"s3_files"``.

    Raises:
        KeyError: If ``point`` has no ``'label'`` key.
    """
    import os  # local import: os is not imported elsewhere in the notebook

    label = point['label']
    # Create the folder on demand so the function does not fail with
    # FileNotFoundError when the staging directory has not been prepared.
    os.makedirs(directory, exist_ok=True)
    filename = os.path.join(directory, label + '.json')
    with open(filename, 'w') as file:
        json.dump(point, file)
    # Report only after the file has been closed (and therefore flushed).
    print(f"Saved {label}!")

In [None]:
# Write every generated record to its own JSON file (named after its label).
for point in list_of_points:
    save_json_file(point)

In [None]:
# Name of the S3 bucket that receives the generated JSON files.
# NOTE: S3 bucket names are globally unique, so replace this value with a
# name of your own if bucket creation reports that it is already taken.
bucket_name = "sagemaker-cookbook-anomaly-detection-data-bucket"

In [None]:
# Create the bucket with the AWS CLI ("mb" = make bucket).
!aws s3 mb s3://{bucket_name}

In [None]:
# Recursively copy the contents of the local s3_files folder to the bucket root.
!aws s3 cp s3_files/ s3://{bucket_name}/ --recursive

In [None]:
# Persist list_of_points with IPython's %store magic so it can be restored
# later (e.g. with `%store -r list_of_points` in another notebook).
%store list_of_points

In [None]:
# Publish the same bucket name under athena_bucket_name and persist it with
# %store; presumably read by a later Athena-related notebook (confirm).
athena_bucket_name = bucket_name
%store athena_bucket_name