In [1]:
import polars as pl
pl.Config.set_tbl_rows(50)
pl.Config.set_fmt_str_lengths(100)
df = pl.read_parquet('CurProcessorExport-00001.snappy.parquet') ## data from Nov 11th for this account
# df = pl.read_parquet('./ollion-sandbox-cur.snappy.parquet')


## EC2 Products

In [2]:
ec2_non_null_df = df.filter((pl.col('line_item_resource_id').is_not_null())
                            & (pl.col('line_item_product_code').eq('AmazonEC2'))
                            & (pl.col('product_region_code').is_not_null()))
triplets_df = ec2_non_null_df.select(["line_item_product_code", "product_region_code", "line_item_resource_id"])
triplets_df = triplets_df.unique()
triplets_df.sort(["product_region_code", "line_item_resource_id"])
triplets_df

# EBS snapshots ids follow arns with :snapshot/ - Snapshot ID
# EBS volumes start with vol- - Volume ID
# EC2 instances start with i- - Instance ID
# nat gateways ids follow arns with :natgateway/ - Nat Gateway ID
# All else will be marked as Unknown
resource_id_df = triplets_df.with_columns(
    resource_id_type = pl.when(pl.col('line_item_resource_id').str.contains(':snapshot/'))
    .then(pl.lit('Snapshot ID'))
    .when(pl.col('line_item_resource_id').str.starts_with('vol-'))
    .then(pl.lit('Volume ID'))
    .when(pl.col('line_item_resource_id').str.starts_with('i-'))
    .then(pl.lit('Instance ID'))
    .when(pl.col('line_item_resource_id').str.contains(':natgateway/'))
    .then(pl.lit('Nat Gateway ID'))
    .otherwise(pl.lit('Unknown'))
)
# use regex to extract the id from the arn
resource_id_df = resource_id_df.with_columns(
    pl.col("line_item_resource_id").str.replace_all(r'.*:snapshot/', "")
)
resource_id_df = resource_id_df.with_columns(
    pl.col("line_item_resource_id").str.replace_all(r'.*:natgateway/', "")
)
resource_id_df

line_item_product_code,product_region_code,line_item_resource_id,resource_id_type
str,str,str,str
"""AmazonEC2""","""us-east-1""","""vol-098da221120cecd24""","""Volume ID"""
"""AmazonEC2""","""us-east-1""","""snap-0a8104bd639a9e280""","""Snapshot ID"""
"""AmazonEC2""","""us-east-1""","""snap-0213191263e290cd3""","""Snapshot ID"""
"""AmazonEC2""","""us-east-1""","""snap-03ee2112634a82ed4""","""Snapshot ID"""
"""AmazonEC2""","""us-east-1""","""snap-0a1081a80a34e83dc""","""Snapshot ID"""
"""AmazonEC2""","""us-west-2""","""i-00bae9112afebfb94""","""Instance ID"""
"""AmazonEC2""","""us-east-1""","""snap-0bca88c428e5165fe""","""Snapshot ID"""
"""AmazonEC2""","""us-east-1""","""snap-086b34369fa7f0887""","""Snapshot ID"""
"""AmazonEC2""","""us-east-1""","""snap-0d5a827c20afc7585""","""Snapshot ID"""
"""AmazonEC2""","""us-east-1""","""snap-0f167e1d23a60e15c""","""Snapshot ID"""


## S3 Products

In [3]:
s3_non_null_df = df.filter((pl.col('line_item_resource_id').is_not_null())
                            & (pl.col('line_item_product_code').eq('AmazonS3'))
                            & (pl.col('product_region_code').is_not_null()))
triplets_df = s3_non_null_df.select(["line_item_product_code", "product_region_code", "line_item_resource_id"])
triplets_df = triplets_df.unique()
triplets_df.sort(["product_region_code", "line_item_resource_id"])
triplets_df

# S3 bucket names are the resource id
resource_id_df = triplets_df.with_columns(
    resource_id_type = pl.lit('Bucket Name')
)
resource_id_df

line_item_product_code,product_region_code,line_item_resource_id,resource_id_type
str,str,str,str
"""AmazonS3""","""us-east-2""","""2w-ingestion-engine-snowpipe-data-stage""","""Bucket Name"""
"""AmazonS3""","""us-east-1""","""cloudvod-logsbucket-1b0h2h4fegl28-logs6819bb44-pxgkuthpxe9r""","""Bucket Name"""
"""AmazonS3""","""us-east-1""","""aws-workspaces-latency-test20241017161752423400000001""","""Bucket Name"""
"""AmazonS3""","""us-west-2""","""a-test-bucket-ot-ss-raw-no-key""","""Bucket Name"""
"""AmazonS3""","""us-east-1""","""sagemaker-us-east-1-187940856853""","""Bucket Name"""
"""AmazonS3""","""us-west-1""","""fintech-eks-terraform-bucket""","""Bucket Name"""
"""AmazonS3""","""us-east-1""","""winson-test-sgw-bucket-1""","""Bucket Name"""
"""AmazonS3""","""us-east-1""","""187940856853-us-east-1-athena-results-bucket-xgmzjqwh2c""","""Bucket Name"""
"""AmazonS3""","""us-east-2""","""testbucket2222333332222222""","""Bucket Name"""
"""AmazonS3""","""us-east-2""","""customizationsforctsolut-customcontroltowers3acce-1vbft8g61r8hz""","""Bucket Name"""


##

## Lambda Products

In [4]:
## get triplets of product code, region code, and resource id for Usage and SavingsPlanCoveredUsage
lam_df = df.filter((pl.col('line_item_product_code').eq('AWSLambda')) &
                (pl.col('product_region_code').is_not_null()) &
                (pl.col('line_item_resource_id').is_not_null())
)
triplets_df = lam_df.select(["line_item_product_code", "product_region_code", "line_item_resource_id"])
triplets_df = triplets_df.unique()
triplets_df.sort(["product_region_code", "line_item_resource_id"])

# Lambda tags are extracted based on the full arn in the resource id
resource_id_df = triplets_df.with_columns(
    resource_id_type = pl.lit('Lambda ARN')
)
resource_id_df

line_item_product_code,product_region_code,line_item_resource_id,resource_id_type
str,str,str,str
"""AWSLambda""","""us-west-2""","""arn:aws:lambda:us-west-2:187940856853:function:cwsyn-carlsjr-offers-west-2-1f6976a4-b647-47a2-88a4-3…","""Lambda ARN"""
"""AWSLambda""","""us-east-2""","""arn:aws:lambda:us-east-2:187940856853:function:cwsyn-hardees-join-east-2-1a5d8ca7-d693-40e7-81cc-818…","""Lambda ARN"""
"""AWSLambda""","""us-east-1""","""arn:aws:lambda:us-east-1:187940856853:function:identify_differences_fn""","""Lambda ARN"""
"""AWSLambda""","""us-east-2""","""arn:aws:lambda:us-east-2:187940856853:function:cwsyn-ckr-contact-us-east-2-4c254993-79db-4262-ad5b-9…","""Lambda ARN"""
"""AWSLambda""","""us-east-1""","""arn:aws:lambda:us-east-1:187940856853:function:dynamo_session_record_fn""","""Lambda ARN"""
"""AWSLambda""","""us-east-1""","""arn:aws:lambda:us-east-1:187940856853:function:CloudWatchAutoAlarms""","""Lambda ARN"""
"""AWSLambda""","""us-east-1""","""arn:aws:lambda:us-east-1:187940856853:function:doc_ingestion_fn""","""Lambda ARN"""
"""AWSLambda""","""us-east-1""","""arn:aws:lambda:us-east-1:187940856853:function:CdkStepdefnStarterStack-ScheduledLambdaScheduledLa-5L…","""Lambda ARN"""
"""AWSLambda""","""us-east-1""","""arn:aws:lambda:us-east-1:187940856853:function:imageBuildFailedTest""","""Lambda ARN"""
"""AWSLambda""","""us-west-1""","""arn:aws:lambda:us-west-1:187940856853:function:cwsyn-carlsjr-order-west-1-0a130140-2c70-42f6-bf08-af…","""Lambda ARN"""


## RDS Products

In [5]:
## get triplets of product code, region code, and resource id for Usage and SavingsPlanCoveredUsage
rds_df = df.filter((pl.col('line_item_product_code').eq('AmazonRDS')) &
                (pl.col('product_region_code').is_not_null()) &
                (pl.col('line_item_resource_id').is_not_null())
)
# usage_df = rds_df.filter(
#     (pl.col('line_item_line_item_type').is_in(['Usage', 'DiscountedUsage', 'SavingPlanCoveredUsage'])) &
#     (~pl.col('line_item_usage_type').str.contains('Bytes'))
# )
triplets_df = rds_df.select(["line_item_product_code",
                            "product_region_code",
                            "line_item_resource_id"
                            ])
triplets_df = triplets_df.unique()
triplets_df.sort(["product_region_code", "line_item_resource_id"])
triplets_df

resource_id_df = triplets_df.with_columns(
    resource_id_type = pl.when(pl.col('line_item_resource_id').str.contains(':cluster-snapshot:'))
    .then(pl.lit('Snapshot ARN'))
    .when(pl.col('line_item_resource_id').str.contains(':db:'))
    .then(pl.lit('Database ARN'))
    .when(pl.col('line_item_resource_id').str.contains(':cluster:'))
    .then(pl.lit('Cluster ARN'))
    .otherwise(pl.lit('Unknown'))
)
resource_id_df

line_item_product_code,product_region_code,line_item_resource_id,resource_id_type
str,str,str,str
"""AmazonRDS""","""us-east-2""","""arn:aws:rds:us-east-2:187940856853:cluster:cluster-t3hrwc4gf3zkly36x5diw5u6di""","""Cluster ARN"""
"""AmazonRDS""","""us-east-1""","""arn:aws:rds:us-east-1:187940856853:db:db-sql-rds-express""","""Database ARN"""
"""AmazonRDS""","""us-east-1""","""arn:aws:rds:us-east-1:187940856853:db:database-1""","""Database ARN"""
"""AmazonRDS""","""eu-north-1""","""arn:aws:rds:eu-north-1:187940856853:db:database-1-instance-1""","""Database ARN"""
"""AmazonRDS""","""eu-north-1""","""arn:aws:rds:eu-north-1:187940856853:cluster:cluster-6tcvzlm3edthuwrnqvtmxrq5na""","""Cluster ARN"""
"""AmazonRDS""","""us-east-1""","""arn:aws:rds:us-east-1:187940856853:db:sql-rds-enterprise-not-encrypted""","""Database ARN"""
"""AmazonRDS""","""us-west-2""","""arn:aws:rds:us-west-2:187940856853:db:dn-wordpress-database-replica""","""Database ARN"""
"""AmazonRDS""","""eu-north-1""","""arn:aws:rds:eu-north-1:187940856853:cluster-snapshot:database-1-final-snapshot""","""Snapshot ARN"""
"""AmazonRDS""","""us-east-2""","""arn:aws:rds:us-east-2:187940856853:cluster-snapshot:zw-poc-db-final-snapshot""","""Snapshot ARN"""
"""AmazonRDS""","""us-east-1""","""arn:aws:rds:us-east-1:187940856853:db:db-2-encrypted-restored""","""Database ARN"""


## Redshift Products

In [6]:
red_df = df.filter((pl.col('line_item_product_code').eq('AmazonRedshift')) &
                (pl.col('product_region_code').is_not_null()) &
                (pl.col('line_item_resource_id').is_not_null())
)
triplets_df = red_df.select(["line_item_product_code", "product_region_code", "line_item_resource_id"])
triplets_df = triplets_df.unique()
triplets_df.sort(["product_region_code", "line_item_resource_id"])
triplets_df

resource_id_df = triplets_df.with_columns(
    resource_id_type = pl.when(pl.col('line_item_resource_id').str.contains(':cluster:'))
    .then(pl.lit('Cluster ARN'))
    .when(pl.col('line_item_resource_id').str.contains(':redshift-serverless:'))
    .then(pl.lit('Serverless ARN'))
    .otherwise(pl.lit('Unknown'))
)
resource_id_df

line_item_product_code,product_region_code,line_item_resource_id,resource_id_type
str,str,str,str
"""AmazonRedshift""","""us-west-2""","""arn:aws:redshift:us-west-2:187940856853:cluster:redshift-cluster-1""","""Cluster ARN"""
"""AmazonRedshift""","""us-east-1""","""arn:aws:redshift:us-east-1:187940856853:cluster:redshift-cluster-1""","""Cluster ARN"""
"""AmazonRedshift""","""us-east-1""","""arn:aws:redshift-serverless:us-east-1:187940856853:namespace/7005ab5e-d1da-457f-a1d8-508e04c36eb3""","""Serverless ARN"""
"""AmazonRedshift""","""us-east-1""","""arn:aws:redshift:us-east-1:999537480004:cluster:source-to-65127672""","""Cluster ARN"""


## ECR Products

In [7]:
ecr_df = df.filter((pl.col('line_item_product_code').eq('AmazonECR')) &
                (pl.col('product_region_code').is_not_null()) &
                (pl.col('line_item_resource_id').is_not_null())
)
triplets_df = ecr_df.select(["line_item_product_code", "product_region_code", "line_item_resource_id"])
triplets_df = triplets_df.unique()
triplets_df.sort(["product_region_code", "line_item_resource_id"])
triplets_df

# resource_id_df = triplets_df.with_columns(
#     resource_id_type = pl.when(pl.col('line_item_resource_id').str.contains(':cluster:'))
#     .then(pl.lit('Cluster ARN'))
#     .when(pl.col('line_item_resource_id').str.contains(':redshift-serverless:'))
#     .then(pl.lit('Serverless ARN'))
#     .otherwise(pl.lit('Unknown'))
# )
# resource_id_df

line_item_product_code,product_region_code,line_item_resource_id
str,str,str
"""AmazonECR""","""ca-central-1""","""arn:aws:ecr:ca-central-1:187940856853:repository/ssmrepair841fc95a/ssmrepairfunctiona754f442repo"""
"""AmazonECR""","""ap-southeast-1""","""arn:aws:ecr:ap-southeast-1:187940856853:repository/rey-ecr"""
"""AmazonECR""","""us-east-1""","""arn:aws:ecr:us-east-1:187940856853:repository/ccrepl"""
"""AmazonECR""","""us-west-2""","""arn:aws:ecr:us-west-2:187940856853:repository/focus_billing_pipeline_etl"""
"""AmazonECR""","""us-east-1""","""arn:aws:ecr:us-east-1:187940856853:repository/cdk-hnb659fds-container-assets-187940856853-us-east-1"""
"""AmazonECR""","""us-east-1""","""arn:aws:ecr:us-east-1:187940856853:repository/say-something"""
"""AmazonECR""","""us-west-2""","""arn:aws:ecr:us-west-2:187940856853:repository/focus_convert"""
"""AmazonECR""","""us-east-1""","""arn:aws:ecr:us-east-1:187940856853:repository/ccrepl2"""
