In [1]:
import boto3

In [2]:
import boto3

def list_glue_tables(database_name):
    """Print the name of every table in a Glue database.

    Args:
        database_name: Name of the Glue database whose tables are listed.

    Iterates over all pages of the ``get_tables`` paginator, gathers the
    table names, then prints a header followed by one name per line. When
    no table is returned, a "No tables found" message is printed instead.

    Returns:
        None. All output goes to stdout.
    """
    glue = boto3.client('glue')
    pages = glue.get_paginator('get_tables').paginate(DatabaseName=database_name)

    # Flatten the paged responses into a single list of names
    names = [entry['Name'] for page in pages for entry in page['TableList']]

    # Guard clause for the empty case keeps the listing path unindented
    if not names:
        print(f"No tables found in database '{database_name}'.")
        return

    print(f"Tables in database '{database_name}':")
    for name in names:
        print(name)

# Demo call, executed only when __name__ is "__main__".
if __name__ == "__main__":
    # Replace 'your-database-name' with the actual database name.
    # Left unchanged, the placeholder does not match an existing Glue
    # database and the GetTables call fails with EntityNotFoundException.
    database_name = 'your-database-name'
    list_glue_tables(database_name)

EntityNotFoundException: An error occurred (EntityNotFoundException) when calling the GetTables operation: Database your-database-name not found.

In [3]:
# List Glue databases

def list_glue_databases():
    """Print the name of every database in the Glue Data Catalog.

    Uses the ``get_databases`` paginator so that catalogs with more
    databases than fit in a single response are listed completely, and
    prints "No databases found." when the catalog returns no databases.

    Returns:
        None. All output goes to stdout.
    """
    # Create a Glue client
    glue_client = boto3.client('glue')

    # A single get_databases call only returns the first page of results,
    # so walk every page (same approach as list_glue_tables).
    paginator = glue_client.get_paginator('get_databases')
    database_names = [
        db['Name']
        for page in paginator.paginate()
        for db in page.get('DatabaseList', [])
    ]

    # Decide on the collected names: the 'DatabaseList' key is present even
    # when the list is empty, so testing for the key never reports "none".
    if database_names:
        print("Databases:")
        for name in database_names:
            print(name)
    else:
        print("No databases found.")



In [4]:
# Print the names of the Glue databases.
list_glue_databases()

Databases:
default
man_vehicledatalake_prod_gluedatabase_iwh_datahub
steering_control_data_mart
steering_control_data_raw
steering_control_data_staging


In [5]:
# Print the tables registered in the 'steering_control_data_raw' database.
list_glue_tables('steering_control_data_raw')

Tables in database 'steering_control_data_raw':
aenderungsnr
arbvorrat_sd53
av_steuerungsdaten
emcprj
kast_bedarfe
kast_bedarfe_ebcdic
sachstamm
segh1001
stueli
tei01t
teieak
teieiq


In [7]:
# Get table details
def get_table_details(database_name, table_name):
    """Print the type, storage location and columns of a Glue table.

    Args:
        database_name: Name of the Glue database containing the table.
        table_name: Name of the table to describe.

    Prints a "not found" message when Glue reports that the table (or its
    database) does not exist. Fields that are absent from the table
    definition (table type, storage descriptor, location) are printed as
    "N/A" instead of raising ``KeyError``.

    Returns:
        None. All output goes to stdout.
    """
    # Create a Glue client
    glue_client = boto3.client('glue')

    # Glue reports a missing table or database by raising
    # EntityNotFoundException, not by omitting 'Table' from the response,
    # so the "not found" message has to be produced from the handler.
    try:
        response = glue_client.get_table(DatabaseName=database_name, Name=table_name)
    except glue_client.exceptions.EntityNotFoundException:
        print(f"Table '{table_name}' not found in database '{database_name}'.")
        return

    table = response.get('Table')
    if not table:
        print(f"Table '{table_name}' not found in database '{database_name}'.")
        return

    # TableType and StorageDescriptor are looked up defensively so a table
    # definition without them still prints.
    storage = table.get('StorageDescriptor') or {}

    print(f"Table details for '{table_name}' in database '{database_name}':")
    print(f"Table Type: {table.get('TableType', 'N/A')}")
    print(f"Storage Descriptor Location: {storage.get('Location', 'N/A')}")
    print("Columns:")
    for column in storage.get('Columns', []):
        print(f"  {column['Name']} ({column['Type']})")

In [18]:
# Print the table type, storage location and columns of 'arbvorrat_sd53'.
get_table_details('steering_control_data_raw', 'arbvorrat_sd53')

Table details for 'arbvorrat_sd53' in database 'steering_control_data_raw':
Table Type: EXTERNAL_TABLE
Storage Descriptor Location: s3://man-vehicledatalake-prod-data-iwh/datahub/ARBVORRAT_SD53
Columns:
  akkz (string)
  bk_id (string)
  bk_prod (string)
  eins_key (decimal(4,0))
  posp (decimal(8,0))
  posg (decimal(8,0))
  werk_ausf (string)
  werk_zust (string)
  aendnr_hinzu (string)
  aendnr_entf (string)
  bearb_kz (string)
  pruefkz (string)
  drukz (string)
  kswkz (string)
  erskz (string)
  wdat (string)
  aedat (string)
  aeuser (string)
  pgm_name (string)
  kast_kz (string)
  possnr (string)
  rnum (decimal(10,0))


In [11]:
# Get files in S3 bucket path

def list_files_in_bucket(bucket_name, prefix: str):
    """Print the key of every object under a prefix in an S3 bucket.

    Args:
        bucket_name: Name of the S3 bucket to list.
        prefix: Key prefix; only objects whose key starts with it are listed.

    Uses the ``list_objects_v2`` paginator so that prefixes holding more
    objects than a single response can carry are listed completely. Keys
    are printed as pages arrive; if no object is returned at all, a
    "No files found" message is printed instead.

    Returns:
        None. All output goes to stdout.
    """
    # Create an S3 client
    s3_client = boto3.client('s3')

    # One list_objects_v2 call returns a single page only, so iterate over
    # every page to avoid silently truncating the listing.
    paginator = s3_client.get_paginator('list_objects_v2')

    found_any = False
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # 'Contents' is absent from a page that holds no objects
        for obj in page.get('Contents', []):
            if not found_any:
                # Emit the header once, right before the first key
                print(f"Files in bucket '{bucket_name}':")
                found_any = True
            print(obj['Key'])

    if not found_any:
        print(f"No files found in bucket '{bucket_name}'.")

In [20]:
# List the objects under the table's storage location; bucket and prefix are
# taken from the s3:// location printed by get_table_details.
list_files_in_bucket('man-vehicledatalake-prod-data-iwh', 'datahub/ARBVORRAT_SD53')

Files in bucket 'man-vehicledatalake-prod-data-iwh':
datahub/ARBVORRAT_SD53/snap=2022_02_22/part-00000-d8df0bfc-c8a4-4b49-b9e6-6e2408a88aa8-c000.snappy.parquet
datahub/ARBVORRAT_SD53/snap=2022_02_23/part-00000-aa33fdfb-ba75-477d-8649-fad9aac70860-c000.snappy.parquet
datahub/ARBVORRAT_SD53/snap=2022_02_24/part-00000-96883b70-752f-47be-ad23-fc817a2cd97b-c000.snappy.parquet
datahub/ARBVORRAT_SD53/snap=2022_02_25/part-00000-24445a73-25a3-4a78-8e8f-a9eb03be4d5b-c000.snappy.parquet
datahub/ARBVORRAT_SD53/snap=2022_02_26/part-00000-cf19a983-f3bd-4d99-a549-479c91587797-c000.snappy.parquet
datahub/ARBVORRAT_SD53/snap=2022_02_27/part-00000-c692491e-f44c-4d52-813e-d47a0e27e0a1-c000.snappy.parquet
datahub/ARBVORRAT_SD53/snap=2022_02_28/part-00000-1cc07880-ba14-4d3b-826f-0133692aa890-c000.snappy.parquet
datahub/ARBVORRAT_SD53/snap=2022_03_01/part-00000-8ee5324a-d602-4129-90c1-f539185dbe1a-c000.snappy.parquet
datahub/ARBVORRAT_SD53/snap=2022_03_02/part-00000-9536a862-f724-46eb-bc22-3c9c12c0bd91-c000

In [14]:
# Get file details

def get_file_details(bucket_name, key):
    """Print the size and last-modified timestamp of a single S3 object.

    Args:
        bucket_name: Name of the bucket that holds the object.
        key: Full key of the object inside the bucket.

    Calls ``head_object`` and prints the ``ContentLength`` and
    ``LastModified`` values from its response.

    Returns:
        None. All output goes to stdout.
    """
    # Client creation and the HEAD request are chained; only the response is kept
    metadata = boto3.client('s3').head_object(Bucket=bucket_name, Key=key)

    # Report header first, then one line per metadata field
    print(f"File details for '{key}' in bucket '{bucket_name}':")
    print(f"Size: {metadata['ContentLength']} bytes")
    print(f"Last Modified: {metadata['LastModified']}")

In [21]:
# Print size and last-modified time of one Parquet part file under the
# snap=2024_09_03 prefix.
get_file_details('man-vehicledatalake-prod-data-iwh', 'datahub/ARBVORRAT_SD53/snap=2024_09_03/part-00000-1104211f-7e38-4fce-b1a6-4fd381067afa-c000.snappy.parquet')

File details for 'datahub/ARBVORRAT_SD53/snap=2024_09_03/part-00000-1104211f-7e38-4fce-b1a6-4fd381067afa-c000.snappy.parquet' in bucket 'man-vehicledatalake-prod-data-iwh':
Size: 7311619 bytes
Last Modified: 2024-09-03 03:42:22+00:00
