# Motor Vehicle Theft
## Database (PostgreSQL) to Raw Layer (JSON)

####  Run this cell to set up and start your interactive session.


In [1]:
# Glue interactive-session settings applied when the session is created:
# 10-minute idle timeout, Glue 5.0, two G.1X workers.
%idle_timeout 10
%glue_version 5.0
%worker_type G.1X
%number_of_workers 2

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from pyspark.sql.functions import * 
from pyspark.sql.types import * 
from awsglue import DynamicFrame
import json 
from datetime import date
import boto3
from botocore.exceptions import ClientError
  
# Obtain the SparkContext for this session (getOrCreate returns the active one if present).
sc = SparkContext.getOrCreate()
# Glue wrapper around the SparkContext; later cells use it to build the S3 sink.
glueContext = GlueContext(sc)
# SparkSession used by the JDBC read further down.
spark = glueContext.spark_session
# NOTE(review): `job` is created but not referenced by any visible cell — confirm it is still needed.
job = Job(glueContext)

Welcome to the Glue Interactive Sessions Kernel
For more information on available magic commands, please type %help in any new cell.

Please view our Getting Started page to access the most up-to-date information on the Interactive Sessions kernel: https://docs.aws.amazon.com/glue/latest/dg/interactive-sessions.html
Installed kernel version: 1.0.7 
Current idle_timeout is None minutes.
idle_timeout has been set to 10 minutes.
Setting Glue version to: 5.0
Previous worker type: None
Setting new worker type to: G.1X
Previous number of workers: None
Setting new number of workers to: 2
Trying to create a Glue session for the kernel.
Session Type: glueetl
Worker Type: G.1X
Number of Workers: 2
Idle Timeout: 10
Session ID: 33455532-eccc-41b1-a5dc-2d636952156f
Applying the following default arguments:
--glue_kernel_version 1.0.7
--enable-glue-datacatalog true
Waiting for session 33455532-eccc-41b1-a5dc-2d636952156f to get into ready status...
Session 33455532-eccc-41b1-a5dc-2d636952156f has be

In [2]:
# Run-level configuration shared by the cells below.
# Extraction date as YYYY-MM-DD, taken from the session's local clock.
todays_date = date.today().strftime('%Y-%m-%d')
# S3 prefix of the raw layer; the dataset name is appended to it when writing.
base_raw_path = 's3://motor-theft-vehicles-bucket/raw/'
# Dataset name: used as the S3 sub-folder and stamped into `source_file_name`.
# Plain literal — the previous f-string had no placeholders.
file_name = "london_incidents"




In [3]:
def get_secret(secret_name="dev/motor_theft_vehicles/postgresql", region_name="us-east-1"):
    """Fetch the string payload of a secret from AWS Secrets Manager.

    Args:
        secret_name: Id of the secret to read. Defaults to the PostgreSQL
            secret this notebook has always used.
        region_name: AWS region of the Secrets Manager endpoint.

    Returns:
        The raw ``SecretString`` value as a ``str``; the caller parses it.

    Raises:
        botocore.exceptions.ClientError: propagated unchanged when the
            ``get_secret_value`` call fails.
        KeyError: if the response carries no ``SecretString`` entry.
    """
    client = boto3.client(
        service_name='secretsmanager',
        region_name=region_name,
    )
    # No try/except here: catching ClientError only to re-raise it added nothing.
    response = client.get_secret_value(SecretId=secret_name)
    return response['SecretString']


# Connection settings parsed from the secret's JSON payload. Later cells read
# the keys host, port, dbname, driver, user and password from it.
# It contains the database password — do not print or display it.
db_config = json.loads(get_secret())




In [5]:
# JDBC URL assembled from the connection settings held in `db_config`.
jdbc_url = f"jdbc:postgresql://{db_config['host']}:{db_config['port']}/{db_config['dbname']}"

# Aliased subquery passed as `dbtable`: selects the rows whose report_datetime
# falls on `todays_date`.
# NOTE(review): to_date(text, text) implies report_datetime is a text column
# in PostgreSQL — confirm against the table definition.
query = f"(SELECT * FROM tbl_motor_theft_vehicles WHERE to_date(report_datetime ,'YYYY-MM-DD')='{todays_date}') tbl"

# Load the subquery result into a Spark DataFrame over JDBC.
df = (
    spark.read.format("jdbc")
    .option("url", jdbc_url)
    .option("driver", db_config['driver'])
    .option("dbtable", query)
    .option("user", db_config["user"])
    .option("password", db_config["password"])
    .load()
)




In [6]:
# Preview the extracted rows (first 20, long values truncated).
# NOTE(review): the output includes address, licence plate and VIN columns —
# consider clearing it before sharing the notebook.
df.show(n=20, truncate=True)

+-----------+-------------+-------------------+-------------------+--------------------+------+-------+-------+----------+------------+--------------+------------+-----------------+---------------+-------------------+--------------------+-------------------+-------------------+---------------+
|incident_id|report_number|    report_datetime|occurrence_datetime|         addressline|  city|  state|zipcode|vehicle_id|vehicle_year| vehicle_color|licenseplate|              vin|recovery_status|      recovery_date|             borough|        data_source|    incident_status|method_of_entry|
+-----------+-------------+-------------------+-------------------+--------------------+------+-------+-------+----------+------------+--------------+------------+-----------------+---------------+-------------------+--------------------+-------------------+-------------------+---------------+
|    INC1486|     LON16871|2025-11-02 12:05:40|2025-11-01 12:05:40|    5747 Curry Place|London|England|  46936|    

In [7]:
# Add the lineage/partition columns in a single chained expression:
#   source_file_name - dataset name from the configuration cell
#   load_timestamp   - time at which this load is evaluated
#   report_date      - date part of report_datetime; used as the partition key on write
df = (
    df
    .withColumn('source_file_name', lit(file_name))
    # current_timestamp() already returns a Column, so wrapping it in lit() was redundant.
    .withColumn('load_timestamp', current_timestamp())
    .withColumn('report_date', to_date('report_datetime'))
)




In [8]:
# Convert the Spark DataFrame to a Glue DynamicFrame, which the sink consumes.
raw_dynamic_frame = DynamicFrame.fromDF(df, glueContext)

# S3 sink under <base_raw_path><file_name>, partitioned by report_date, with
# Data Catalog updates enabled.
s3output = glueContext.getSink(
    connection_type="s3",
    path=f"{base_raw_path}{file_name}",
    partitionKeys=['report_date'],
    enableUpdateCatalog=True,
    updateBehavior="UPDATE_IN_DATABASE",
)
# Catalog database/table that the sink keeps in sync with the written data.
s3output.setCatalogInfo(
    catalogDatabase="motor_theft_vehicles",
    catalogTableName="raw_motor_theft_vehicles",
)
# NOTE(review): the notebook title describes the raw layer as JSON, but the
# sink writes CSV — confirm which format is intended.
s3output.setFormat("csv")
s3output.writeFrame(raw_dynamic_frame)


<awsglue.dynamicframe.DynamicFrame object at 0x7f88a3ec9a90>
