In [0]:
# Read the two bronze source tables (airline-level and weekday-level figures).
df_bronze_airline = spark.table(
    "`1team-postgresql-connection_catalog`.bronze.bronze_airline_202307_202506"
)
df_bronze_weekday = spark.table(
    "`1team-postgresql-connection_catalog`.bronze.bronze_weekday_202307_202506"
)

# The airline table is the first working DataFrame; preview it.
df_bronze = df_bronze_airline
display(df_bronze)

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window

# Build the airline silver DataFrame from the bronze airline table.
#
# The pipeline starts explicitly from `df_bronze_airline` instead of whatever
# `df_bronze` currently holds, so re-running this cell after the weekday cells
# have executed still produces the airline result.

# 0. Normalise the key columns first: trim whitespace, cast to string and
#    zero-pad the month to two digits. Doing this before the window
#    aggregations keeps rows whose raw year/month differ only by whitespace or
#    padding inside the same monthly partition.
df_bronze = (
    df_bronze_airline
    .withColumn("year", F.trim(F.col("year")).cast("string"))
    .withColumn("month", F.lpad(F.trim(F.col("month")).cast("string"), 2, "0"))
    .withColumn("airline", F.trim(F.col("airline")).cast("string"))
)

# Monthly window: one partition per (year, month).
w_month = Window.partitionBy("year", "month")

# Every ratio below is wrapped in F.when(<denominator> != 0, ...), which yields
# NULL for a zero (or NULL) denominator instead of depending on the session's
# ANSI setting (division by zero raises an error when ANSI mode is enabled).

# 1. Average passengers per flight.
df_bronze = df_bronze.withColumn(
    "avg_passenger_per_flight",
    F.when(
        F.col("flight_total") != 0,
        F.col("passenger_total") / F.col("flight_total"),
    ),
)

# 2. Average cargo per flight.
df_bronze = df_bronze.withColumn(
    "avg_cargo_per_flight",
    F.when(
        F.col("flight_total") != 0,
        F.col("cargo_total") / F.col("flight_total"),
    ),
)

# 3. Congestion index: the row's flight_total relative to the mean
#    flight_total of its month. The monthly mean is a temporary helper column.
df_bronze = (
    df_bronze
    .withColumn("month_flight_total_mean", F.avg("flight_total").over(w_month))
    .withColumn(
        "congestion_index",
        F.when(
            F.col("month_flight_total_mean") != 0,
            F.col("flight_total") / F.col("month_flight_total_mean"),
        ),
    )
    .drop("month_flight_total_mean")
)

# 4. Airline flight share: the row's flight_total divided by the month's total
#    flight_total. The monthly sum is a temporary helper column.
df_bronze = (
    df_bronze
    .withColumn("month_flight_total_sum", F.sum("flight_total").over(w_month))
    .withColumn(
        "airline_flight_share",
        F.when(
            F.col("month_flight_total_sum") != 0,
            F.col("flight_total") / F.col("month_flight_total_sum"),
        ),
    )
    .drop("month_flight_total_sum")
)

# 5. date column as a YYYYMMDD string, with the day fixed to "01".
df_bronze = df_bronze.withColumn(
    "date",
    F.concat(F.col("year"), F.col("month"), F.lit("01")),
)

# 6. Move date to the first column position; all other columns keep their order.
cols = ["date"] + [c for c in df_bronze.columns if c != "date"]
df_bronze = df_bronze.select(*cols)

df_bronze.printSchema()
display(df_bronze)


In [0]:
# Write the current df_bronze (the airline silver result) to PostgreSQL over
# JDBC, replacing the target table (mode "overwrite").
#
# The database password is fetched from a Databricks secret scope at run time
# instead of being stored in the notebook source.
# NOTE(review): the scope and key names below are placeholders — create or
# confirm them in the workspace, and rotate the password that was previously
# committed in this notebook.
jdbc_password = dbutils.secrets.get(scope="team1-postgres", key="azureuser-password")

(
    df_bronze.write
    .format("jdbc")
    .option("url", "jdbc:postgresql://1dt-2nd-team1-postgres.postgres.database.azure.com:5432/postgres")
    .option("dbtable", "silver.silver_airline_202307_202506")
    .option("user", "azureuser")
    .option("password", jdbc_password)
    .option("driver", "org.postgresql.Driver")
    .option("sslmode", "require")
    .mode("overwrite")
    .save()
)


In [0]:
# Switch the working DataFrame to the weekday bronze table for the cells below.
df_bronze = df_bronze_weekday

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window

# Build the weekday silver DataFrame from the bronze weekday table.
#
# The pipeline starts explicitly from `df_bronze_weekday` instead of whatever
# `df_bronze` currently holds, so the result does not depend on which cells
# were executed before this one.

# 0. Normalise the key columns first: trim whitespace, cast to string and
#    zero-pad the month to two digits. Doing this before the window
#    aggregations keeps rows whose raw year/month differ only by whitespace or
#    padding inside the same monthly partition.
df_bronze = (
    df_bronze_weekday
    .withColumn("year", F.trim(F.col("year")).cast("string"))
    .withColumn("month", F.lpad(F.trim(F.col("month")).cast("string"), 2, "0"))
    .withColumn("weekday", F.trim(F.col("weekday")).cast("string"))
)

# Monthly window: one partition per (year, month).
w_month = Window.partitionBy("year", "month")

# Every ratio below is wrapped in F.when(<denominator> != 0, ...), which yields
# NULL for a zero (or NULL) denominator instead of depending on the session's
# ANSI setting (division by zero raises an error when ANSI mode is enabled).

# 1. Average passengers per flight.
df_bronze = df_bronze.withColumn(
    "avg_passenger_per_flight",
    F.when(
        F.col("flight_total") != 0,
        F.col("passenger_total") / F.col("flight_total"),
    ),
)

# 2. Average cargo per flight.
df_bronze = df_bronze.withColumn(
    "avg_cargo_per_flight",
    F.when(
        F.col("flight_total") != 0,
        F.col("cargo_total") / F.col("flight_total"),
    ),
)

# 3. Congestion index: the row's flight_total relative to the mean
#    flight_total of its month. The monthly mean is a temporary helper column.
df_bronze = (
    df_bronze
    .withColumn("month_flight_total_mean", F.avg("flight_total").over(w_month))
    .withColumn(
        "congestion_index",
        F.when(
            F.col("month_flight_total_mean") != 0,
            F.col("flight_total") / F.col("month_flight_total_mean"),
        ),
    )
    .drop("month_flight_total_mean")
)

# 4. Weekday flight share: the row's flight_total divided by the month's total
#    flight_total. The monthly sum is a temporary helper column.
df_bronze = (
    df_bronze
    .withColumn("month_flight_total_sum", F.sum("flight_total").over(w_month))
    .withColumn(
        "weekday_flight_share",
        F.when(
            F.col("month_flight_total_sum") != 0,
            F.col("flight_total") / F.col("month_flight_total_sum"),
        ),
    )
    .drop("month_flight_total_sum")
)

# 5. date column as a YYYYMMDD string, with the day fixed to "01".
df_bronze = df_bronze.withColumn(
    "date",
    F.concat(F.col("year"), F.col("month"), F.lit("01")),
)

# 6. Move date to the first column position; all other columns keep their order.
cols = ["date"] + [c for c in df_bronze.columns if c != "date"]
df_bronze = df_bronze.select(*cols)

df_bronze.printSchema()
display(df_bronze)


In [0]:
# Render the current df_bronze once more before it is written out in the next cell.
display(df_bronze)

In [0]:
# Write the current df_bronze (the weekday silver result) to PostgreSQL over
# JDBC, replacing the target table (mode "overwrite").
#
# The database password is fetched from a Databricks secret scope at run time
# instead of being stored in the notebook source.
# NOTE(review): the scope and key names below are placeholders — create or
# confirm them in the workspace, and rotate the password that was previously
# committed in this notebook.
jdbc_password = dbutils.secrets.get(scope="team1-postgres", key="azureuser-password")

(
    df_bronze.write
    .format("jdbc")
    .option("url", "jdbc:postgresql://1dt-2nd-team1-postgres.postgres.database.azure.com:5432/postgres")
    .option("dbtable", "silver.silver_weekday_202307_202506")
    .option("user", "azureuser")
    .option("password", jdbc_password)
    .option("driver", "org.postgresql.Driver")
    .option("sslmode", "require")
    .mode("overwrite")
    .save()
)