<font size="+2"><b><center>Eprog OV Data Pipeline</center></b></font>

### Imports

In [12]:
%load_ext autoreload
%autoreload 2
import os
import datetime
import pandas as pd
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 100)
from pyspark.sql import functions as F

from crf0a_app.configuration import spark_config
from crf0a_app.utils import system

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Spark session

In [13]:
spark_context, spark_session = spark_config.get_spark(
    app_name="[crf0A] Write to Exadata",
    driver_cores=1,
    driver_mem="4g",
    max_executors=8,
    executor_cores=4,
    executor_mem="4g"
)

### Exadata

#### Instantiate OracleDatabase object

In [14]:
from crf0a_app.infra.oracle_database import OracleDatabase

In [15]:
oracle_db = OracleDatabase(dialect="jdbc", spark_session=spark_session)

In [37]:
from datetime import date

#### Read

In [49]:
today = str(date.today())
cycle = today[0:4]+today[5:7]

In [50]:

# SQL query
PROD_DEM = f"""
(/* Query for PROD DEM / OV */
SELECT
    a1.grep_libelle_pp_22    program_country,
    a1.fa1_code_famille_12   family,
    '01'
    || '/'
    || a1.pe1_numero_15
    || '/'
    || a1.pe1_annee_14 monthyear,
    'PROD DEM' measure,
    SUM(a1.vpn_volume_total) AS valeur,
    to_char(sysdate) inserted_date,
    a1.ca1_cycle_11          cycle
FROM
    (
        SELECT
            a3.vpn_id_pays_unitaire_0           vpn_id_pays_unitaire,
            a3.vpn_code_marque_1                vpn_code_marque_1,
            a3.vpn_num_cycle_2                  vpn_num_cycle,
            a3.vpn_annee_cycle_3                vpn_annee_cycle,
            a3.vpn_periode_4                    vpn_periode,
            a3.vpn_est_bascule_6                vpn_est_bascule_6,
            a3.ca1_code_marque_7                ca1_code_marque,
            a3.ca1_annee_cycle_8                ca1_annee_cycle,
            a3.ca1_numero_cycle_9               ca1_numero_cycle,
            a3.ca1_code_zone_production_10      ca1_code_zone_production_10,
            a3.ca1_cycle_11                     ca1_cycle_11,
            a3.fa1_code_famille_12              fa1_code_famille_12,
            a3.fa1_code_marque_13               fa1_code_marque,
            a3.pe1_annee_14                     pe1_annee_14,
            a3.pe1_numero_15                    pe1_numero_15,
            a3.pe1_code_zone_production_16      pe1_code_zone_production_16,
            a3.pe1_periode_17                   pe1_periode,
            a3.pu1_id_pays_unitaire_18          pu1_id_pays_unitaire,
            a3.pu1_code_psv_pays_programme_19   pu1_code_psv_pays_programme,
            a2.grep_libelle_groupe_reporting    grep_libelle_groupe_reporting_20,
            a2.grep_code_psv_pays_programme     grep_code_psv_pays_programme,
            a2.grep_libelle_pp                  grep_libelle_pp_22,
            a3.vpn_volume_total
        FROM
            (
                SELECT
                    a5.vpn_id_pays_unitaire_0        vpn_id_pays_unitaire_0,
                    a5.vpn_code_marque_1             vpn_code_marque_1,
                    a5.vpn_num_cycle_2               vpn_num_cycle_2,
                    a5.vpn_annee_cycle_3             vpn_annee_cycle_3,
                    a5.vpn_periode_4                 vpn_periode_4,
                    a5.vpn_est_bascule_6             vpn_est_bascule_6,
                    a5.ca1_code_marque_7             ca1_code_marque_7,
                    a5.ca1_annee_cycle_8             ca1_annee_cycle_8,
                    a5.ca1_numero_cycle_9            ca1_numero_cycle_9,
                    a5.ca1_code_zone_production_10   ca1_code_zone_production_10,
                    a5.ca1_cycle_11                  ca1_cycle_11,
                    a5.fa1_code_famille_12           fa1_code_famille_12,
                    a5.fa1_code_marque_13            fa1_code_marque_13,
                    a5.pe1_annee_14                  pe1_annee_14,
                    a5.pe1_numero_15                 pe1_numero_15,
                    a5.pe1_code_zone_production_16   pe1_code_zone_production_16,
                    a5.pe1_periode_17                pe1_periode_17,
                    a4.pu1_id_pays_unitaire          pu1_id_pays_unitaire_18,
                    a4.pu1_code_psv_pays_programme   pu1_code_psv_pays_programme_19,
                    a5.vpn_volume_total
                FROM
                    (
                        SELECT
                            a7.vpn_id_pays_unitaire_0        vpn_id_pays_unitaire_0,
                            a7.vpn_code_marque_1             vpn_code_marque_1,
                            a7.vpn_num_cycle_2               vpn_num_cycle_2,
                            a7.vpn_annee_cycle_3             vpn_annee_cycle_3,
                            a7.vpn_periode_4                 vpn_periode_4,
                            a7.vpn_est_bascule_6             vpn_est_bascule_6,
                            a7.ca1_code_marque_7             ca1_code_marque_7,
                            a7.ca1_annee_cycle_8             ca1_annee_cycle_8,
                            a7.ca1_numero_cycle_9            ca1_numero_cycle_9,
                            a7.ca1_code_zone_production_10   ca1_code_zone_production_10,
                            a7.ca1_cycle_11                  ca1_cycle_11,
                            a7.fa1_code_famille_12           fa1_code_famille_12,
                            a7.fa1_code_marque_13            fa1_code_marque_13,
                            a6.pe1_annee                     pe1_annee_14,
                            a6.pe1_numero                    pe1_numero_15,
                            a6.pe1_code_zone_production      pe1_code_zone_production_16,
                            a6.pe1_periode                   pe1_periode_17,
                            a7.vpn_volume_total
                        FROM
                            (
                                SELECT
                                    a9.vpn_id_pays_unitaire_0        vpn_id_pays_unitaire_0,
                                    a9.vpn_code_marque_1             vpn_code_marque_1,
                                    a9.vpn_num_cycle_2               vpn_num_cycle_2,
                                    a9.vpn_annee_cycle_3             vpn_annee_cycle_3,
                                    a9.vpn_periode_4                 vpn_periode_4,
                                    a9.vpn_est_bascule_6             vpn_est_bascule_6,
                                    a9.ca1_code_marque_7             ca1_code_marque_7,
                                    a9.ca1_annee_cycle_8             ca1_annee_cycle_8,
                                    a9.ca1_numero_cycle_9            ca1_numero_cycle_9,
                                    a9.ca1_code_zone_production_10   ca1_code_zone_production_10,
                                    a9.ca1_cycle_11                  ca1_cycle_11,
                                    a8.fa1_code_famille              fa1_code_famille_12,
                                    a8.fa1_code_marque               fa1_code_marque_13,
                                    a9.vpn_volume_total
                                FROM
                                    (
                                        SELECT
                                            a11.vpn_id_pays_unitaire       vpn_id_pays_unitaire_0,
                                            a11.vpn_code_marque            vpn_code_marque_1,
                                            a11.vpn_num_cycle              vpn_num_cycle_2,
                                            a11.vpn_annee_cycle            vpn_annee_cycle_3,
                                            a11.vpn_periode                vpn_periode_4,
                                            a11.vpn_est_bascule            vpn_est_bascule_6,
                                            a10.ca1_code_marque            ca1_code_marque_7,
                                            a10.ca1_annee_cycle            ca1_annee_cycle_8,
                                            a10.ca1_numero_cycle           ca1_numero_cycle_9,
                                            a10.ca1_code_zone_production   ca1_code_zone_production_10,
                                            a10.ca1_cycle                  ca1_cycle_11,
                                            a11.vpn_volume_total
                                        FROM
                                            brc06_bpg00.rbvqtvpn   a11,
                                            brc06_bpg00.rbvqtca1   a10
                                        WHERE
                                            a10.ca1_code_marque = a11.vpn_code_marque
                                            AND a10.ca1_annee_cycle = a11.vpn_annee_cycle
                                            AND a10.ca1_numero_cycle = a11.vpn_num_cycle
                                    ) a9,
                                    brc06_bpg00.rbvqtfa1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            a8
                                WHERE
                                    a9.vpn_code_marque_1 = a8.fa1_code_marque
                            ) a7,
                            brc06_bpg00.rbvqtpe1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        a6
                        WHERE
                            a7.vpn_periode_4 = a6.pe1_periode
                    ) a5,
                    brc06_bpg00.rbvqtpu1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          a4
                WHERE
                    a5.vpn_id_pays_unitaire_0 = a4.pu1_id_pays_unitaire
            ) a3,
            brc06_bpg00.rbvqtgrep                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     a2
        WHERE
            a3.pu1_code_psv_pays_programme_19 = a2.grep_code_psv_pays_programme
        GROUP BY
            a3.vpn_id_pays_unitaire_0,
            a3.vpn_code_marque_1,
            a3.vpn_num_cycle_2,
            a3.vpn_annee_cycle_3,
            a3.vpn_periode_4,
            a3.vpn_est_bascule_6,
            a3.ca1_code_marque_7,
            a3.ca1_annee_cycle_8,
            a3.ca1_numero_cycle_9,
            a3.ca1_code_zone_production_10,
            a3.ca1_cycle_11,
            a3.fa1_code_famille_12,
            a3.fa1_code_marque_13,
            a3.pe1_annee_14,
            a3.pe1_numero_15,
            a3.pe1_code_zone_production_16,
            a3.pe1_periode_17,
            a3.pu1_id_pays_unitaire_18,
            a3.pu1_code_psv_pays_programme_19,
            a2.grep_libelle_groupe_reporting,
            a2.grep_code_psv_pays_programme,
            a2.grep_libelle_pp,
            a3.vpn_volume_total
    ) a1
WHERE
    a1.ca1_cycle_11 = {cycle}
    AND a1.ca1_code_zone_production_10 LIKE 'PSA'
    AND a1.vpn_est_bascule_6 LIKE 'N'
    AND a1.pe1_code_zone_production_16 LIKE 'PSA'
    AND a1.vpn_code_marque_1 LIKE 'OV'
    AND a1.grep_libelle_groupe_reporting_20 LIKE '500-DMOA'
GROUP BY
    a1.grep_libelle_pp_22,
    a1.fa1_code_famille_12,
    '01'
    || '/'
    || a1.pe1_numero_15
    || '/'
    || a1.pe1_annee_14,
    'PROD DEM',
    to_char(sysdate),
    a1.ca1_cycle_11,
    a1.pe1_annee_14,
    a1.pe1_numero_15
HAVING
    SUM(a1.vpn_volume_total) IS NOT NULL
ORDER BY
    valeur DESC
FETCH FIRST 10 ROWS ONLY
)
"""


In [51]:
# Read data from Oracle
prod_dem_ov = oracle_db.read_df_from_query(PROD_DEM, fetchsize=20000)

In [52]:
prod_dem_ov.persist()

DataFrame[PROGRAM_COUNTRY: string, FAMILY: string, MONTHYEAR: string, MEASURE: string, VALEUR: decimal(38,10), INSERTED_DATE: string, CYCLE: decimal(6,0)]

In [53]:
prod_dem_ov.limit(5).toPandas()

Unnamed: 0,PROGRAM_COUNTRY,FAMILY,MONTHYEAR,MEASURE,VALEUR,INSERTED_DATE,CYCLE
0,TURQUIE,1GQO,01/9/2022,PROD DEM,8223.0,11-JUL-22,202207
1,TURQUIE,2GK0,01/9/2022,PROD DEM,8223.0,11-JUL-22,202207
2,TURQUIE,1G2H,01/9/2022,PROD DEM,8223.0,11-JUL-22,202207
3,TURQUIE,2GON,01/9/2022,PROD DEM,8223.0,11-JUL-22,202207
4,TURQUIE,1GOH,01/9/2022,PROD DEM,8223.0,11-JUL-22,202207


In [36]:
prod_dem_ov.count()

10

In [23]:
# SQL query
PROD_ALL = """(
/* Query for OV / PROD ALL */
SELECT
    a1.grep_libelle_pp_22    program_country,
    a1.fa1_code_famille_12   family,
    '01' || '/' || a1.pe1_numero_15 || '/' || a1.pe1_annee_14 monthyear,
    'PROD ALL' Measure,
    SUM(a1.vpn_volume_alloue_5) VALEUR,
    to_char(sysdate) inserted_date,
    a1.ca1_cycle_11 cycle

FROM
    (
        SELECT
            a3.vpn_id_pays_unitaire_0           vpn_id_pays_unitaire,
            a3.vpn_code_marque_1                vpn_code_marque_1,
            a3.vpn_num_cycle_2                  vpn_num_cycle,
            a3.vpn_annee_cycle_3                vpn_annee_cycle,
            a3.vpn_periode_4                    vpn_periode,
            a3.vpn_volume_alloue_5              vpn_volume_alloue_5,
            a3.vpn_est_bascule_6                vpn_est_bascule_6,
            a3.ca1_code_marque_7                ca1_code_marque,
            a3.ca1_annee_cycle_8                ca1_annee_cycle,
            a3.ca1_numero_cycle_9               ca1_numero_cycle,
            a3.ca1_code_zone_production_10      ca1_code_zone_production_10,
            a3.ca1_cycle_11                     ca1_cycle_11,
            a3.fa1_code_famille_12              fa1_code_famille_12,
            a3.fa1_code_marque_13               fa1_code_marque,
            a3.pe1_annee_14                     pe1_annee_14,
            a3.pe1_numero_15                    pe1_numero_15,
            a3.pe1_code_zone_production_16      pe1_code_zone_production_16,
            a3.pe1_periode_17                   pe1_periode,
            a3.pu1_id_pays_unitaire_18          pu1_id_pays_unitaire,
            a3.pu1_code_psv_pays_programme_19   pu1_code_psv_pays_programme,
            a2.grep_libelle_groupe_reporting    grep_libelle_groupe_reporting_20,
            a2.grep_code_psv_pays_programme     grep_code_psv_pays_programme,
            a2.grep_libelle_pp                  grep_libelle_pp_22
        FROM
            (
                SELECT
                    a5.vpn_id_pays_unitaire_0        vpn_id_pays_unitaire_0,
                    a5.vpn_code_marque_1             vpn_code_marque_1,
                    a5.vpn_num_cycle_2               vpn_num_cycle_2,
                    a5.vpn_annee_cycle_3             vpn_annee_cycle_3,
                    a5.vpn_periode_4                 vpn_periode_4,
                    a5.vpn_volume_alloue_5           vpn_volume_alloue_5,
                    a5.vpn_est_bascule_6             vpn_est_bascule_6,
                    a5.ca1_code_marque_7             ca1_code_marque_7,
                    a5.ca1_annee_cycle_8             ca1_annee_cycle_8,
                    a5.ca1_numero_cycle_9            ca1_numero_cycle_9,
                    a5.ca1_code_zone_production_10   ca1_code_zone_production_10,
                    a5.ca1_cycle_11                  ca1_cycle_11,
                    a5.fa1_code_famille_12           fa1_code_famille_12,
                    a5.fa1_code_marque_13            fa1_code_marque_13,
                    a5.pe1_annee_14                  pe1_annee_14,
                    a5.pe1_numero_15                 pe1_numero_15,
                    a5.pe1_code_zone_production_16   pe1_code_zone_production_16,
                    a5.pe1_periode_17                pe1_periode_17,
                    a4.pu1_id_pays_unitaire          pu1_id_pays_unitaire_18,
                    a4.pu1_code_psv_pays_programme   pu1_code_psv_pays_programme_19
                FROM
                    (
                        SELECT
                            a7.vpn_id_pays_unitaire_0        vpn_id_pays_unitaire_0,
                            a7.vpn_code_marque_1             vpn_code_marque_1,
                            a7.vpn_num_cycle_2               vpn_num_cycle_2,
                            a7.vpn_annee_cycle_3             vpn_annee_cycle_3,
                            a7.vpn_periode_4                 vpn_periode_4,
                            a7.vpn_volume_alloue_5           vpn_volume_alloue_5,
                            a7.vpn_est_bascule_6             vpn_est_bascule_6,
                            a7.ca1_code_marque_7             ca1_code_marque_7,
                            a7.ca1_annee_cycle_8             ca1_annee_cycle_8,
                            a7.ca1_numero_cycle_9            ca1_numero_cycle_9,
                            a7.ca1_code_zone_production_10   ca1_code_zone_production_10,
                            a7.ca1_cycle_11                  ca1_cycle_11,
                            a7.fa1_code_famille_12           fa1_code_famille_12,
                            a7.fa1_code_marque_13            fa1_code_marque_13,
                            a6.pe1_annee                     pe1_annee_14,
                            a6.pe1_numero                    pe1_numero_15,
                            a6.pe1_code_zone_production      pe1_code_zone_production_16,
                            a6.pe1_periode                   pe1_periode_17
                        FROM
                            (
                                SELECT
                                    a9.vpn_id_pays_unitaire_0        vpn_id_pays_unitaire_0,
                                    a9.vpn_code_marque_1             vpn_code_marque_1,
                                    a9.vpn_num_cycle_2               vpn_num_cycle_2,
                                    a9.vpn_annee_cycle_3             vpn_annee_cycle_3,
                                    a9.vpn_periode_4                 vpn_periode_4,
                                    a9.vpn_volume_alloue_5           vpn_volume_alloue_5,
                                    a9.vpn_est_bascule_6             vpn_est_bascule_6,
                                    a9.ca1_code_marque_7             ca1_code_marque_7,
                                    a9.ca1_annee_cycle_8             ca1_annee_cycle_8,
                                    a9.ca1_numero_cycle_9            ca1_numero_cycle_9,
                                    a9.ca1_code_zone_production_10   ca1_code_zone_production_10,
                                    a9.ca1_cycle_11                  ca1_cycle_11,
                                    a8.fa1_code_famille              fa1_code_famille_12,
                                    a8.fa1_code_marque               fa1_code_marque_13
                                FROM
                                    (
                                        SELECT
                                            a11.vpn_id_pays_unitaire       vpn_id_pays_unitaire_0,
                                            a11.vpn_code_marque            vpn_code_marque_1,
                                            a11.vpn_num_cycle              vpn_num_cycle_2,
                                            a11.vpn_annee_cycle            vpn_annee_cycle_3,
                                            a11.vpn_periode                vpn_periode_4,
                                            a11.vpn_volume_alloue          vpn_volume_alloue_5,
                                            a11.vpn_est_bascule            vpn_est_bascule_6,
                                            a10.ca1_code_marque            ca1_code_marque_7,
                                            a10.ca1_annee_cycle            ca1_annee_cycle_8,
                                            a10.ca1_numero_cycle           ca1_numero_cycle_9,
                                            a10.ca1_code_zone_production   ca1_code_zone_production_10,
                                            a10.ca1_cycle                  ca1_cycle_11
                                        FROM
                                            brc06_bpg00.rbvqtvpn   a11,
                                            brc06_bpg00.rbvqtca1   a10
                                        WHERE
                                            a10.ca1_code_marque = a11.vpn_code_marque
                                            AND a10.ca1_annee_cycle = a11.vpn_annee_cycle
                                            AND a10.ca1_numero_cycle = a11.vpn_num_cycle
                                    ) a9,
                                    brc06_bpg00.rbvqtfa1 a8
                                WHERE
                                    a9.vpn_code_marque_1 = a8.fa1_code_marque
                            ) a7,
                            brc06_bpg00.rbvqtpe1 a6
                        WHERE
                            a7.vpn_periode_4 = a6.pe1_periode
                    ) a5,
                    brc06_bpg00.rbvqtpu1 a4
                WHERE
                    a5.vpn_id_pays_unitaire_0 = a4.pu1_id_pays_unitaire
            ) a3,
            brc06_bpg00.rbvqtgrep a2
        WHERE
            a3.pu1_code_psv_pays_programme_19 = a2.grep_code_psv_pays_programme
    ) a1
WHERE
    a1.ca1_cycle_11 = 202206
    AND a1.ca1_code_zone_production_10 LIKE 'PSA'
    AND a1.vpn_est_bascule_6 LIKE 'N'
    AND a1.pe1_code_zone_production_16 LIKE 'PSA'
    AND a1.vpn_code_marque_1 LIKE 'OV'
    AND a1.grep_libelle_groupe_reporting_20 LIKE '500-DMOA'
GROUP BY
    a1.fa1_code_famille_12, 
    a1.pe1_annee_14, 
    '01' || '/' || a1.pe1_numero_15 || '/' || a1.pe1_annee_14, 
    'PROD ALL', 
    a1.pe1_numero_15, 
    a1.ca1_cycle_11, 
    a1.grep_libelle_pp_22, 
    to_char(sysdate) 
HAVING
    SUM(a1.vpn_volume_alloue_5) IS NOT NULL
ORDER BY
    "VALEUR" DESC
FETCH NEXT 10 ROWS ONLY
)"""

In [24]:
# Read data from Oracle
prod_all_ov = oracle_db.read_df_from_query(PROD_ALL, fetchsize=20000)

In [25]:
prod_all_ov.persist()

DataFrame[PROGRAM_COUNTRY: string, FAMILY: string, MONTHYEAR: string, MEASURE: string, VALEUR: decimal(38,10), INSERTED_DATE: string, CYCLE: decimal(6,0)]

In [26]:
prod_all_ov.limit(5).toPandas()

Unnamed: 0,PROGRAM_COUNTRY,FAMILY,MONTHYEAR,MEASURE,VALEUR,INSERTED_DATE,CYCLE
0,TURQUIE,2GON,01/10/2022,PROD ALL,5868.0,11-JUL-22,202206
1,TURQUIE,2GU9,01/10/2022,PROD ALL,5868.0,11-JUL-22,202206
2,TURQUIE,2GKR,01/10/2022,PROD ALL,5868.0,11-JUL-22,202206
3,TURQUIE,1GKR,01/10/2022,PROD ALL,5868.0,11-JUL-22,202206
4,TURQUIE,1GO5,01/10/2022,PROD ALL,5868.0,11-JUL-22,202206


In [27]:
prod_all_ov.count()

10

#### Write

In [28]:
# Write data to Oracle
oracle_db.write_df_to_oracle(
    prod_dem_ov,
    "BRC_SD01865.EPS_AC",
    mode="overwrite"
)

In [29]:
# Write data to Oracle
oracle_db.write_df_to_oracle(
    prod_all_ov,
    "BRC_SD01865.EPS_AC",
    mode="append"
)