In [262]:
import os, sys
import traceback
import pyspark
import pandas as pd
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql import SQLContext, SparkSession
from pyspark.sql.utils import AnalysisException

In [276]:
#Register BASL_AZF_DATE_CPT
dfBASL = spark.read.parquet("/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020/MONAT=9")
#dfBASL = spark.read.parquet("/data/dev_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020")
dfBASL.registerTempTable("BASL_AZF_DATE_CPT")

In [None]:
dfBASL.printSchema()

In [None]:
#Register BASL_AZF_DATE_CPT
dfBASL_parsed = spark.read.parquet("/data/prod_env/data/parsed_data/GY/VC/GY_BASL_720.parquet/DATA_DATE_PARTITION=2020-07-15")
#dfBASL = spark.read.parquet("/data/dev_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020")
dfBASL_parsed.registerTempTable("BASL_AZF_DATE_CPT_PARSED")

In [None]:
dfBASL_parsed.printSchema()

In [None]:
#Register Referential Tables PCA/PCO
Owner_RACCT = spark.read.parquet("/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/")
Owner_RACCT.registerTempTable("OWNER_RACCT")
Owner_BUKRS = spark.read.parquet("/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/")
Owner_BUKRS.registerTempTable("OWNER_BUKRS")
Owner_ACCOUNTS = spark.read.parquet("/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/")
Owner_ACCOUNTS.registerTempTable("OWNER_ACCOUNTS")

In [263]:
conf = pyspark.SparkConf()
conf.setMaster("spark://azfr2-spark-production-master.service.dsp.allianz:7077") \
    .set("spark.cores.max","12").set("spark.executor.memory", "6g").set("spark.executor.cores", "2") \
    .set("spark.driver.memory", "6g").setAppName("JUP_PRD_DE4_LARCHER_IMFR")
sc = pyspark.SparkContext(conf=conf)


spark = SparkSession.builder.getOrCreate()
sqlContext = SQLContext(sc)

In [None]:
def runInfoMap2(spark, myQuery, dict_dateFormat, dict_timestampFormat, decimalSeparator, 
               outputFile, outputFormat, nbPartitions, header=True, sep=";"):
    ### Execution of the query ###
    dfQuery = spark.sql(myQuery)
    res_schema = dfQuery.schema
    
#     df_tmp = dfQuery
#     df_tmp.printSchema()
    ### Casting all the non string columns ###
    # Getting all the non string columns
    nonStringColumns = [(_.name, _.dataType) for _ in res_schema if not isinstance(_.dataType, StringType)]
    # Cast, with specific processing for DateType, TimestampType & DecimalType
    for column in nonStringColumns:
        # Date
        if column[1] == DateType():
            if dict_dateFormat[column[0]]["upper"]:
                dfQuery = dfQuery.withColumn(column[0], upper(date_format(column[0], dict_dateFormat[column[0]]["format"])))
            else:
                dfQuery = dfQuery.withColumn(column[0], date_format(column[0], dict_dateFormat[column[0]]["format"]))
        # Timestamp
        elif column[1] == TimestampType():
            if dict_timestampFormat[column[0]]["upper"]:
                dfQuery = dfQuery.withColumn(column[0], upper(date_format(column[0], dict_timestampFormat[column[0]]["format"])))
            else:
                dfQuery = dfQuery.withColumn(column[0], date_format(column[0], dict_timestampFormat[column[0]]["format"]))
        # Decimal
        elif (isinstance(column[1], DecimalType)):
            dfQuery = dfQuery.withColumn(column[0], dfQuery[column[0]].cast(StringType()))
            if column[1].scale>0:
                dfQuery = dfQuery.withColumn(column[0], regexp_replace(column[0], "[0]+$", ""))
                dfQuery = dfQuery.withColumn(column[0], regexp_replace(column[0], "[.]$", ""))
            if decimalSeparator != '.':
                dfQuery = dfQuery.withColumn(column[0], regexp_replace(column[0], ".", decimalSeparator))
        # Other data types
        else:
            dfQuery = dfQuery.withColumn(column[0], dfQuery[column[0]].cast(StringType()))
    ### Couting number of lines ###
    # print("Number of lines: " + str(dfQuery.count()))
    ### Writing ###
    if outputFormat.upper() == "PARQUET":
        dfQuery.write.parquet(outputFile)
    elif outputFormat.upper() == "CSV":
        dfQuery.repartition(nbPartitions).write.csv(outputFile, sep=sep, header=header)
    
    return res_schema

## Pas obligatoirement a utiliser, c'est juste pour avoir le code sous la main ##
def compareInfomaps(spark, df1, df2):
    try:
        print("df1 - df2 : " + str(df1.subtract(df2).count()))
        print("df2 - df1 : " + str(df2.subtract(df1).count()))
    except AnalysisException:
        print("Les dataframes comparés n'ont pas la même structure.")
    except Exception as e:
        print("Type de l'erreur : " + str(e.__class__))

def nomatch(generated, reference, id_col):
    df=pd.DataFrame(columns=['columns', 'count'])
    df['columns']=generated.drop(id_col).schema.names
    def verif_col(name_col):
        generated_col =generated.select(name_col, id_col)
        reference_col=reference.select(name_col, id_col)
        return generated_col.subtract(reference_col).count()
    df['count']=df.apply(lambda x: verif_col(x['columns']), axis=1)
    return df

In [None]:
def nomatch(generated, reference, id_col):
    df=pd.DataFrame(columns=['columns', 'count'])
    df['columns']=generated.drop(id_col).schema.names
    def verif_col(name_col):
        generated_col =generated.select(name_col, id_col)
        reference_col=reference.select(name_col, id_col)
        return generated_col.subtract(reference_col).count()
    df['count']=df.apply(lambda x: verif_col(x['columns']), axis=1)
    return df

## AD01M

In [None]:
df_AD01M = spark.read.csv("/data/dropbox/larcher/INFOMAPS/AD01M/LDM_PRD_FRA_AD01M_200604_070547.CSV", header=True, sep=";")

In [None]:
df_AD01M.count()

In [None]:
res_AD01M = "/data/dropbox/larcher/INFOMAPS/AD01M/AD01M_run_ldgrp_new_testressources2.parquet"

queryAD01M = """
SELECT BUKRS,GJAHR,MONAT,RACCT,AJ,BEC_ERKENNUNG3,BELDAT,BLDAT,BUDAT,BUPRT,CPUDT,DATE3,DATE4,FMK1,FMK2,FS_HWAERS,GSA,
GSAKAT,HBSNR,MDCF,PRODSCHL,RISK,SHKZG,SNR,SUBRISK, EDS_TWBTRG AS LADM_TWBTRG, ZZONR, LDGRP
    FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet`
    WHERE GJAHR = 2020
    AND MONAT <= 8
    AND BUKRS IN ('9PFR', '9LFR','9RFR')
    AND (CPUDT < '2020-08-03' OR (CPUDT = '2020-08-03' AND CPUTM < '073940'))
    AND RACCT IN ('5111001008', '5111001009', '5111001048', '5111001049', '5111001060', '5111001070')
    AND ZZONR = 'NSPCOL'
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {
 
}
dfAD01M= sqlContext.sql(queryAD01M)
dfAD01M.repartition(1).write.parquet(res_AD01M)

#queryAD01M = runInfoMap2(spark, queryAD01M, dict_dateFormat, dict_timestampFormat, '.', res_AD01M, "csv", 1, True, ";")

In [None]:
dfAD01M_res2 = spark.read.csv("/data/dropbox/larcher/INFOMAPS/AD01M/AD01M_run07.csv/part-00000-0fb0e471-4013-4dd1-ada1-c4ddb335363e-c000.csv",header=True,sep=';')
dfAD01M_res2.registerTempTable("AD01M_07_RES")

In [None]:
dfAD01M_res2.count()

In [None]:
dfIE01D = spark.read.csv("/data/dropbox/larcher/INFOMAPS/IE01D/LDM_PRD_FRA_IE01D_200903_045428.CSV",header=True,sep=';')

## AD05M

In [None]:
res_AD05M = "/data/dropbox/larcher/INFOMAPS/AD05M/AD05M_regeneration_3009.csv"

queryAD05M = """
SELECT BUKRS,GJAHR,MONAT,RACCT,ABZ,BEC_ERKENNUNG1,BELDAT,BKTXT,BLART,BLDAT,BUDAT,BUPER,CPUDT,DATE3,
EDS_DMBTR_INS AS LADM_DMBTR,FS_DCH,FS_WAERS,LDGRP,PRODSCHL,RVA,SGTXT,TICKID, EDS_TWBTRG AS LADM_TWBTRG, USNAM,
CAST(VSNR AS INT), CAST(WERBERNR AS INT),ZZONR 

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet` 
WHERE GJAHR >= 2012  
    AND (GJAHR < 2020 OR (GJAHR = 2020 AND MONAT < 9)) 
    AND (CPUDT < '2020-09-03' OR (CPUDT = '2020-09-03' AND CPUTM < '070000'))
    AND BUKRS IN ('9LFR', '9RFR') 
    AND RACCT = '2411201043' 
    AND ZZONR IN ('COGSYS', 'TPRGCP', 'MANUEL', 'TPREEL')
"""

dict_dateFormat = {
   "BELDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "ZFBDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    
    },
     "DATE3" : {
        "format" : "yyyyMMdd",
        "upper" : False
    
    },
}
    

dict_timestampFormat = {

}
#dfAD05M= sqlContext.sql(queryAD05M)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryAD05M = runInfoMap2(spark, queryAD05M, dict_dateFormat, dict_timestampFormat, '.', res_AD05M, "csv",1 , True, ";")

In [None]:
res_AD05M = "/data/dropbox/larcher/INFOMAPS/AD05M/AD05M_regeneration.csv"

queryAD05M = """
SELECT BUKRS,GJAHR,MONAT,RACCT,ABZ,BEC_ERKENNUNG1,BELDAT,BKTXT,BLART,BLDAT,BUDAT,BUPER,CPUDT,
TO_DATE(CAST(UNIX_TIMESTAMP(DATE3, 'yyyyMMdd') AS TIMESTAMP)) AS DATE3,
EDS_DMBTR_INS AS LADM_DMBTR,FS_DCH,FS_WAERS,LDGRP,PRODSCHL,RVA,SGTXT,TICKID, EDS_TWBTRG AS LADM_TWBTRG, USNAM,
CAST(VSNR AS INT), CAST(WERBERNR AS INT),ZZONR 

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet` 
WHERE GJAHR >= 2012  
    AND (GJAHR < 2020 OR (GJAHR = 2020 AND MONAT < 9)) 
    AND (CPUDT < '2020-09-03' OR (CPUDT = '2020-09-03' AND CPUTM < '070000'))
    AND BUKRS IN ('9LFR', '9RFR') 
    AND RACCT = '2411201043' 
    AND ZZONR IN ('COGSYS', 'TPRGCP', 'MANUEL', 'TPREEL')
"""

dict_dateFormat = {
   "BELDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "ZFBDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    
    },
     "DATE3" : {
        "format" : "yyyy-MM-dd",
        "upper" : False
    
    },
}
    

dict_timestampFormat = {

}
#dfAD05M= sqlContext.sql(queryAD05M)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryAD05M = runInfoMap2(spark, queryAD05M, dict_dateFormat, dict_timestampFormat, '.', res_AD05M, "csv",5 , True, ";")

In [None]:
dfAD05M.printSchema()

In [None]:
dfAD05M.select('DATE3').distinct().show()

In [None]:
res_AD05M = "/data/dropbox/larcher/INFOMAPS/AD05M/AD05M_testdates_3.csv"

queryAD05M = """
SELECT BUKRS,GJAHR,MONAT,RACCT,ABZ,BEC_ERKENNUNG1,BELDAT,BKTXT,BLART,BLDAT,BUDAT,BUPER,CPUDT,DATE3,
EDS_DMBTR_INS AS LADM_DMBTR,FS_DCH,FS_WAERS,LDGRP,PRODSCHL,RVA,SGTXT,TICKID, EDS_TWBTRG AS LADM_TWBTRG, USNAM,
CAST(VSNR AS INT), CAST(WERBERNR AS INT),ZZONR 

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet` 
WHERE GJAHR = 2012  
    AND MONAT = 12
    AND BUKRS IN ('9LFR', '9RFR') 
    AND RACCT = '2411201043' 
    AND ZZONR IN ('COGSYS', 'TPRGCP', 'MANUEL', 'TPREEL')
"""

dict_dateFormat = {
   "BELDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "ZFBDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    
    },
     "DATE3" : {
        "format" : "yyyyMMdd",
        "upper" : False
    
    },
}
    

dict_timestampFormat = {

}
dfAD05M2= sqlContext.sql(queryAD05M)
#dfAD01M.repartition(10).write.parquet(dfYDR)

#queryAD05M = runInfoMap2(spark, queryAD05M, dict_dateFormat, dict_timestampFormat, '.', res_AD05M, "csv",1 , True, ";")

In [None]:
dfAD05M2.select("DATE3").distinct().show(20)

In [None]:
res_AD05M = "/data/dropbox/larcher/INFOMAPS/AD05M/AD05M_testdates_3.csv"

queryAD05M = """
SELECT BUKRS,GJAHR,MONAT,RACCT,ABZ,BEC_ERKENNUNG1,BELDAT,BKTXT,BLART,BLDAT,BUDAT,BUPER,CPUDT,DATE3,
EDS_DMBTR_INS AS LADM_DMBTR,FS_DCH,FS_WAERS,LDGRP,PRODSCHL,RVA,SGTXT,TICKID, EDS_TWBTRG AS LADM_TWBTRG, USNAM,
CAST(VSNR AS INT), CAST(WERBERNR AS INT),ZZONR 

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet` 
WHERE GJAHR = 2012  
    AND MONAT = 12
    AND BUKRS IN ('9LFR', '9RFR') 
    AND RACCT = '2411201043' 
    AND ZZONR IN ('COGSYS', 'TPRGCP', 'MANUEL', 'TPREEL')
"""

dict_dateFormat = {
   "BELDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "ZFBDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    
    },
     "DATE3" : {
        "format" : "yyyyMMdd",
        "upper" : False
    
    },
}
    

dict_timestampFormat = {

}
dfAD05M2= sqlContext.sql(queryAD05M)
#dfAD01M.repartition(10).write.parquet(dfYDR)

#queryAD05M = runInfoMap2(spark, queryAD05M, dict_dateFormat, dict_timestampFormat, '.', res_AD05M, "csv",1 , True, ";")

In [None]:
dfAD05M.show(1)

In [None]:
res_AD05M = "/data/dropbox/larcher/INFOMAPS/AD05M/AD05M.csv"

queryAD05M = """
SELECT BUKRS,GJAHR,MONAT,RACCT,ABZ,BEC_ERKENNUNG1,BELDAT,BKTXT,BLART,BLDAT,BUDAT,BUPER,CPUDT,DATE3,EDS_DMBTR_INS AS LADM_DMBTR,
FS_DCH,FS_WAERS,LDGRP,PRODSCHL,RVA,SGTXT,TICKID, EDS_TWBTRG AS LADM_TWBTRG, USNAM,VSNR,WERBERNR,ZZONR 

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet` 
WHERE GJAHR >= 2012  
    AND (GJAHR < 2020 OR (GJAHR = 2020 AND MONAT < 6)) 
    AND (CPUDT < '2020-06-04' OR (CPUDT = '2020-06-04' AND CPUTM < '070605'))
    AND BUKRS IN ('9LFR', '9RFR') 
    AND RACCT = '2411201043' 
    AND ZZONR IN ('COGSYS', 'TPRGCP', 'MANUEL', 'TPREEL')
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "ZFBDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
dfAD01M2= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

#queryAD05M = runInfoMap2(spark, queryAD05M, dict_dateFormat, dict_timestampFormat, '.', res_AD05M, "csv", 20, True, ";")

In [None]:
df_AD05M_src_1 = spark.read.csv("/data/dropbox/larcher/INFOMAPS/AD05M/regeneration/LDM_PRD_FRA_AD05M_200903_075457/LDM_PRD_FRA_AD05M_200903_075457.CSV", header = True, sep = ";")
df_AD05M_src_2 = spark.read.csv("/data/dropbox/larcher/INFOMAPS/AD05M/regeneration/LDM_PRD_FRA_AD05M_200903_075458/LDM_PRD_FRA_AD05M_200903_075458.CSV", header = True, sep = ";")

In [None]:
df_AD05M_src = df_AD05M_src_1.union(df_AD05M_src_2)

In [None]:
df_AD05M_src.count()

In [None]:
df_AD05M = spark.read.csv("/data/dropbox/larcher/INFOMAPS/AD05M/AD05M_regeneration_3009.csv", header = True, sep = ";")

In [None]:
df_AD05M.count()

In [None]:
df_AD05M.select("BELDAT","BLDAT","BUDAT","CPUDT","DATE3","VSNR","WERBERNR").show(10)

In [None]:
df_AD05M = spark.read.csv("/data/dropbox/larcher/INFOMAPS/AD05M/regeneration/AD05M_regeneration.csv", header = True, sep = ";")

In [None]:
df_AD05M_src.registerTempTable("ad05m_src")
df_AD05M.registerTempTable("ad05m_res")

In [None]:
df_AD05M_src.printSchema()

In [None]:
df_AD05M.printSchema()

In [None]:
df_AD05M_src.select("BELDAT","BLDAT","BUDAT","CPUDT","DATE3","VSNR","WERBERNR").show(2)

In [None]:
df_AD05M.select("BELDAT","BLDAT","BUDAT","CPUDT","DATE3","VSNR","WERBERNR").show(2)

In [None]:
query1 = """SELECT SUM(LADM_TWBTRG)
            FROM ad05m_src
            WHERE BLART NOT IN ('PV','ZA')
            AND LDGRP IN ('L1','',' ') OR LDGRP IS NULL
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
df_AD05M = spark.read.csv("/data/dropbox/larcher/INFOMAPS/AD05M/regeneration/AD05M_regeneration.csv", header = True, sep = ";")

In [None]:
df_AD05M = df_AD05M.withColumn('DATE3', unix_timestamp('DATE3', "yyyymmdd") .cast(TimestampType()))
df_AD05Mdf = df_AD05M.withColumn('DATE3', upper(date_format(col("DATE3"), "yyyy-mm-dd")))

In [None]:
df_AD05Mdf.select("DATE3").show(10)

In [None]:
df_AD05Mdf.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/AD05M/regeneration/GY05_UAT_GY0703_UAT_20100903_20100903075458_AD05M.csv",header = True, sep=';')

In [None]:
import os, shutil
def make_archive(source, destination):
        base = os.path.basename(destination)
        name = base.split('.')[0]
        format = base.split('.')[1]
        archive_from = os.path.dirname(source)
        archive_to = os.path.basename(source.strip(os.sep))
        print(source, destination, archive_from, archive_to)
        shutil.make_archive(name, format, archive_from, archive_to)
        shutil.move('%s.%s'%(name,format), destination)
make_archive("/data/dropbox/larcher/INFOMAPS/AD05M/regeneration/GY05_UAT_GY0703_UAT_20100903_20100903075458_AD05M.csv/GY05_UAT_GY0703_UAT_20100903_20100903075458_AD05M.CSV", "/data/dropbox/larcher/INFOMAPS/AD05M/regeneration/GY05_UAT_GY0703_UAT_20100903_20100903075458_AD05M.csv/VWZEDSY1_GY05_UAT_GY0703_UAT_20100903_20100903075458_AD05M_0004.zip")

In [None]:
def write_flag(name_imfr,number,directory):
    for file in os.listdir(directory):
        if file.endswith(".ZIP"):
            flag = file.split(name_imfr)[0] + str("flag.TXT")
            flag_ = open(str(directory) + str(flag), "w")
            flag_.write(number)
            flag_.close()
            line_command = """/data/dev_env/prog/data_prep/IMFR/send_sftp.sh
            """ + str(directory) + str(file) + """
            """ + str(directory) + str(flag) + """ 194.127.23.212 6710 dev DspR"""
            print(line_command)

In [None]:
name_imfr = "AD05M"
number = "1"
directory = "/data/dropbox/larcher/INFOMAPS/AD05M/regeneration/GY05_UAT_GY0703_UAT_20100903_20100903075458_AD05M.csv"
write_flag(name_imfr,number,directory)

In [None]:
 for file in os.listdir(directory):
        if file.endswith(".ZIP"):
            print(file)

In [None]:
flag = file.split(name_imfr)[0] + str("flag.TXT")
flag_ = open(str(directory) + str(flag), "w")
flag_.write(number)
flag_.close()

## AD06M

In [None]:
res_AD06M = "/data/dropbox/larcher/INFOMAPS/AD06M/AD06M2.csv"

queryAD06M = """
SELECT BUKRS,GJAHR,MONAT,RACCT,ABZ,BEC_ERKENNUNG1,BELDAT,BKTXT,BLART,BLDAT,BUDAT,BUPER,CPUDT,
   DATE3,
   CAST(regexp_extract(CAST(CASE 
         WHEN BLART NOT IN ('PV', 'ZA') THEN \
            CASE \
               WHEN SAP_WAERS ='EUR' THEN \
                  CASE WHEN SHKZG = 'H' THEN -1*COALESCE(TWBTRG, WRBTR) \
                    ELSE COALESCE(TWBTRG, WRBTR) \
                  END \
                WHEN SAP_WAERS = 'XPF' THEN \
                   CASE WHEN SHKZG = 'H' THEN -1*COALESCE(TWBTRG, WRBTR)/119.3317422 \
                     ELSE COALESCE(TWBTRG, WRBTR)/119.3317422 \
                   END \
                ELSE \
                   CASE WHEN SHKZG = 'H' THEN -1*DMBTR \
                      ELSE DMBTR \
                   END \
                END \
            ELSE \
                CASE WHEN SHKZG = 'H' THEN -1*DMBTR \
                    ELSE DMBTR \
                END \
           END AS STRING),'-?[0-9]*[.][0-9]{5}',0) AS DECIMAL(38,6)) AS LADM_DMBTR,
   FS_DCH,FS_WAERS,LDGRP,PRODSCHL,RVA,SGTXT,TICKID,
   CASE WHEN SHKZG = 'H' THEN -1*COALESCE(TWBTRG, WRBTR)
                        ELSE COALESCE(TWBTRG, WRBTR)
                END AS LADM_TWBTRG,
   USNAM,VSNR,WERBERNR,ZZONR

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet` 
WHERE GJAHR >= 2012  
AND (GJAHR < 2020 OR (GJAHR = 2020 AND MONAT < 6)) 
AND BUKRS IN ('9LFR','9RFR')
AND (CPUDT < '2020-06-04' OR (CPUDT = '2020-06-04' AND CPUTM < '070628'))
AND RACCT IN ('2411101020', '2411201021')
AND ZZONR IN ('COGSYS', 'TPRGCP', 'MANUEL', 'TPREEL')
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    
    
}

dict_timestampFormat = {

}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryAD06M = runInfoMap2(spark, queryAD06M, dict_dateFormat, dict_timestampFormat, '.', res_AD06M, "csv", 30, True, ";")

## AD07M

In [None]:
res_AD07M = "/data/dropbox/larcher/INFOMAPS/AD07M/AD07M_date_format.csv"

queryAD07M = """
SELECT BUKRS,GJAHR,MONAT,RACCT,ABZ,BUDAT,BUPER, FMK1, FS_HWAERS, GSAKAT, LDGRP,PRODSCHL,RISK,RVA,
                CASE WHEN SHKZG = 'H' THEN -1*COALESCE(TWBTRG, WRBTR)
                        ELSE COALESCE(TWBTRG, WRBTR)
                END AS LADM_TWBTRG,
                ZZONR
                FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet`
                WHERE GJAHR = 2020
                AND MONAT < 6
                AND (CPUDT < '2020-06-04' OR (CPUDT = '2020-06-04' AND CPUTM < '070555'))
                AND BUKRS IN ('9LFR','9PFR','9RFR')
                AND RACCT IN ('5111001008', '5111001009', '5111001060', '5111001070','5111001160', '5112001028',
                '5112001029', '5112001030', '5112001070')
                AND ZZONR = 'SIGCOL'
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    
    
}

dict_timestampFormat = {

}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryAD07M = runInfoMap2(spark, queryAD07M, dict_dateFormat, dict_timestampFormat, '.', res_AD07M, "csv", 1, True, ";")

## AE01D

In [None]:
## recherche du BASL_OUT_ID

In [None]:
df_AE01D_SRC = spark.read.csv("/data/dropbox/larcher/INFOMAPS/AE01D/LDM_PRD_FRA_AE01D_200626_073037.CSV", header = True, sep = ";")

In [None]:
df_AE01D_SRC.count()

In [None]:
basl_max = df_AE01D_SRC.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min = df_AE01D_SRC.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print(basl_max)
print(basl_min)

In [None]:
res_AE01D = "/data/dropbox/larcher/INFOMAPS/AE01D/AE01D.csv"

queryAE01D = """
SELECT BASL.BUKRS, GJAHR, MONAT, BASL.RACCT, ZUONR, A_ALTACCOUNT, ABZ, ACCRESPID, 'Z00Z' AS ACCRESPIDF, AJ, ASSURE, AWKEY, AZJ,AZP,
       BASISBETRAG, BASL_OUT_ID, BEC, BEC_ERKENNUNG1, BEC_ERKENNUNG2, BEC_ERKENNUNG3, BEC_ERKENNUNG7, BELDAT, BELNR, BETRAGSART,
       BGZN, BKTXT, BLART, BLDAT, BNRB, BSCHL, BUBSZ, BUDAT, BUPER, BUPRT, BUZEI, CISIN, CPUDT, CSG, DATE1, DATE2, DATE3, DATE4,
       CAST(regexp_extract(CAST(CASE 
         WHEN BLART NOT IN ('PV', 'ZA') THEN \
            CASE \
               WHEN SAP_WAERS ='EUR' THEN \
                  CASE WHEN SHKZG = 'H' THEN -1*COALESCE(TWBTRG, WRBTR) \
                    ELSE COALESCE(TWBTRG, WRBTR) \
                  END \
                WHEN SAP_WAERS = 'XPF' THEN \
                   CASE WHEN SHKZG = 'H' THEN -1*COALESCE(TWBTRG, WRBTR)/119.3317422 \
                     ELSE COALESCE(TWBTRG, WRBTR)/119.3317422 \
                   END \
                ELSE \
                   CASE WHEN SHKZG = 'H' THEN -1*DMBTR \
                      ELSE DMBTR \
                   END \
                END \
            ELSE \
                CASE WHEN SHKZG = 'H' THEN -1*DMBTR \
                    ELSE DMBTR \
                END \
           END AS STRING),'-?[0-9]*[.][0-9]{5}',0) AS DECIMAL(38,6)) AS LADM_DMBTR,
           ENTRC, ESTIMATE, FMK1, FMK2, FS_BLNR1, FS_BLNR2, FS_BSCHL, FS_BUKRS, FS_DCH, FS_HWAERS, FS_KOSTL, FS_LDGRP1,
       FS_VBUND1, FS_VBUND2, FS_WAERS, GJAGJE, GJVJ, GPOS, GSA, GSAKAT, GSBER, GSP, HBSNR, HGPOS, HWAER, ID_IRCA, INKASSOART, KHD,
       KOSTL, KUNDENNR, nvl(pos.fs_mwskz,pos.mwskz_fra) AS LADM_MWSKZ, LANDL, LDGRP, LIFNR, MAY, MDCF, MWSKZ, MWST_SATZ, PAYID, PERNR, PFS, PRG, PRODSCHL, PRODSCHL_FS,
       PROJK, QTE_UC, REFPGA, RIP, RISK, RISKLAND, RVA, RVJAHR, RVVERTRAG, SAP_WAERS, SCHADENOM, SDARTGRP, SEGMENT, SGTXT, SHKZG,
       SNR, SUB_ACCT, SUBRISK, TARIF, TAX_OPT, TAX_RATE, TCMU, TCODE, TICKID, TWBTRG,
       CASE WHEN SHKZG = 'H' THEN -1*COALESCE(TWBTRG, WRBTR)
             ELSE COALESCE(TWBTRG, WRBTR)
       END AS LADM_TWBTRG
       , USNAM, VBUND, VMK, VORVNR, VSNR,
       VT_AZP_ORIGINAL, VTNRAB, VTNRRE, VUNR, WAERS, WERBERNR, WITHT, WRBTR, WT_QBUIHH, WT_QSSHH, WT_WITHCD, WWERT, XREF3, YYCSG,
       YYDCH, YYLOB, YYMAY, YYPRG, ZFBDT, ZJAHR, ZUBRINGER, ZUGJAHR, ZUGMONAT, ZZBSZ, ZZFMK1, ZZFMK2, ZZONR, ZZVERD, ZZVMK, XREF1,
       XREF2
FROM parquet.`/data/prod_env/data/parsed_data/GY/VC/GY_BASL_720.parquet` AS BASL
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_BUKRS ON OWNER_BUKRS.BUKRS = BASL.BUKRS
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_RACCT ON OWNER_RACCT.RACCT = BASL.RACCT
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_ACCOUNTS ON (OWNER_ACCOUNTS.ID_BUKRS = OWNER_BUKRS.ID_BUKRS AND OWNER_ACCOUNTS.ID_RACCT = OWNER_RACCT.ID_RACCT)
WHERE BASL.RACCT NOT IN ('828888000')
AND BASL.BLART NOT IN ('PV', 'ZA')
AND BASL.PFS IS NULL
AND BASL.LDGRP IN (' ', 'L1') OR LDGRP IS NULL
AND BASL.ZZONR IN ('ABSNCA','AECMIG','AECTNV','AECTPM','BALLIL','CLEVAC','CLEVAO','CLEVAV','INVMAN','INVPFA','TPCBAEC','TPCIER',
              'TPCOAEC','TPCOASS','TPCONV','TPCTAE','TPCTAEC','TPCTNV','TPCTOM','TPDCIE','TPDCIO','TPINVE','TPNGAA','TPRDOM',
              'TPRPFA','TPRABS','TPCALY','TPGEPC','TPCARG','TPCGNV','TPCGAP')
AND BASL.BASL_OUT_ID BETWEEN '""" + str(basl_min) + """' AND '""" + str(basl_max) + """'
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    
    
}

dict_timestampFormat = {

}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryAE01D= runInfoMap2(spark, queryAE01D, dict_dateFormat, dict_timestampFormat, '.', res_AE01D, "csv", 1, True, ";")

In [None]:
res_AE01D = "/data/dropbox/larcher/INFOMAPS/AE01D/AE01D.csv"

queryAE01D = """
SELECT /*  BROADCAST(BASL_EDITED), BROADCAST(OWNER_BUKRS), BROADCAST(OWNER_RACCT), BROADCAST(OWNER_ACCOUNTS) */
        BASL.BUKRS, BASL.GJAHR, BASL.MONAT, BASL.RACCT, BASL.ZUONR, A_ALTACCOUNT, BASL.ABZ, BASL.ACCRESPID,
       BASL.BASISBETRAG, BASL.BASL_OUT_ID, BASL.BEC, BASL.BEC_ERKENNUNG1, BASL.BEC_ERKENNUNG2, BASL.BEC_ERKENNUNG3,
       BASL.BEC_ERKENNUNG7, BASL.BELDAT, BASL.BELNR, BASL.BETRAGSART, BASL.BGZN, BASL.BKTXT, BASL.BLART, BASL.BLDAT,
       BASL.BNRB, BASL.BSCHL, BASL.BUBSZ, BASL.BUDAT, BASL.BUPER, BASL.BUPRT, BASL.BUZEI, BASL.CISIN, BASL.CPUDT,
       BASL.CSG, BASL.DATE1, BASL.DATE2, BASL.DATE3, BASL.DATE4,
       BASL_EDITED.EDS_DMBTR_INS AS LADM_DMBTR,
       BASL.ENTRC, BASL.ESTIMATE, BASL.FMK1, BASL.FMK2, BASL.FS_BLNR1, BASL.FS_BLNR2, BASL.FS_BSCHL, BASL.FS_BUKRS, BASL.FS_DCH,
       BASL.FS_HWAERS, BASL.FS_KOSTL, BASL.FS_LDGRP1, BASL.FS_VBUND1, BASL.FS_VBUND2, BASL.FS_WAERS, BASL.GJAGJE, BASL.GJVJ,
       BASL.GPOS, BASL.GSA, BASL.GSAKAT, BASL.GSBER, BASL.GSP, BASL.HBSNR, BASL.HGPOS, BASL.HWAER, BASL.ID_IRCA, BASL.INKASSOART,
       BASL.KHD, BASL.KOSTL, BASL.KUNDENNR, BASL_EDITED.EDS_MWSKZ AS LADM_MWSKZ, BASL.LANDL, BASL.LDGRP, BASL.LIFNR, BASL.MAY,
       BASL.MDCF, BASL.MWSKZ, BASL.MWST_SATZ, BASL.PAYID, BASL.PERNR, BASL.PFS, BASL.PRG, BASL.PRODSCHL, BASL.PRODSCHL_FS,
       BASL.PROJK, BASL.QTE_UC, BASL.REFPGA, BASL.RIP, BASL.RISK, BASL.RISKLAND, BASL.RVA, BASL.RVJAHR, BASL.RVVERTRAG,
       BASL.SAP_WAERS, BASL.SCHADENOM, BASL.SDARTGRP, BASL.SEGMENT, BASL.SGTXT, BASL.SHKZG, BASL.SNR, BASL.SUB_ACCT, 
       BASL.SUBRISK, BASL.TARIF, BASL.TAX_OPT, BASL.TAX_RATE, BASL.TCMU, BASL.TCODE, BASL.TICKID, BASL.TWBTRG,
       BASL_EDITED.EDS_TWBTRG AS LADM_TWBTRG, BASL.USNAM, BASL.VBUND, BASL.VMK, BASL.VORVNR, BASL.VSNR, BASL.VT_AZP_ORIGINAL,
       BASL.VTNRAB, BASL.VTNRRE, BASL.VUNR, BASL.WAERS, BASL.WERBERNR, BASL.WITHT, BASL.WRBTR, BASL.WT_QBUIHH, BASL.WT_QSSHH,
       BASL.WT_WITHCD, BASL.WWERT, BASL.XREF3, BASL.YYCSG, BASL.YYDCH, BASL.YYLOB, BASL.YYMAY, BASL.YYPRG, BASL.ZFBDT,
       BASL.ZJAHR, BASL.ZUBRINGER, BASL.ZUGJAHR, BASL.ZUGMONAT, BASL.ZZBSZ, BASL.ZZFMK1, BASL.ZZFMK2, BASL.ZZONR, BASL.ZZVERD,
       BASL.ZZVMK, BASL.XREF1, BASL.XREF2
       
FROM parquet.`/data/prod_env/data/parsed_data/GY/VC/GY_BASL_720.parquet/` AS BASL
              INNER JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020` AS BASL_EDITED ON (BASL_EDITED.AUDIT_SRC_FILE = BASL.AUDIT_SRC_FILE AND BASL_EDITED.BASL_OUT_ID = BASL.BASL_OUT_ID )
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_BUKRS ON OWNER_BUKRS.BUKRS = BASL.BUKRS
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_RACCT ON OWNER_RACCT.RACCT = BASL.RACCT
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_ACCOUNTS ON (OWNER_ACCOUNTS.ID_BUKRS = OWNER_BUKRS.ID_BUKRS AND OWNER_ACCOUNTS.ID_RACCT = OWNER_RACCT.ID_RACCT)
WHERE BASL.GJAHR = 2020
AND BASL.RACCT NOT IN ('828888000')
AND BASL.BLART NOT IN ('PV','ZA')
AND BASL.PFS IS NULL
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR IN ('ABSNCA','AECMIG','AECTNV','AECTPM','BALLIL','CLEVAC','CLEVAO','CLEVAV','INVMAN','INVPFA','TPCBAEC','TPCIER',
              'TPCOAEC','TPCOASS','TPCONV','TPCTAE','TPCTAEC','TPCTNV','TPCTOM','TPDCIE','TPDCIO','TPINVE','TPNGAA','TPRDOM',
              'TPRPFA','TPRABS','TPCALY','TPGEPC','TPCARG','TPCGNV','TPCGAP')
AND BASL.BASL_OUT_ID BETWEEN '""" + str(basl_min) + """' AND '""" + str(basl_max) + """'
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "dMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "dMMMyyyy:HH:mm:ss",
        "upper" : False
    },
     "WWERT" : {
     "format" : "yyyyMMdd:HH:mm:ss",
     "upper" : False   
     }
    
}

dict_timestampFormat = {
 
}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryAE01D= runInfoMap2(spark, queryAE01D, dict_dateFormat, dict_timestampFormat, '.', res_AE01D, "csv", 1, True, ";")

In [None]:
res_AE01D = "/data/dropbox/larcher/INFOMAPS/AE01D/AE01D_old.csv"

queryAE01D = """
SELECT /* BROADCAST(OWNER_BUKRS), BROADCAST(OWNER_RACCT), BROADCAST(OWNER_ACCOUNTS) */
       BASL.BUKRS, GJAHR, MONAT, BASL.RACCT, ZUONR, A_ALTACCOUNT, ABZ, ACCRESPID, 'Z00Z' AS ACCRESPIDF, AJ, ASSURE, AWKEY, AZJ, AZP,
       BASISBETRAG, BASL_OUT_ID, BEC, BEC_ERKENNUNG1, BEC_ERKENNUNG2, BEC_ERKENNUNG3, BEC_ERKENNUNG7, BELDAT, BELNR, BETRAGSART,
       BGZN, BKTXT, BLART, BLDAT, BNRB, BSCHL, BUBSZ, BUDAT, BUPER, BUPRT, BUZEI, CISIN, CPUDT, CSG, DATE1, DATE2, DATE3, DATE4,
       CAST(regexp_extract(CAST(CASE 
         WHEN BLART NOT IN ('PV', 'ZA') THEN \
            CASE \
               WHEN SAP_WAERS ='EUR' THEN \
                  CASE WHEN SHKZG = 'H' THEN -1*COALESCE(TWBTRG, WRBTR) \
                    ELSE COALESCE(TWBTRG, WRBTR) \
                  END \
                WHEN SAP_WAERS = 'XPF' THEN \
                   CASE WHEN SHKZG = 'H' THEN -1*COALESCE(TWBTRG, WRBTR)/119.3317422 \
                     ELSE COALESCE(TWBTRG, WRBTR)/119.3317422 \
                   END \
                ELSE \
                   CASE WHEN SHKZG = 'H' THEN -1*DMBTR \
                      ELSE DMBTR \
                   END \
                END \
            ELSE \
                CASE WHEN SHKZG = 'H' THEN -1*DMBTR \
                    ELSE DMBTR \
                END \
           END AS STRING),'-?[0-9]*[.][0-9]{5}',0) AS DECIMAL(38,6)) AS LADM_DMBTR,
           ENTRC, ESTIMATE, FMK1, FMK2, FS_BLNR1, FS_BLNR2, FS_BSCHL, FS_BUKRS, FS_DCH, FS_HWAERS, FS_KOSTL, FS_LDGRP1,
       FS_VBUND1, FS_VBUND2, FS_WAERS, GJAGJE, GJVJ, GPOS, GSA, GSAKAT, GSBER, GSP, HBSNR, HGPOS, HWAER, ID_IRCA, INKASSOART, KHD,
       KOSTL, KUNDENNR, LANDL, LDGRP, LIFNR, MAY, MDCF, MWSKZ, MWST_SATZ, PAYID, PERNR, PFS, PRG, PRODSCHL, PRODSCHL_FS,
       PROJK, QTE_UC, REFPGA, RIP, RISK, RISKLAND, RVA, RVJAHR, RVVERTRAG, SAP_WAERS, SCHADENOM, SDARTGRP, SEGMENT, SGTXT, SHKZG,
       SNR, SUB_ACCT, SUBRISK, TARIF, TAX_OPT, TAX_RATE, TCMU, TCODE, TICKID, TWBTRG,
       CASE WHEN SHKZG = 'H' THEN -1*COALESCE(TWBTRG, WRBTR)
             ELSE COALESCE(TWBTRG, WRBTR)
       END AS LADM_TWBTRG
       , USNAM, VBUND, VMK, VORVNR, VSNR,
       VT_AZP_ORIGINAL, VTNRAB, VTNRRE, VUNR, WAERS, WERBERNR, WITHT, WRBTR, WT_QBUIHH, WT_QSSHH, WT_WITHCD, WWERT, XREF3, YYCSG,
       YYDCH, YYLOB, YYMAY, YYPRG, ZFBDT, ZJAHR, ZUBRINGER, ZUGJAHR, ZUGMONAT, ZZBSZ, ZZFMK1, ZZFMK2, ZZONR, ZZVERD, ZZVMK, XREF1,
       XREF2
FROM parquet.`/data/prod_env/data/parsed_data/GY/VC/GY_BASL_720.parquet` AS BASL
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_BUKRS ON OWNER_BUKRS.BUKRS = BASL.BUKRS
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_RACCT ON OWNER_RACCT.RACCT = BASL.RACCT
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_ACCOUNTS ON (OWNER_ACCOUNTS.ID_BUKRS = OWNER_BUKRS.ID_BUKRS AND OWNER_ACCOUNTS.ID_RACCT = OWNER_RACCT.ID_RACCT)
WHERE BASL.RACCT NOT IN ('828888000')
AND BASL.BLART NOT IN ('PV', 'ZA')
AND BASL.PFS IS NULL
AND BASL.LDGRP IN (' ', 'L1') OR LDGRP IS NULL
AND BASL.ZZONR IN ('ABSNCA','AECMIG','AECTNV','AECTPM','BALLIL','CLEVAC','CLEVAO','CLEVAV','INVMAN','INVPFA','TPCBAEC','TPCIER',
              'TPCOAEC','TPCOASS','TPCONV','TPCTAE','TPCTAEC','TPCTNV','TPCTOM','TPDCIE','TPDCIO','TPINVE','TPNGAA','TPRDOM',
              'TPRPFA','TPRABS','TPCALY','TPGEPC','TPCARG','TPCGNV','TPCGAP')
AND BASL.BASL_OUT_ID BETWEEN '""" + str(basl_min) + """' AND '""" + str(basl_max) + """'
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "WWERT" : {
     "format" : "yyyyMMdd:HH:mm:ss",
     "upper" : False   
     }
    
    
}

dict_timestampFormat = {

}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryAE01D= runInfoMap2(spark, queryAE01D, dict_dateFormat, dict_timestampFormat, '.', res_AE01D, "csv", 20, True, ";")

In [None]:
    query = """
SELECT BASL.BUKRS, GJAHR, MONAT, BASL.RACCT, ZUONR, A_ALTACCOUNT, ABZ, ACCRESPID, 'Z00Z' AS ACCRESPIDF, AJ, ASSURE, AWKEY, AZJ, AZP,
       BASISBETRAG, BASL_OUT_ID, BEC, BEC_ERKENNUNG1, BEC_ERKENNUNG2, BEC_ERKENNUNG3, BEC_ERKENNUNG7, BELDAT, BELNR, BETRAGSART,
       BGZN, BKTXT, BLART, BLDAT, BNRB, BSCHL, BUBSZ, BUDAT, BUPER, BUPRT, BUZEI, CISIN, CPUDT, CSG, DATE1, DATE2, DATE3, DATE4,
       EDS_DMBTR_INS AS LADM_DMBTR, ENTRC, ESTIMATE, FMK1, FMK2, FS_BLNR1, FS_BLNR2, FS_BSCHL, FS_BUKRS, FS_DCH, FS_HWAERS, FS_KOSTL, FS_LDGRP1,
       FS_VBUND1, FS_VBUND2, FS_WAERS, GJAGJE, GJVJ, GPOS, GSA, GSAKAT, GSBER, GSP, HBSNR, HGPOS, HWAER, ID_IRCA, INKASSOART, KHD,
       KOSTL, KUNDENNR, EDS_MWSKZ AS LADM_MWSKZ, LANDL, LDGRP, LIFNR, MAY, MDCF, MWSKZ, MWST_SATZ, PAYID, PERNR, PFS, PRG, PRODSCHL, PRODSCHL_FS,
       PROJK, QTE_UC, REFPGA, RIP, RISK, RISKLAND, RVA, RVJAHR, RVVERTRAG, SAP_WAERS, SCHADENOM, SDARTGRP, SEGMENT, SGTXT, SHKZG,
       SNR, SUB_ACCT, SUBRISK, TARIF, TAX_OPT, TAX_RATE, TCMU, TCODE, TICKID, TWBTRG,
       CASE WHEN SHKZG = 'H' THEN -1*COALESCE(TWBTRG, WRBTR)
                ELSE COALESCE(TWBTRG, WRBTR)
       END AS LADM_TWBTRG,
       USNAM, VBUND, VMK, VORVNR, VSNR,
       VT_AZP_ORIGINAL, VTNRAB, VTNRRE, VUNR, WAERS, WERBERNR, WITHT, WRBTR, WT_QBUIHH, WT_QSSHH, WT_WITHCD, WWERT, XREF3, YYCSG,
       YYDCH, YYLOB, YYMAY, YYPRG, ZFBDT, ZJAHR, ZUBRINGER, ZUGJAHR, ZUGMONAT, ZZBSZ, ZZFMK1, ZZFMK2, ZZONR, ZZVERD, ZZVMK, XREF1,
       XREF2
FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_BUKRS ON OWNER_BUKRS.BUKRS = BASL.BUKRS
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_RACCT ON OWNER_RACCT.RACCT = BASL.RACCT
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_ACCOUNTS ON (OWNER_ACCOUNTS.ID_BUKRS = OWNER_BUKRS.ID_BUKRS AND OWNER_ACCOUNTS.ID_RACCT = OWNER_RACCT.ID_RACCT)
WHERE BASL.RACCT NOT IN ('828888000')
AND BASL.BLART NOT IN ('PV', 'ZA')
AND BASL.PFS IS NULL
AND BASL.LDGRP IN ('', ' ', 'L1')
AND BASL.ZZONR IN ('ABSNCA','AECMIG','AECTNV','AECTPM','BALLIL','CLEVAC','CLEVAO','CLEVAV','INVMAN','INVPFA','TPCBAEC','TPCIER',
              'TPCOAEC','TPCOASS','TPCONV','TPCTAE','TPCTAEC','TPCTNV','TPCTOM','TPDCIE','TPDCIO','TPINVE','TPNGAA','TPRDOM',
              'TPRPFA','TPRABS','TPCALY','TPGEPC','TPCARG','TPCGNV','TPCGAP')
AND BASL.BASL_OUT_ID BETWEEN '""" + str(basl_min) + """' AND '""" + str(basl_max) + """'"""


In [None]:
res_AE01D = "/data/dropbox/larcher/INFOMAPS/AE01D/AE01D_test11.csv"

queryAE01D = """
SELECT /* BROADCAST(BASL), BROADCAST(OWNER_BUKRS), BROADCAST(OWNER_RACCT), BROADCAST(OWNER_ACCOUNTS) */ 
BASL.BUKRS, GJAHR, MONAT, BASL.RACCT, ZUONR, A_ALTACCOUNT, ABZ, ACCRESPID, 'Z00Z' AS ACCRESPIDF, AJ, ASSURE, AWKEY, AZJ,AZP,
       BASISBETRAG, BASL_OUT_ID, BEC, BEC_ERKENNUNG1, BEC_ERKENNUNG2, BEC_ERKENNUNG3, BEC_ERKENNUNG7, BELDAT, BELNR, BETRAGSART,
       BGZN, BKTXT, BLART, BLDAT, BNRB, BSCHL, BUBSZ, BUDAT, BUPER, BUPRT, BUZEI, CISIN, CPUDT, CSG, DATE1, DATE2, DATE3, DATE4,
       EDS_DMBTR_INS AS LADM_DMBTR ,
       ENTRC, ESTIMATE, FMK1, FMK2, FS_BLNR1, FS_BLNR2, FS_BSCHL, FS_BUKRS, FS_DCH, FS_HWAERS, FS_KOSTL, FS_LDGRP1,
       FS_VBUND1, FS_VBUND2, FS_WAERS, GJAGJE, GJVJ, GPOS, GSA, GSAKAT, GSBER, GSP, HBSNR, HGPOS, HWAER, ID_IRCA, INKASSOART, KHD,
       KOSTL, KUNDENNR, EDS_MWSKZ AS LADM_MWSKZ, LANDL, LDGRP, LIFNR, MAY, MDCF, MWSKZ, MWST_SATZ, PAYID, PERNR, PFS, PRG, PRODSCHL, PRODSCHL_FS,
       PROJK, QTE_UC, REFPGA, RIP, RISK, RISKLAND, RVA, RVJAHR, RVVERTRAG, SAP_WAERS, SCHADENOM, SDARTGRP, SEGMENT, SGTXT, SHKZG,
       SNR, SUB_ACCT, SUBRISK, TARIF, TAX_OPT, TAX_RATE, TCMU, TCODE, TICKID, TWBTRG,
       EDS_TWBTRG AS LADM_TWBTRG
       , USNAM, VBUND, VMK, VORVNR, VSNR,
       VT_AZP_ORIGINAL, VTNRAB, VTNRRE, VUNR, WAERS, WERBERNR, WITHT, WRBTR, WT_QBUIHH, WT_QSSHH, WT_WITHCD, WWERT, XREF3, YYCSG,
       YYDCH, YYLOB, YYMAY, YYPRG, ZFBDT, ZJAHR, ZUBRINGER, ZUGJAHR, ZUGMONAT, ZZBSZ, ZZFMK1, ZZFMK2, ZZONR, ZZVERD, ZZVMK, XREF1,
       XREF2

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_BUKRS ON OWNER_BUKRS.BUKRS = BASL.BUKRS
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_RACCT ON OWNER_RACCT.RACCT = BASL.RACCT
              INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_ACCOUNTS ON (OWNER_ACCOUNTS.ID_BUKRS = OWNER_BUKRS.ID_BUKRS AND OWNER_ACCOUNTS.ID_RACCT = OWNER_RACCT.ID_RACCT)
WHERE GJAHR = 2020
AND MONAT <= 6
AND BASL.RACCT NOT IN ('828888000')
AND BASL.BLART NOT IN ('PV', 'ZA')
AND BASL.PFS IS NULL
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR IN ('ABSNCA','AECMIG','AECTNV','AECTPM','BALLIL','CLEVAC','CLEVAO','CLEVAV','INVMAN','INVPFA','TPCBAEC','TPCIER',
              'TPCOAEC','TPCOASS','TPCONV','TPCTAE','TPCTAEC','TPCTNV','TPCTOM','TPDCIE','TPDCIO','TPINVE','TPNGAA','TPRDOM',
              'TPRPFA','TPRABS','TPCALY','TPGEPC','TPCARG','TPCGNV','TPCGAP')
AND BASL.BASL_OUT_ID BETWEEN '""" + str(basl_min) + """' AND '""" + str(basl_max) + """'

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "WWERT" : {
     "format" : "yyyyMMdd:HH:mm:ss",
     "upper" : True   
     }
    
    
}

dict_timestampFormat = {

}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryAE01D= runInfoMap2(spark, queryAE01D, dict_dateFormat, dict_timestampFormat, '.', res_AE01D, "csv", 1, True, ";")

In [None]:
AEO1D_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/AE01D/AE01D_test11.csv",header=True,sep=';')
AEO1D_res.registerTempTable("aeo1d_res")

In [None]:
AEO1D_res.printSchema()

In [None]:
AEO1D_src = spark.read.csv("/data/dropbox/larcher/INFOMAPS/AE01D/LDM_PRD_FRA_AE01D_200626_073037.CSV",header=True,sep=';')
AEO1D_src.registerTempTable("aeo1d_src")

In [None]:
AEO1D_src.printSchema()

In [None]:
print(AEO1D_res.count())
print(AEO1D_src.count())

In [None]:
query1 = """SELECT *
            FROM aeo1d_src
            WHERE BLART NOT IN ('PV','ZA')
            AND LDGRP IN ('L1','',' ') OR LDGRP IS NULL
            """

AEO1D_src_corrige = sqlContext.sql(query1)
AEO1D_src_corrige.count()
AEO1D_src_corrige.registerTempTable("aeo1d_src2")

In [None]:
AEO1D_src_corrige.subtract(AEO1D_res).count()

In [None]:
AEO1D_res.subtract(AEO1D_src_corrige).count()

In [None]:
AEO1D_src_corrige_dropped = AEO1D_src_corrige.drop("LADM_TWBTRG") \
                                             .drop("LADM_DMBTR") \
                                             .drop("LADM_MWSKZ") \
                                             .drop("TWBTRG") \
                                             .drop("BLDAT") \
                                             .drop("BUDAT") \
                                             .drop("CPUDT") \
                                             .drop("WWERT") 
        
    
AEO1D_res_dropped = AEO1D_res.drop("LADM_TWBTRG") \
                             .drop("LADM_DMBTR") \
                             .drop("LADM_MWSKZ") \
                             .drop("TWBTRG") \
                             .drop("BLDAT") \
                             .drop("BUDAT") \
                             .drop("CPUDT") \
                             .drop("WWERT")
        
        

In [None]:
AEO1D_src_corrige = AEO1D_src_corrige.drop("BLDAT") \
                                             .drop("BUDAT") \
                                             .drop("CPUDT") \
                                             .drop("WWERT") 
        
    
AEO1D_res = AEO1D_res.drop("BLDAT") \
                             .drop("BUDAT") \
                             .drop("CPUDT") \
                             .drop("WWERT")

In [None]:
AEO1D_src_corrige_dropped.subtract(AEO1D_res_dropped).count()

In [None]:
AEO1D_res_dropped.subtract(AEO1D_src_corrige_dropped).count()

In [None]:
AEO1D_src_corrige = AEO1D_src_corrige.drop("ACCRESPID").drop("ACCRESPIDF")
AEO1D_src_corrige = AEO1D_src_corrige.withColumn("ZUONR", AEO1D_src_corrige["ZUONR"].cast(DecimalType(38, 13))) \
                                    .withColumn("AWKEY", AEO1D_src_corrige["AWKEY"].cast(DecimalType(38, 13))) \
                                    .withColumn("BASL_OUT_ID", AEO1D_src_corrige["BASL_OUT_ID"].cast(DecimalType(38, 13))) \
                                    .withColumn("LADM_TWBTRG", AEO1D_src_corrige["LADM_TWBTRG"].cast(DecimalType(38, 13))) \
                                    .withColumn("LADM_DMBTR", AEO1D_src_corrige["LADM_DMBTR"].cast(DecimalType(38, 13))) \
                                    .withColumn("LADM_MWSKZ", AEO1D_src_corrige["LADM_MWSKZ"].cast(DecimalType(38, 13))) \
                                    .withColumn("TWBTRG", AEO1D_src_corrige["TWBTRG"].cast(DecimalType(38, 13)))

AEO1D_src_corrige_columns = AEO1D_src_corrige.columns
AEO1D_src_corrige = AEO1D_src_corrige.fillna("NULL_VALUE", subset=AEO1D_src_corrige_columns)
AEO1D_src_corrige.createOrReplaceTempView("table_query_AE01D_res")

In [None]:
AEO1D_res = AEO1D_res.drop("ACCRESPID").drop("ACCRESPIDF")
AEO1D_res = AEO1D_res.withColumn("ZUONR", AEO1D_res["ZUONR"].cast(DecimalType(38, 5))) \
                                                     .withColumn("AWKEY", AEO1D_res["AWKEY"].cast(DecimalType(38, 13))) \
                                                     .withColumn("BASL_OUT_ID", AEO1D_res["BASL_OUT_ID"].cast(DecimalType(38, 13))) \
                                                     .withColumn("LADM_TWBTRG", AEO1D_res["LADM_TWBTRG"].cast(DecimalType(38,13))) \
                                                     .withColumn("LADM_DMBTR", AEO1D_res["LADM_DMBTR"].cast(DecimalType(38, 13))) \
                                                     .withColumn("LADM_MWSKZ", AEO1D_res["LADM_MWSKZ"].cast(DecimalType(38, 13))) \
                                                     .withColumn("TWBTRG", AEO1D_res["TWBTRG"].cast(DecimalType(38, 13)))

                                                                     
AEO1D_res_columns = AEO1D_res.columns
AEO1D_res = AEO1D_res.fillna("NULL_VALUE", subset=AEO1D_res_columns)
AEO1D_res.createOrReplaceTempView("table_query_AE01D_src")

In [None]:
AEO1D_src_corrige_dropped.count()

In [None]:
AEO1D_res_dropped.count()

In [None]:
AEO1D_src_corrige.subtract(AEO1D_res).count()

In [None]:
AEO1D_res.subtract(AEO1D_src_corrige).count()

In [None]:
AEO1D_src_corrige = AEO1D_src_corrige.select("BUKRS","GJAHR","MONAT","RACCT","ZUONR","A_ALTACCOUNT","ABZ","AJ","ASSURE","AWKEY","AZJ","AZP","BASISBETRAG","BASL_OUT_ID","BEC","BEC_ERKENNUNG1","BEC_ERKENNUNG2","BEC_ERKENNUNG3","BEC_ERKENNUNG7","BELDAT","BELNR","BETRAGSART","BGZN","BKTXT","BLART","BNRB","BSCHL","BUBSZ","BUPER","BUPRT","BUZEI","CISIN","CSG","DATE1","DATE2","DATE3","DATE4","ENTRC","ESTIMATE","FMK1","FMK2","FS_BLNR1","FS_BLNR2","FS_BSCHL","FS_BUKRS","FS_DCH","FS_HWAERS","FS_KOSTL","FS_LDGRP1","FS_VBUND1","FS_VBUND2","FS_WAERS","GJAGJE","GJVJ","GPOS","GSA","GSBER","GSP","HBSNR","HGPOS","HWAER","ID_IRCA","INKASSOART","KHD","KOSTL","KUNDENNR","LANDL","LDGRP","LIFNR","MAY","MDCF","MWSKZ","MWST_SATZ","PAYID","PERNR","PFS","PRG","PRODSCHL","PRODSCHL_FS","PROJK","QTE_UC","REFPGA","RIP","RISK","RISKLAND","RVA","RVJAHR","RVVERTRAG","SAP_WAERS","SCHADENOM","SDARTGRP","SEGMENT","SGTXT","SHKZG","SNR","SUB_ACCT","SUBRISK","TARIF","TAX_OPT","TAX_RATE","TCMU","TCODE","TICKID","USNAM","VBUND","VMK","VORVNR","VSNR","VT_AZP_ORIGINAL","VTNRAB","VTNRRE","VUNR","WAERS","WERBERNR","WITHT","WRBTR","WT_QBUIHH","WT_QSSHH","WT_WITHCD","XREF3","YYCSG","YYDCH","YYLOB","YYMAY","YYPRG","ZFBDT","ZJAHR","ZUBRINGER","ZUGJAHR","ZUGMONAT","ZZBSZ","ZZFMK1","ZZFMK2","ZZONR","ZZVERD","ZZVMK","XREF1",
                                                             "XREF2","LADM_TWBTRG")

In [None]:
AEO1D_res = AEO1D_res.select("BUKRS","GJAHR","MONAT","RACCT","ZUONR","A_ALTACCOUNT","ABZ","AJ","ASSURE","AWKEY","AZJ","AZP","BASISBETRAG","BASL_OUT_ID","BEC","BEC_ERKENNUNG1","BEC_ERKENNUNG2","BEC_ERKENNUNG3","BEC_ERKENNUNG7","BELDAT","BELNR","BETRAGSART","BGZN","BKTXT","BLART","BNRB","BSCHL","BUBSZ","BUPER","BUPRT","BUZEI","CISIN","CSG","DATE1","DATE2","DATE3","DATE4","ENTRC","ESTIMATE","FMK1","FMK2","FS_BLNR1","FS_BLNR2","FS_BSCHL","FS_BUKRS","FS_DCH","FS_HWAERS","FS_KOSTL","FS_LDGRP1","FS_VBUND1","FS_VBUND2","FS_WAERS","GJAGJE","GJVJ","GPOS","GSA","GSBER","GSP","HBSNR","HGPOS","HWAER","ID_IRCA","INKASSOART","KHD","KOSTL","KUNDENNR","LANDL","LDGRP","LIFNR","MAY","MDCF","MWSKZ","MWST_SATZ","PAYID","PERNR","PFS","PRG","PRODSCHL","PRODSCHL_FS","PROJK","QTE_UC","REFPGA","RIP","RISK","RISKLAND","RVA","RVJAHR","RVVERTRAG","SAP_WAERS","SCHADENOM","SDARTGRP","SEGMENT","SGTXT","SHKZG","SNR","SUB_ACCT","SUBRISK","TARIF","TAX_OPT","TAX_RATE","TCMU","TCODE","TICKID","USNAM","VBUND","VMK","VORVNR","VSNR","VT_AZP_ORIGINAL","VTNRAB","VTNRRE","VUNR","WAERS","WERBERNR","WITHT","WRBTR","WT_QBUIHH","WT_QSSHH","WT_WITHCD","XREF3","YYCSG","YYDCH","YYLOB","YYMAY","YYPRG","ZFBDT","ZJAHR","ZUBRINGER","ZUGJAHR","ZUGMONAT","ZZBSZ","ZZFMK1","ZZFMK2","ZZONR","ZZVERD","ZZVMK","XREF1",
                                                             "XREF2","LADM_TWBTRG")

In [None]:
AEO1D_src_corrige.subtract(AEO1D_res).count()

In [None]:
AEO1D_res.subtract(AEO1D_src_corrige).count()

In [None]:
csv_test = "/data/dropbox/larcher/INFOMAPS/AE01D/analyses/AE01D_analyses_mismatch"

df_query_SKB = df_query_SKB.subtract(df_src_SKB)

eds_SKB_columns = df_query_SKB.columns

df_query_SKB_columns_renamed = df_query_SKB

for eds_SKB_column in eds_SKB_columns:
    df_query_SKB_columns_renamed = df_query_SKB_columns_renamed.withColumnRenamed(eds_SKB_column, "EDS_" + eds_SKB_column)

df_SKB_join_eds_src = df_query_SKB_columns_renamed.join(df_src_SKB, df_query_SKB_columns_renamed["EDS_BASL_OUT_ID"] == df_src_SKB["BASL_OUT_ID"], how='left_outer')

for eds_SKB_column in eds_SKB_columns:
    df_SKB_join_eds_src = df_SKB_join_eds_src.withColumn("FLAG_" + eds_SKB_column, when(df_SKB_join_eds_src["EDS_" + eds_SKB_column]==df_SKB_join_eds_src[eds_SKB_column], "True").otherwise("False"))

df_SKB_join_eds_src.repartition(1).write.csv(csv_test, sep=";", header=True)


In [None]:
csv_test = "/data/dropbox/larcher/INFOMAPS/AE01D/analyses/AE01D_analyses_mismatch.csv"

df_query_AE01D = AEO1D_res.subtract(AEO1D_src_corrige)

eds_AE01D_columns = df_query_AE01D.columns

df_query_AE01D_columns_renamed = df_query_AE01D

for eds_AE01D_columns in eds_AE01D_columns:
    df_query_AE01D_columns_renamed = df_query_AE01D_columns_renamed.withColumnRenamed(eds_AE01D_columns, "EDS_" + eds_AE01D_columns)

df_AE01D_join_eds_src = df_query_AE01D_columns_renamed.join(AEO1D_src_corrige, df_query_AE01D_columns_renamed["EDS_BASL_OUT_ID"] == AEO1D_src_corrige["BASL_OUT_ID"], how='left_outer')

for df_query_AE01D_columns_renamed in df_query_AE01D_columns_renamed:
    df_AE01D_join_eds_src = df_AE01D_join_eds_src.withColumn("FLAG_" + eds_AE01D_columns, when(df_AE01D_join_eds_src["EDS_" + eds_AE01D_columns]==df_AE01D_join_eds_src[eds_AE01D_columns], "True").otherwise("False"))

df_AE01D_join_eds_src.repartition(1).write.csv(csv_test, sep=";", header=True)


In [None]:
print(AEO1D_src_corrige_dropped.subtract(AEO1D_res_dropped).count())
AEO1D_src_corrige_dropped = AEO1D_src_corrige_dropped.subtract(AEO1D_res_dropped)

In [None]:
print(AEO1D_res_dropped.subtract(AEO1D_src_corrige_dropped).count())
AEO1D_res_dropped = AEO1D_res_dropped.subtract(AEO1D_src_corrige_dropped)

In [None]:
AEO1D_src_corrige_dropped.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/AE01D/analyses/AEO1D_src_ladm_dmbtr",header = True, sep=';')

In [None]:
AEO1D_res_dropped.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/AE01D/analyses/AEO1D_res_ladm_dmbtr",header = True, sep=';')

In [None]:
AEO1D_src_corrige.printSchema()

In [None]:
AEO1D_res.printSchema()

In [None]:
query1 = """SELECT SUM(CAST(LADM_TWBTRG AS DECIMAL(38,13)))
            FROM aeo1d_src
            WHERE BLART NOT IN ('PV','ZA')
            AND LDGRP IN ('L1','',' ') OR LDGRP IS NULL
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(CAST(LADM_TWBTRG AS DECIMAL(38,13)))
            FROM aeo1d
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(LADM_TWBTRG)
            FROM aeo1d_src
            WHERE BLART NOT IN ('PV','ZA')
            AND LDGRP IN ('L1','',' ') OR LDGRP IS NULL
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(LADM_TWBTRG)
            FROM aeo1d_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT CAST(SUM(LADM_DMBTR) AS DECIMAL(38,15))
            FROM aeo1d_src2
            """

query2 = """SELECT CAST(SUM(LADM_DMBTR) AS DECIMAL(38,15))
            FROM aeo1d_res
            """


df_tcd = sqlContext.sql(query1)
df_tcd2 = sqlContext.sql(query2)

df_tcd.show(1,False)
df_tcd2.show(1,False)

In [None]:
query1 = """SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
            FROM aeo1d_src2
            """

query2 = """SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
            FROM aeo1d_res
            """


df_tcd = sqlContext.sql(query1)
df_tcd2 = sqlContext.sql(query2)

df_tcd.show(1,False)
df_tcd2.show(1,False)

In [None]:
query1 = """SELECT SUM(LADM_DMBTR)
            FROM aeo1d_src
            """

query2 = """SELECT SUM(LADM_DMBTR)
            FROM aeo1d
            """

df_tcd = sqlContext.sql(query1)
df_tcd2 = sqlContext.sql(query2)

df_tcd.show(1,False)
df_tcd2.show(1,False)

In [None]:
query1 = """SELECT SUM(CAST(LADM_MWSKZ)
            FROM aeo1d_src
            """

query2 = """SELECT SUM(LADM_MWSKZ)
            FROM aeo1d
            """

df_tcd = sqlContext.sql(query1)
df_tcd2 = sqlContext.sql(query2)

df_tcd.show(1,False)
df_tcd2.show(1,False)

In [None]:
query1 = """SELECT SUM(LADM_TWBTRG)
            FROM aeo1d_src
            WHERE BLART NOT IN ('PV','ZA')
            AND LDGRP IN ('L1','',' ') OR LDGRP IS NULL
            """

query2 = """SELECT SUM(LADM_TWBTRG)
            FROM aeo1d_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd2 = sqlContext.sql(query2)

df_tcd.show(1,False)
df_tcd2.show(1,False)

In [None]:
query1 = """SELECT SUM(LADM_DMBTR)
            FROM aeo1d_src2
            WHERE BLART NOT IN ('PV','ZA')
            AND LDGRP IN ('L1','',' ') OR LDGRP IS NULL
            """

query2 = """SELECT SUM(LADM_DMBTR)
            FROM aeo1d_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd2 = sqlContext.sql(query2)

df_tcd.show(1,False)
df_tcd2.show(1,False)

In [None]:
query1 = """SELECT SUM(CAST(LADM_TWBTRG AS DECIMAL(38, 12)))
            FROM aeo1d_src2
            """

query2 = """SELECT SUM(CAST(LADM_TWBTRG AS DECIMAL(38, 12)))
            FROM aeo1d
            """

df_tcd = sqlContext.sql(query1)
df_tcd2 = sqlContext.sql(query2)

df_tcd.show(1,False)
df_tcd2.show(1,False)

In [None]:
query1 = """SELECT SUM(CAST(DMBTR AS DECIMAL(38, 2)))
            FROM aeo1d_src2
            """

query2 = """SELECT SUM(CAST(DMBTR AS DECIMAL(38, 2)))
            FROM aeo1d
            """

df_tcd = sqlContext.sql(query1)
df_tcd2 = sqlContext.sql(query2)

df_tcd.show(1,False)
df_tcd2.show(1,False)

In [None]:
query1 = """SELECT SUM(CAST(LADM_TWBTRG AS DECIMAL(38,13)))
            FROM aeo1d_src
            WHERE BLART NOT IN ('PV','ZA')
            AND LDGRP IN ('L1','',' ') OR LDGRP IS NULL
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT DISTINCT BUKRS
            FROM aeo1d
            """
query2 = """SELECT DISTINCT BUKRS
            FROM aeo1d_src
            WHERE BLART NOT IN ('PV','ZA')
            AND LDGRP IN ('L1','',' ') OR LDGRP IS NULL  
            """

df_tcd = sqlContext.sql(query1)
df_tcd2 = sqlContext.sql(query2)

print(df_tcd.count())
print(df_tcd2.count())

In [None]:
query1 = """SELECT DISTINCT MONAT
            FROM aeo1d
            """
query2 = """SELECT DISTINCT MONAT
            FROM aeo1d_src
            """

df_tcd = sqlContext.sql(query1)
df_tcd2 = sqlContext.sql(query2)

print(df_tcd.count())
print(df_tcd2.count())

In [None]:
query1 = """SELECT DISTINCT RACCT
            FROM aeo1d
            """
query2 = """SELECT DISTINCT RACCT
            FROM aeo1d_src
            WHERE BLART NOT IN ('PV','ZA')
            AND LDGRP IN ('L1','',' ') OR LDGRP IS NULL
            """

df_tcd = sqlContext.sql(query1)
df_tcd2 = sqlContext.sql(query2)

print(df_tcd.count())
print(df_tcd2.count())

In [None]:
query1 = """SELECT BUKRS, MONAT, RACCT,ACCRESPID,ACCRESPIDF,BELNR,BKTXT,BUZEI,ZZONR,SUM(LADM_TWBTRG), COUNT(*)
            FROM aeo1d_src
            GROUP BY 1,2,3,4,5,6,7,8,9
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(609,False)

In [None]:
df_tcd.count()

## OM02M

In [None]:
df_OM01M_SRC = spark.read.csv("/data/dropbox/larcher/INFOMAPS/OM02M/LDM_PRD_FRA_OM02M_200715_045511.CSV", header = True, sep = ";")

In [None]:
basl_max = df_OM01M_SRC.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min = df_OM01M_SRC.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print(basl_max)
print(basl_min)

In [None]:
res_OM02M = "/data/dropbox/larcher/INFOMAPS/OM02M/OM02M4.csv"

queryOM02M = """
SELECT /* BROADCAST(BASL), BROADCAST(CDM) */
BUKRS,GJAHR,MONAT,RACCT,CDM.STATUS,CDM.REOPEN_DATE,ZUONR,ACCRESPID,'Z00Z' AS ACCRESPIDF,ASSURE,AZJ,BASL.BASL_OUT_ID,BEC_ERKENNUNG1
,BELDAT,BELNR,BETRAGSART,BKTXT,BLART,BLDAT,BNRB,BUDAT,BUPER,BUPRT,DATE3,EDS_DMBTR_INS AS LADM_DMBTR,ENTRC,ESTIMATE,FMK1,FS_BLNR1,
FS_BLNR2,FS_DCH,FS_HWAERS,FS_VBUND1,FS_VBUND2,FS_WAERS,GSP,HBSNR,ID_IRCA,INKASSOART,KHD,KUNDENNR,LDGRP,MAY,MDCF,
PAYID,PFS,REFPGA,RISKLAND,RVA,RVVERTRAG,SAP_WAERS,SCHADENOM,SEGMENT,SGTXT,SNR,SUB_ACCT,TICKID,EDS_TWBTRG AS LADM_TWBTRG,VSNR,
VTNRAB,VTNRRE,WERBERNR,WT_WITHCD,YYCSG,YYDCH,YYMAY,YYPRG,ZFBDT,ZJAHR,ZZBSZ,ZZONR

       
FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet/VALID_TO=2020-07-15 09%3A52%3A50` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
WHERE GJAHR= 2020
AND MONAT = 6
AND BASL_OUT_ID < 21542947320
AND BUKRS = '9PFR'
AND (CPUDT < '2020-07-15' OR (CPUDT = '2020-07-15' AND CPUTM < '045511'))
AND VALID_FROM < '2020-06-15 00:00:00'
AND STATUS IN ('O','R')
AND RACCT IN ('1140001001','1140001002','2411201024','2411201044','2411201072','2411201150','2411201151','2412001138',
'2412001139','2412001159','2412001238','2412001241','2412001262','2412001270','2412001271','2412001272','2422001041',
'4580007269','2411201186','2412001133')
AND ( RVA IN ('400','402','454','456','458','460','461','827') 
OR RVA IS NULL) 
AND LDGRP IN (' ', 'L1')
AND ZZONR NOT IN ('REGRP1')
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
     "WWERT" : {
     "format" : "yyyyMMdd",
     "upper" : False   
     }
    
}

dict_timestampFormat = {
    "REOPEN_DATE" : {
     "format" : "ddMMMyyyy:HH:mm:ss",
     "upper" : False   
     }
 
}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryOM02M= runInfoMap2(spark, queryOM02M, dict_dateFormat, dict_timestampFormat, '.', res_OM02M, "csv", 1, True, ";")

In [None]:
res_OM02M = "/data/dropbox/larcher/INFOMAPS/OM02M/OM02M5.csv"

queryOM02M = """
SELECT /* BROADCAST(BASL), BROADCAST(CDM_NON_LETTRES), BROADCAST(CDM) */
BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,COALESCE(CDM.STATUS,'O') AS STATUS2 ,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE,BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,
BASL.ASSURE,BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,
BASL.BLDAT,BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,
BASL.FMK1,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,
BASL.HBSNR,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,BASL.REFPGA,
BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,
EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID             
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
WHERE CDM.VALID_TO <= '2020-07-15 09%3A52%3A50'
AND CDM.VALID_FROM > '2020-06-15 00:00:00'
AND BASL.GJAHR= 2020
AND BASL.MONAT = 6
AND BASL.BASL_OUT_ID < 21542947320
AND BASL.BUKRS = '9PFR'
AND (BASL.CPUDT < '2020-07-15' OR (BASL.CPUDT = '2020-07-15' AND BASL.CPUTM < '045511'))
AND CDM_NON_LETTRES.STATUS IN ('O','R')
AND BASL.RACCT IN ('1140001001','1140001002','2411201024','2411201044','2411201072','2411201150','2411201151','2412001138',
'2412001139','2412001159','2412001238','2412001241','2412001262','2412001270','2412001271','2412001272','2422001041',
'4580007269','2411201186','2412001133')
AND ( BASL.RVA IN ('400','402','454','456','458','460','461','827') 
OR BASL.RVA IS NULL) 
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1')
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
     "WWERT" : {
     "format" : "yyyyMMdd",
     "upper" : False   
     }
    
}

dict_timestampFormat = {
    "REOPEN_DATE" : {
     "format" : "ddMMMyyyy:HH:mm:ss",
     "upper" : False   
     }
 
}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryOM02M= runInfoMap2(spark, queryOM02M, dict_dateFormat, dict_timestampFormat, '.', res_OM02M, "csv", 1, True, ";")

In [None]:
res_OM02M = "/data/dropbox/larcher/INFOMAPS/OM02M/OM02M10.csv"

queryOM02M = """
SELECT /* BROADCAST(BASL), BROADCAST(CDM_NON_LETTRES), BROADCAST(CDM) */
BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,COALESCE(CDM.STATUS,'O'),COALESCE(CDM.REOPEN_DATE,''),BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,
BASL.ASSURE,BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,
BASL.BLDAT,BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,
BASL.FMK1,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,
BASL.HBSNR,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,BASL.REFPGA,
BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,
EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID 
AND CDM.STATUS='R'AND  VALID_FROM < '2020-07-15 00:00:00' AND '2020-07-15 00:00:00' <= CDM.VALID_TO   

WHERE BUKRS = '9PFR'
AND (CPUDT < '2020-07-15' OR (CPUDT = '2020-07-15' AND CPUTM < '045511'))
AND RACCT IN ('1140001001','1140001002','2411201024','2411201044','2411201072','2411201150','2411201151','2412001138',
'2412001139','2412001159','2412001238','2412001241','2412001262','2412001270','2412001271','2412001272','2422001041',
'4580007269','2411201186','2412001133')
AND ( RVA IN ('400','402','454','456','458','460','461','827') 
OR RVA IS NULL) 
AND LDGRP IN (' ', 'L1')
AND ZZONR NOT IN ('REGRP1')
AND BASL.BASL_OUT_ID BETWEEN '""" + str(basl_min) + """' AND '""" + str(basl_max) + """'

"""


dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
     "WWERT" : {
     "format" : "yyyyMMdd",
     "upper" : False   
     }
    
}

dict_timestampFormat = {
    "REOPEN_DATE" : {
     "format" : "ddMMMyyyy:HH:mm:ss",
     "upper" : False   
     }
 
}
queryOM02M= sqlContext.sql(queryOM02M)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryOM02M= runInfoMap2(spark, queryOM02M, dict_dateFormat, dict_timestampFormat, '.', res_OM02M, "csv", 1, True, ";")

In [None]:
res_OM02M = "/data/dropbox/larcher/INFOMAPS/OM02M/OM02M13.parquet"

queryOM02M = """
SELECT /* BROADCAST(BASL), BROADCAST(CDM_NON_LETTRES), BROADCAST(CDM) */
BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,COALESCE(CDM.STATUS,'O') AS STATUS,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE ,BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,
BASL.ASSURE,BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,
BASL.BLDAT,BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,
BASL.FMK1,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,
BASL.HBSNR,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,BASL.REFPGA,
BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,
EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID 
AND CDM.STATUS='R'AND  VALID_FROM < '2020-07-15 00:00:00' AND '2020-07-15 00:00:00' <= CDM.VALID_TO   

WHERE BUKRS = '9PFR'
AND (CPUDT < '2020-07-15' OR (CPUDT = '2020-07-15' AND CPUTM < '045511'))

AND RACCT IN ('1140001001','1140001002','2411201024','2411201044','2411201072','2411201150','2411201151','2412001138',
'2412001139','2412001159','2412001238','2412001241','2412001262','2412001270','2412001271','2412001272','2422001041',
'4580007269','2411201186','2412001133')

AND (RVA IN ('400','402','454','456','458','460','461','827') OR RVA IS NULL)
AND LDGRP IN (' ', 'L1')
AND ZZONR NOT IN ('REGRP1')
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
     "WWERT" : {
     "format" : "yyyyMMdd",
     "upper" : False   
     }
    
}

dict_timestampFormat = {
    "REOPEN_DATE" : {
     "format" : "ddMMMyyyy:HH:mm:ss",
     "upper" : False   
     }
 
}
queryOM02M= sqlContext.sql(queryOM02M)
queryOM02M.repartition(20).write.parquet(res_OM02M)

#queryOM02M= runInfoMap2(spark, queryOM02M, dict_dateFormat, dict_timestampFormat, '.', res_OM02M, "csv", 30, True, ";")

In [None]:
res_OM02M = "/data/dropbox/larcher/INFOMAPS/OM02M/OM02M_new3.csv"

queryOM02M = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,COALESCE(CDM.STATUS,CDM_non_lettres.STATUS) AS STATUS2 ,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE ,BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,
BASL.ASSURE,BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,
BASL.BLDAT,BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,
BASL.FMK1,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,
BASL.HBSNR,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,BASL.REFPGA,
BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,
EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID 
AND CDM.VALID_FROM < '2020-07-15 00:00:00' AND '2020-07-15 00:00:00' <= CDM.VALID_TO   
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_non_lettres ON BASL.BASL_OUT_ID = CDM_non_lettres.BASL_OUT_ID 
AND CDM_non_lettres.VALID_FROM < '2020-07-15 00:00:00' AND '2020-07-15 00:00:00' <= CDM_non_lettres.VALID_TO AND CDM_non_lettres.STATUS = 'O'

WHERE BASL.BUKRS = '9PFR'
AND BASL.GJAHR = 2020
AND BASL.MONAT = 6

AND (BASL.CPUDT < '2020-07-15' OR (BASL.CPUDT = '2020-07-15' AND BASL.CPUTM < '045511'))
AND COALESCE(CDM.STATUS,CDM_non_lettres.STATUS) IN ('O','R')  


AND BASL.RACCT IN ('1140001001','1140001002','2411201024','2411201044','2411201072','2411201150','2411201151','2412001138',
'2412001139','2412001159','2412001238','2412001241','2412001262','2412001270','2412001271','2412001272','2422001041',
'4580007269','2411201186','2412001133')

AND (BASL.RVA IN ('400','402','454','456','458','460','461','827') OR RVA IS NULL)
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1')
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
     "WWERT" : {
     "format" : "yyyyMMdd",
     "upper" : False   
     }
    
}

dict_timestampFormat = {
    "REOPEN_DATE" : {
     "format" : "ddMMMyyyy:HH:mm:ss",
     "upper" : False   
     }
 
}
#queryOM02M= sqlContext.sql(queryOM02M)
##queryOM02M.repartition(20).write.parquet(res_OM02M)

queryOM02M= runInfoMap2(spark, queryOM02M, dict_dateFormat, dict_timestampFormat, '.', res_OM02M, "csv", 1, True, ";")

In [None]:
## REQUETE A TESTER 
res_OM02M = "/data/dropbox/larcher/INFOMAPS/OM02M/OM02M_30-06_bis4.csv"

queryOM02M = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,COALESCE(CDM.STATUS,'O') AS STATUS2 ,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE ,BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,
BASL.ASSURE,BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,
BASL.BLDAT,BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,
BASL.FMK1,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,
BASL.HBSNR,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,BASL.REFPGA,
BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,
EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` 
AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID 
AND CDM.VALID_FROM < '2020-06-30 23:59:00' 
AND '2020-06-30 23:59:00' <= CDM.VALID_TO  
AND CDM.VALID_TO > BASL.BUDAT  

LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_CDM_OWNER-COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-29` AS OWNER_BUKRS ON OWNER_BUKRS.BUKRS = BASL.BUKRS
LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_CDM_OWNER-ACCOUNTS.parquet/DATA_DATE_PARTITION=2020-06-29` AS OWNER_RACCT ON OWNER_RACCT.RACCT = BASL.RACCT
LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_CDM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-29` AS OWNER_ACCOUNTS ON (OWNER_ACCOUNTS.COMPANIES_IDCOMPANIES = OWNER_BUKRS.IDCOMPANIES AND OWNER_ACCOUNTS.ACCOUNTS_IDACCOUNT = OWNER_RACCT.IDACCOUNT)


WHERE BASL.BUKRS = '9PFR'
AND BASL.GJAHR = 2020
AND BASL.MONAT = 6

AND BLART NOT IN ('PV', 'PG', 'ZA')
AND BASL.LDGRP NOT IN ('I1', 'M1', 'M3', 'K1', 'K4', 'K7')
AND (BASL.FS_LDGRP1 NOT IN ('K1','K4','K7') OR BASL.FS_LDGRP1 IS NULL)
AND (OWNER_ACCOUNTS.EXT_SYSTEM = 'X')
AND BASL.BUDAT <= '2020-06-30 23:59:00'


AND (BASL.CPUDT < '2020-06-30' OR (BASL.CPUDT = '2020-06-30' AND BASL.CPUTM < '000000'))
AND COALESCE(CDM.STATUS,'O') IN ('O','R')  


AND BASL.RACCT IN ('1140001001','1140001002','2411201024','2411201044','2411201072','2411201150','2411201151','2412001138',
'2412001139','2412001159','2412001238','2412001241','2412001262','2412001270','2412001271','2412001272','2422001041',
'4580007269','2411201186','2412001133')

AND (BASL.RVA IN ('400','402','454','456','458','460','461','827') OR RVA IS NULL)
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1')
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
     "WWERT" : {
     "format" : "yyyyMMdd",
     "upper" : False   
     }
    
}

dict_timestampFormat = {
    "REOPEN_DATE" : {
     "format" : "ddMMMyyyy:HH:mm:ss",
     "upper" : False   
     }
 
}
#queryOM02M= sqlContext.sql(queryOM02M)
##queryOM02M.repartition(20).write.parquet(res_OM02M)

queryOM02M= runInfoMap2(spark, queryOM02M, dict_dateFormat, dict_timestampFormat, '.', res_OM02M, "csv", 50, True, ";")

In [None]:
OM02M_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/OM02M/OM02M_30-06_bis4.csv",header=True,sep=';')
OM02M_res.registerTempTable("OM02M_res")

In [None]:
res_OM02M = "/data/dropbox/larcher/INFOMAPS/OM02M/OM02M_30-06_bis_test.parquet"

queryOM02M = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,
BASL.ASSURE,BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,
BASL.BLDAT,BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,
BASL.FMK1,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,
BASL.HBSNR,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,BASL.REFPGA,
BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,
EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR



FROM OM02M_res


WHERE BASL.BUKRS = '9PFR'
AND BASL.GJAHR = 2020
AND BASL.MONAT = 6

AND (BASL.CPUDT < '2020-06-30' OR (BASL.CPUDT = '2020-06-30' AND BASL.CPUTM < '000000'))

AND BASL.RACCT IN ('1140001001','1140001002','2411201024','2411201044','2411201072','2411201150','2411201151','2412001138',
'2412001139','2412001159','2412001238','2412001241','2412001262','2412001270','2412001271','2412001272','2422001041',
'4580007269','2411201186','2412001133')

AND (BASL.RVA IN ('400','402','454','456','458','460','461','827') OR RVA IS NULL)
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1')
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
     "WWERT" : {
     "format" : "yyyyMMdd",
     "upper" : False   
     }
    
}

dict_timestampFormat = {
    "REOPEN_DATE" : {
     "format" : "ddMMMyyyy:HH:mm:ss",
     "upper" : False   
     }
 
}
queryOM02M= sqlContext.sql(queryOM02M)
queryOM02M.repartition(20).write.parquet(res_OM02M)

#queryOM02M= runInfoMap2(spark, queryOM02M, dict_dateFormat, dict_timestampFormat, '.', res_OM02M, "csv", 50, True, ";")

In [None]:
sqlContext.sql("DROP TABLE IF EXISTS non_lettres")
OM02M_res = spark.read.parquet("/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet")
OM02M_res.registerTempTable("non_lettres")

In [None]:
query1 = """SELECT * 
            FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet`
            WHERE BASL_OUT_ID IN ('21785626581')
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(10,False)

In [None]:
query1 = """SELECT * 
            FROM parquet.`/data/prod_env/data/parsed_data/GY01/VC/GY01_FACT_CDM.parquet`
            WHERE BASL_OUT_ID IN ('21710962893')      
            AND DATA_DATE_PARTITION > '2020-06-01'
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(10,False)

In [None]:
query1 = """SELECT *
            FROM parquet.`/data/prod_env/data/parsed_data/GY/VC/GY_BASL_720.parquet`
            WHERE BASL_OUT_ID IN ('21535733053')
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(10,False)

In [None]:
sqlContext.sql("DROP TABLE IF EXISTS OM02M_src")
OM02M_src = spark.read.csv("/data/dropbox/larcher/INFOMAPS/OM02M/LDM_PRD_FRA_OM02M_200715_045511.CSV",header=True,sep=';')
OM02M_src.registerTempTable("OM02M_src")

In [None]:
query1 = """SELECT *
            FROM OM02M_res2
            WHERE BASL_OUT_ID IN('21605153448')
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(5,False)
df_tcd.count()

In [None]:
OM02M_res.count()

In [None]:
sqlContext.sql("DROP TABLE IF EXISTS OM02M_res2")
OM02M_res2 = spark.read.csv("/data/dropbox/larcher/INFOMAPS/OM02M/OM02M_30-06_bis4.csv",header=True,sep=';')
OM02M_res2.registerTempTable("OM02M_res2")

In [None]:
OM02M_res2.count(OM02M_src)

In [None]:
query1 = """SELECT *
            FROM OM02M_src
            WHERE GJAHR = 2020 
            AND MONAT = '6'
            """

df_tcd = sqlContext.sql(query1)
#df_tcd.show(5,False)
df_tcd.count()

In [None]:
OM02M_res_final = OM02M_res_final.select("BASL_OUT_ID")
df_tcd = df_tcd.select("BASL_OUT_ID")

In [None]:
substract_om02m = OM02M_res2.subtract(OM02M_src)
substract_om02m.registerTempTable("OM02M_subtract")

In [None]:
OM02M_res2.subtract(OM02M_src).show(1)

In [None]:
df_tcd.subtract(OM02M_res_final).show(1)

In [None]:
OM02M_res2.subtract(OM02M_src).count()

In [None]:
query1 = """SELECT DISTINCT STATUS2
            FROM OM02M_res2
            WHERE BASL_OUT_ID IN('21605153448')
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(5,False)
df_tcd.count()

In [None]:
query1 = """SELECT a.basl_out_id as test , b.*
            FROM OM02M_subtract as a
            RIGHT JOIN OM02M_res2 as b ON a.BASL_OUT_ID = b.BASL_OUT_ID
            """

df_tcd = sqlContext.sql(query1)
df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/OM02M/subtract2.csv",header=True,sep=';')

In [None]:
query1 = """SELECT BASL_OUT_ID, count(BLDAT)
            FROM OM02M_res2
            GROUP BY BASL_OUT_ID
            HAVING count(BLDAT) > 1 
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)
df_tcd.count()

In [None]:
sqlContext.sql("DROP TABLE IF EXISTS OM02M_final")
OM02M_res_final = spark.read.csv("/data/dropbox/larcher/INFOMAPS/OM02M/OM02M_all_historique_v2.csv",header=True,sep=';')
OM02M_res_final.registerTempTable("OM02M_final")

In [None]:
OM02M_res_final.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/OM02M/RESULTAT/OM02M_all_historique.csv",header = True, sep=';')

In [None]:
OM02M_res_final.count()

In [None]:
OM02M_res.write.csv("/data/dropbox/larcher/INFOMAPS/OM02M/echantillion_basl_out_id.csv",header=True,sep=';')

In [None]:
OM02M_src = OM02M_src.filter("GJAHR ='2020' AND  MONAT='6'")

In [None]:
print(OM02M_src.count())
print(OM02M_res.count())

In [None]:
query1 = """SELECT DISTINCT STATUS2
            FROM OM02M_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(10,False)

In [None]:
OM02M_res = OM02M_res.drop("ACCRESPID").drop("ACCRESPIDF").drop("BLDAT").drop("LADM_DMBTR").drop("LADM_TWBTRG")

In [None]:
OM02M_src = OM02M_src.drop("ACCRESPID").drop("ACCRESPIDF").drop("BLDAT").drop("LADM_DMBTR").drop("LADM_TWBTRG")

In [None]:
OM02M_res = OM02M_res.select("BASL_OUT_ID")

In [None]:
OM02M_src = OM02M_src.select("BASL_OUT_ID")

In [None]:
OM02M_src.subtract(OM02M_res).count()

In [None]:
OM02M_res.subtract(OM02M_src).count()

In [None]:
OM02M_res.write.csv("/data/dropbox/larcher/INFOMAPS/OM02M/om02mcsv.csv",header=True,sep=';')

In [None]:
OM02M_src = OM02M_src.select("BASL_OUT_ID")
OM02M_res = OM02M_res.select("BASL_OUT_ID")

In [None]:
OM02M_src.subtract(OM02M_res).count()

In [None]:
OM02M_res.subtract(OM02M_src).count()

In [None]:
OM02M_res = OM02M_res.dropDuplicates()

In [None]:
OM02M_res.count()

In [None]:
basl_max = OM02M_src.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min = OM02M_src.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print(basl_max)
print(basl_min)

In [None]:
basl_max = OM02M_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min = OM02M_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print(basl_max)
print(basl_min)

In [None]:
query1 = """SELECT DISTINCT RACCT
            FROM OM02M_src
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(100,False)

In [None]:
query1 = """SELECT DISTINCT RACCT
            FROM OM02M_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(100,False)

In [None]:
OM02M_src.filter("RVA = '")

In [None]:
sqlContext.sql("DROP TABLE IF EXISTS cdm")
cdm_df = spark.read.parquet("/data/prod_env/data/parsed_data/GY01/VC/GY01_FACT_CDM.parquet")
cdm_df.registerTempTable("cdm")

In [None]:
sqlContext.sql("DROP TABLE IF EXISTS basl")
cdm_df = spark.read.parquet("/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/")
cdm_df.registerTempTable("basl")

In [None]:
query1 = """SELECT * 
            FROM basl
            WHERE BASL_OUT_ID IN ('19322353461','15196734930','20244031178')
            
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(100,False)

# IB03D

In [None]:
res_IB03D = "/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_run_1.csv"

queryIB03D = """
SELECT /* BROADCAST(BASL), BROADCAST(CDM_NON_LETTRES), BROADCAST(CDM) */
BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,.STATUS,.REOPEN_DATE,BASL.ZUONR,BASL.ACCRESPID,BASL.ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,BASL.LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,BASL.ID_IRCA,
BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,BASL.REFPGA,BASL.RISKLAND,
BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,
BASL.LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID 
AND CDM.STATUS='R'AND  VALID_FROM < '2020-07-15 00:00:00' AND '2020-07-15 00:00:00' <= CDM.VALID_TO   

AND COALESCE(CDM.STATUS,CDM_non_lettres.STATUS) IN ('O','R')  



AND (CPUDT < '2020-07-15' OR (CPUDT = '2020-07-15' AND CPUTM < '035457'))
WHERE BUKRS IN ('9PBL','9PGF','9PGP','9PMF','9PMQ','9PNC','9PPF','9PPM','9PRE','9PYT')
AND RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139,'2412001159,'2412001271')
AND LDGRP IN (' ', 'L1')
AND ZZONR NOT IN ('REGRP1')
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    
    
}

dict_timestampFormat = {

}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryIB03D = runInfoMap2(spark, queryIB03D, dict_dateFormat, dict_timestampFormat, '.', res_IB03D, "csv", 1, True, ";")

# IB04D

In [None]:
SELECT /* BROADCAST(BASL), BROADCAST(CDM_NON_LETTRES), BROADCAST(CDM) */

BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT, COALESCE(CDM.STATUS,CDM_non_lettres.STATUS) AS STATUS2 , COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE,BASL.ZUONR,BASL.ACCRESPID,BASL.ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,BASL.LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,BASL.LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID 
AND VALID_FROM < '2020-07-15 03:54:57' AND '2020-07-15 03:54:57' <= CDM.VALID_TO AND CDM.STATUS='R' 
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_non_lettres ON BASL.BASL_OUT_ID = CDM_non_lettres.BASL_OUT_ID 
AND VALID_FROM < '2020-07-15 03:54:57' AND '2020-07-15 03:54:57' <= cdm_mvmt_non_lettre.VALID_TO AND cdm_mvmt_non_lettre.STATUS = 'O'

WHERE COALESCE(CDM.STATUS,CDM_non_lettres.STATUS) IN ('O','R') 

AND BUKRS = '9PFR'

AND (CPUDT < '2020-07-15' OR (CPUDT = '2020-07-15' AND CPUTM < '035457'))

AND RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND ( RVA IN ('402','454','456','458','460','461','827')  
AND LDGRP IN (' ', 'L1')
AND ZZONR NOT IN ('REGRP1') 

In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_run_1.csv"

queryIB04D = """
SELECT /*  BROADCAST(CDM), BROADCAST(OWNER_BUKRS), BROADCAST(OWNER_RACCT), BROADCAST(OWNER_ACCOUNTS) */
BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,COALESCE(CDM.STATUS,'O') AS STATUS2 ,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM 
ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID 
AND CDM.VALID_FROM < '2020-08-03 23:59:00' 
AND '2020-08-03 23:59:00' <= CDM.VALID_TO  
AND CDM.VALID_TO > BASL.BUDAT  

LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_CDM_OWNER-COMPANIES.parquet/DATA_DATE_PARTITION=2020-07-30` AS OWNER_BUKRS ON OWNER_BUKRS.BUKRS = BASL.BUKRS
LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_CDM_OWNER-ACCOUNTS.parquet/DATA_DATE_PARTITION=2020-07-30` AS OWNER_RACCT ON OWNER_RACCT.RACCT = BASL.RACCT
LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_CDM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-07-30` AS OWNER_ACCOUNTS ON (OWNER_ACCOUNTS.COMPANIES_IDCOMPANIES = OWNER_BUKRS.IDCOMPANIES AND OWNER_ACCOUNTS.ACCOUNTS_IDACCOUNT = OWNER_RACCT.IDACCOUNT)

AND BLART NOT IN ('PV', 'PG', 'ZA')
AND BASL.LDGRP NOT IN ('I1', 'M1', 'M3', 'K1', 'K4', 'K7')
AND (BASL.FS_LDGRP1 NOT IN ('K1','K4','K7') OR BASL.FS_LDGRP1 IS NULL)
AND (OWNER_ACCOUNTS.EXT_SYSTEM = 'X')
AND BASL.BUDAT <= '2020-06-30 23:59:00'

AND GJAHR = 2020
AND MONAT IN (6,7)
AND BASL.BUKRS = '9PFR'
AND (BASL.CPUDT < '2020-08-03' OR (BASL.CPUDT = '2020-08-03' AND BASL.CPUTM < '031759'))
AND COALESCE(CDM.STATUS,'O') IN ('O','R') 

AND BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND BASL.RVA IN ('402','454','456','458','460','461','827')  
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    
    
}

dict_timestampFormat = {

}
dfIB04D= sqlContext.sql(queryIB04D)
dfIB04D.repartition(10).write.parquet(res_IB04D)

#queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")

In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_all_historique.csv"

queryIB04D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT, COALESCE(CDM.STATUS,'O') AS STATUS,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID 
AND CDM.VALID_FROM < '2020-08-03 23:59:00' 
AND '2020-08-03 23:59:00' <= CDM.VALID_TO  
AND CDM.VALID_TO > BASL.BUDAT  
AND BASL.BUDAT <= '2020-08-03 23:59:00'

LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_CDM_OWNER-COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-29` AS OWNER_BUKRS ON OWNER_BUKRS.BUKRS = BASL.BUKRS
LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_CDM_OWNER-ACCOUNTS.parquet/DATA_DATE_PARTITION=2020-06-29` AS OWNER_RACCT ON OWNER_RACCT.RACCT = BASL.RACCT
LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_CDM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-29` AS OWNER_ACCOUNTS ON (OWNER_ACCOUNTS.COMPANIES_IDCOMPANIES = OWNER_BUKRS.IDCOMPANIES AND OWNER_ACCOUNTS.ACCOUNTS_IDACCOUNT = OWNER_RACCT.IDACCOUNT)

WHERE BASL.BLART NOT IN ('PV', 'PG', 'ZA')
AND BASL.LDGRP NOT IN ('I1', 'M1', 'M3', 'K1', 'K4', 'K7')
AND (BASL.FS_LDGRP1 NOT IN ('K1','K4','K7') OR BASL.FS_LDGRP1 IS NULL)
AND (OWNER_ACCOUNTS.EXT_SYSTEM = 'X')

AND BASL.BUKRS = '9PFR'
AND (BASL.CPUDT < '2020-08-03' OR (BASL.CPUDT = '2020-08-03' AND BASL.CPUTM < '031759'))

AND COALESCE(CDM.STATUS,'O') IN ('O','R')

AND BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND BASL.RVA IN ('402','454','456','458','460','461','827')  
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    
    
}

dict_timestampFormat = {

}
# dfIB04D= sqlContext.sql(queryIB04D)
# dfIB04D.repartition(10).write.parquet(res_IB04D)

queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")

In [None]:
sqlContext.sql("DROP TABLE IF EXISTS ib04d_src")
df = spark.read.csv("/data/dropbox/larcher/INFOMAPS/IB04D/LDM_PRD_FRA_IB04D_200803_031759.CSV",header=True, sep=';')
df.registerTempTable("ib04d_src")

In [None]:
sqlContext.sql("DROP TABLE IF EXISTS ib04d_res")
df_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_test2_with_cdm.csv",header=True, sep=';')
df_res.registerTempTable("ib04d_res")

In [None]:
print(df.count())
print(df_res.count())

In [None]:
query1 = """SELECT DISTINCT ZZONR
            FROM ib04d_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(20,False)

In [None]:
df_echantillion = df.select('BASL_OUT_ID') 
df_res_echantillion = df_res.select('BASL_OUT_ID')

In [None]:
print(df_res_echantillion.subtract(df_echantillion).count())
print(df_echantillion.subtract(df_res_echantillion).count())

In [None]:
df_res_echantillion.count()

In [None]:
subtract_res = df_res_echantillion.subtract(df_echantillion)
subtract_res.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/IB04D/analyses/sebtract_res.csv",header = True, sep=';')

In [None]:
subtract_src = df_echantillion.subtract(df_res_echantillion)
subtract_src.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/IB04D/analyses/sebtract_src.csv",header = True, sep=';')

In [None]:
query1 = """SELECT SUM(CAST(LADM_TWBTRG AS DECIMAL(38,13)))
            FROM ib04d_src
            WHERE GJAHR = 2020 
            AND MONAT = 6
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(CAST(LADM_TWBTRG AS DECIMAL(38,13)))
            FROM ib04d
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(CAST(LADM_DMBTR AS DECIMAL(38,13)))
            FROM ib04d_src
            WHERE GJAHR = 2020 
            AND MONAT = 6
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(CAST(LADM_DMBTR AS DECIMAL(38,13)))
            FROM ib04d
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT * 
            FROM parquet.`/data/prod_env/data/parsed_data/GY01/VC/GY01_FACT_CDM.parquet`
            WHERE BASL_OUT_ID IN ('2299827958','4421029513','4824923662','2925304183','3644301678','2929888507','4773164457','4773163281','1561801842','2937486726','870690527','4297108604','4297108330')
           """

df_tcd = sqlContext.sql(query1)
df_tcd.show(100,False)

In [None]:
query1 = """SELECT * 
            FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet`
            WHERE BASL_OUT_ID IN ('2299827958','4421029513','4824923662','2925304183','3644301678','2929888507','4773164457','4773163281','1561801842','2937486726','870690527','4297108604','4297108330')
            AND DATA_DATE_PARTITION > '2020-06-29'
           """

df_tcd = sqlContext.sql(query1)
df_tcd.show(100,False)

In [None]:
query1 = """SELECT * 
            FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet`
            WHERE BASL_OUT_ID IN ('19702047285','21772763632','21772763629','20706775850','19702047284','19696475721','21772763633','21772763631','20706775851','21772763630','19696475722')
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(20,False)

In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_all_historique.csv"

queryIB04D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT, COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) AS STATUS,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG,BASL.ZFBDT_AUDIT AS ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID             
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
AND CDM.VALID_FROM < '2020-08-03 23:59:00' 
AND '2020-08-03 23:59:00' <= CDM.VALID_TO  
AND CDM.VALID_TO > BASL.BUDAT  
AND BASL.BUDAT <= '2020-08-03 23:59:00'

WHERE BASL.BUKRS = '9PFR'
AND COALESCE(CDM.STATUS,'O') IN ('O','R')

AND (
(
(BASL.CPUDT < '2020-08-03' OR (BASL.CPUDT = '2020-08-03' AND BASL.CPUTM < '031759'))
AND BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND BASL.RVA IN ('402','454','456','458','460','461','827')  
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 
)
OR
(
(BASL.CPUDT < '2020-08-03' OR (BASL.CPUDT = '2020-08-03' AND BASL.CPUTM < '021747'))
AND BUKRS IN ('9PBL','9PGF','9PGP','9PMF','9PMQ','9PNC','9PPF','9PPM','9PRE','9PYT')
AND RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND LDGRP IN (' ', 'L1')
AND ZZONR NOT IN ('REGRP1')
)


)

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
# dfIB04D= sqlContext.sql(queryIB04D)
# dfIB04D.repartition(10).write.parquet(res_IB04D)

queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")

In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D+IB03D.csv"

queryIB04D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT, COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) AS STATUS,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG,BASL.ZFBDT_AUDIT AS ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID             
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
AND CDM.VALID_FROM < '2020-08-03 23:59:00' 
AND '2020-08-03 23:59:00' <= CDM.VALID_TO  
AND CDM.VALID_TO > BASL.BUDAT  
AND BASL.BUDAT <= '2020-08-03 23:59:00'

AND BASL.BUKRS = '9PFR'
AND COALESCE(CDM.STATUS,'O') IN ('O','R')
AND MONAT = 6 
AND GJAHR = 2020
AND BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 

AND (
    (
    (BASL.CPUDT < '2020-08-03' OR (BASL.CPUDT = '2020-08-03' AND BASL.CPUTM < '031759'))
    AND BASL.RVA IN ('402','454','456','458','460','461','827')  
     )
    OR
    (
    BASL.CPUDT < '2020-08-03' OR (BASL.CPUDT = '2020-08-03' AND BASL.CPUTM < '021747')
    AND BASL.BUKRS IN ('9PBL','9PGF','9PGP','9PMF','9PMQ','9PNC','9PPF','9PPM','9PRE','9PYT')
    )
)

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
# dfIB04D= sqlContext.sql(queryIB04D)
# dfIB04D.repartition(10).write.parquet(res_IB04D)

queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")


In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D+IB03D_without_cdm.parquet"

queryIB04D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG,BASL.ZFBDT_AUDIT AS ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR,BASL.CPUDT,BASL.CPUTM


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL


WHERE BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 

AND (


        (
            BASL.BUKRS = '9PFR'
            AND (BASL.CPUDT < '2020-08-06' OR (BASL.CPUDT = '2020-08-06' AND BASL.CPUTM < '062314'))
            AND BASL.RVA IN ('402','454','456','458','460','461','827')  
         )
    OR
        (
            BASL.BUKRS IN ('9PBL','9PGF','9PGP','9PMF','9PMQ','9PNC','9PPF','9PPM','9PRE','9PYT')
            AND (BASL.CPUDT < '2020-08-06' OR (BASL.CPUDT = '2020-08-06' AND BASL.CPUTM < '022304'))
             
        )
    )

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
dfIB04D= sqlContext.sql(queryIB04D)
dfIB04D.repartition(10).write.parquet(res_IB04D)

#queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")


In [None]:
sqlContext.sql("DROP TABLE IF EXISTS ib04d_src")
df_res2 = spark.read.csv("/data/dropbox/larcher/INFOMAPS/IB03D/LDM_PRD_FRA_IB03D_200806_022304.CSV",header=True,sep=';')
df_res2.registerTempTable("ib04d_src")

In [None]:
sqlContext.sql("DROP TABLE IF EXISTS ib04d_res2")
df_res2 = spark.read.csv("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D+IB03D_with_cdm2_all_histo_test.csv",header=True,sep=';')
df_res2.registerTempTable("ib04d_res2")

In [None]:
df_res2.count()

In [None]:
query = """ SELECT DISTINCT BLART
            FROM ib04d_src
            WHERE GJAHR = 2020 AND MONAT = 6
        """ 

df_tcd = sqlContext.sql(query)
df_tcd.show()

In [None]:
query = """ SELECT DISTINCT RACCT
            FROM ib04d_res2
            WHERE BUKRS IN ('9PBL','9PGF','9PGP','9PMF','9PMQ','9PNC','9PPF','9PPM','9PRE','9PYT')
            
        """ 

df_tcd = sqlContext.sql(query)
df_tcd.show()

In [None]:
query = """ SELECT DISTINCT BLART
            FROM ib04d_res2
            WHERE BUKRS IN ('9PFR')
            
        """ 

df_tcd = sqlContext.sql(query)
df_tcd.show()

In [None]:
df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_resultat",header = True, sep=';')

In [None]:
query = """ SELECT *
            FROM ib04d_res2
            WHERE BUKRS IN ('9PFR')
            AND STATUS IN ('O','R')
        """ 

df_tcd = sqlContext.sql(query)
df_tcd.count()

In [None]:
query = """ SELECT *
            FROM ib04d_res2
            WHERE BUKRS IN ('9PFR')
            AND STATUS IN ('O','R')
        """ 

df_tcd = sqlContext.sql(query)
df_tcd.show()

In [None]:
query = """ SELECT DISTINCT LADM_
            FROM ib04d_res2
            WHERE BUKRS IN ('9PBL','9PGF','9PGP','9PMF','9PMQ','9PNC','9PPF','9PPM','9PRE','9PYT')
        """ 

df_tcd = sqlContext.sql(query)
df_tcd.show(61)

In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D+IB03D_with_cdm2_all_histo_test.csv"

queryIB04D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) AS STATUS,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3, LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID, LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG, ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR


FROM parquet.`/data/dropbox/larcher/INFOMAPS/IB04D/IB04D+IB03D_without_cdm.parquet` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID             
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
AND CDM.VALID_FROM < '2020-08-06 23:59:00' 
AND '2020-08-06 23:59:00' <= CDM.VALID_TO  
AND CDM.VALID_TO > BASL.BUDAT  
AND BASL.BUDAT <= '2020-08-06 23:59:00'

WHERE BASL.BUKRS IN ('9PBL','9PGF','9PGP','9PMF','9PMQ','9PNC','9PPF','9PPM','9PRE','9PYT','9PFR')
AND COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) IN ('O','R')
AND BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
# dfIB04D= sqlContext.sql(queryIB04D)
# dfIB04D.repartition(10).write.parquet(res_IB04D)

queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")


In [None]:
##### test IB04M nouveau code 

In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_test3_wt_cdm.parquet"

queryIB04D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG,BASL.ZFBDT_AUDIT AS ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR,BASL.CPUDT,BASL.CPUTM


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL


WHERE BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 


AND (


        (
            BASL.BUKRS = '9PFR'
            AND (BASL.CPUDT < '2020-08-19' OR (BASL.CPUDT = '2020-08-19' AND BASL.CPUTM < '033721'))
            AND BASL.RVA IN ('402','454','456','458','460','461','827')   
         )
    OR
        (
            BASL.BUKRS IN ('9PBL','9PGF','9PGP','9PMF','9PMQ','9PNC','9PPF','9PPM','9PRE','9PYT')
            AND (BASL.CPUDT < '2020-08-19' OR (BASL.CPUDT = '2020-08-19' AND BASL.CPUTM < '023712'))
             
        )
    )

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
dfIB04D= sqlContext.sql(queryIB04D)
dfIB04D.repartition(50).write.parquet(res_IB04D)

#queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")


In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_test_1908_wt_cdm.parquet"

queryIB04D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG,BASL.ZFBDT_AUDIT AS ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR,BASL.CPUDT,BASL.CPUTM


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/2011_2012_BASL_720_dt_cpt.parquet/` AS BASL


WHERE BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 
AND BASL.BUKRS = '9PFR'
AND (BASL.CPUDT < '2020-08-19' OR (BASL.CPUDT = '2020-08-19' AND BASL.CPUTM < '033721'))
AND BASL.RVA IN ('402','454','456','458','460','461','827')   


"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
dfIB04D= sqlContext.sql(queryIB04D)
dfIB04D.repartition(10).write.parquet(res_IB04D)

#queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")


In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_2020_change_hour.parquet"

queryIB04D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG,BASL.ZFBDT_AUDIT AS ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR,BASL.CPUDT,BASL.CPUTM


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL


WHERE BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND GJAHR = 2020
AND (BASL.CPUDT < '2020-08-25' OR (BASL.CPUDT = '2020-08-25' AND BASL.CPUTM < '030000'))
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 
AND BASL.BUKRS = '9PFR'
AND BASL.RVA IN ('402','454','456','458','460','461','827')   


"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
dfIB04D= sqlContext.sql(queryIB04D)
dfIB04D.repartition(20).write.parquet(res_IB04D)

#queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")


In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_2019.parquet"

queryIB04D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG,BASL.ZFBDT_AUDIT AS ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR,BASL.CPUDT,BASL.CPUTM


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL


WHERE BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND GJAHR = 2019
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 
AND BASL.BUKRS = '9PFR'
AND BASL.RVA IN ('402','454','456','458','460','461','827')   


"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
dfIB04D= sqlContext.sql(queryIB04D)
dfIB04D.repartition(20).write.parquet(res_IB04D)

#queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")


In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_test2_with_cdm.csv"

queryIB04D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) AS STATUS,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3, LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID,EDS_TWBTRG AS LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG, ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR



FROM parquet.`/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_test2_wt_cdm.parquet` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID
AND CDM_NON_LETTRES.VALID_FROM < '2020-08-18 03:36:29' 
AND '2020-08-18 03:36:29' <= CDM_NON_LETTRES.VALID_TO   
AND BASL.BUDAT <= '2020-08-18 03:36:29'

LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
AND CDM.VALID_FROM < '2020-08-18 03:36:29' 
AND '2020-08-18 03:36:29' <= CDM.VALID_TO  
AND CDM.VALID_TO > BASL.BUDAT  
AND BASL.BUDAT <= '2020-08-18 03:36:29'


WHERE COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) IN ('O','R')

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
# dfIB04D= sqlContext.sql(queryIB04D)
# dfIB04D.repartition(10).write.parquet(res_IB04D)

queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")


In [None]:
# CONCATENATION DES DF 

In [None]:
ib04d_2011_2012 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_2011_2012.parquet")
ib04d_2013 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_2013.parquet")
ib04d_2014_15 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_2014_2015.parquet")
ib04d_2016 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_2016.parquet")
ib04d_2017 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_2017.parquet")
ib04d_2018 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_2018.parquet")
ib04d_2019 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_2019.parquet")
ib04d_2020 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_2020_change_hour.parquet")

In [None]:
print(ib04d_2011_2012.count())
print(ib04d_2013.count())
print(ib04d_2014_15.count())

In [None]:
from functools import reduce
from pyspark.sql import DataFrame

dfs = [ib04d_2011_2012,ib04d_2013,ib04d_2014_15,ib04d_2016,ib04d_2017,ib04d_2018,ib04d_2019,ib04d_2020]
df = reduce(DataFrame.unionAll, dfs)

In [None]:
df.count()

In [None]:
df.repartition(50).write.parquet('/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_concat_2508_change_hour.parquet')

In [None]:
res_IB04D = "/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_concat_with_cdm_change_hour.csv"

queryIB04D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) AS STATUS,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3, LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID, LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG, ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR


FROM parquet.`/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_concat_2508_change_hour.parquet` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID
AND CDM_NON_LETTRES.VALID_FROM < '2020-08-25 03:00:00' 
AND '2020-08-25 03:00:00' <= CDM_NON_LETTRES.VALID_TO   
AND BASL.BUDAT <= '2020-08-25 03:00:00'

LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
AND CDM.VALID_FROM < '2020-08-25 03:00:00' 
AND '2020-08-25 03:00:00' <= CDM.VALID_TO  
AND CDM.VALID_TO > BASL.BUDAT  
AND BASL.BUDAT <= '2020-08-25 03:00:00'


WHERE COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) IN ('O','R')
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
# dfIB04D= sqlContext.sql(queryIB04D)
# dfIB04D.repartition(10).write.parquet(res_IB04D)

queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 1, True, ";")


In [None]:
AND BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 
AND BASL.BUKRS = '9PFR'
AND (BASL.CPUDT < '2020-08-25' OR (BASL.CPUDT = '2020-08-25' AND BASL.CPUTM < '034429'))
AND BASL.RVA IN ('402','454','456','458','460','461','827')   

In [None]:
df_src = spark.read.csv('/data/dropbox/larcher/INFOMAPS/IB04D/LDM_PRD_FRA_IB04D_200825_034429.CSV',header= True,sep=';')

In [None]:
df_res = spark.read.csv('/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_concat_with_cdm.csv',header= True,sep=';')

In [None]:
df_res.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/IB04D/IB04D_concat_with_cdm_analyse.csv",header=True,sep=';')

In [None]:
print(df_src.count())
print(df_res.count())

In [None]:
print(df_res.count()-df_src.count())

In [None]:
df_src_basl_id = df_src.select("BASL_OUT_ID")
df_res_basl_id = df_res.select("BASL_OUT_ID")

In [None]:
df_res.subtract(df_src).count()

In [None]:
df_res_basl_id.subtract(df_src_basl_id).count()

In [None]:
df_src_basl_id.subtract(df_res_basl_id).count()

In [None]:
query1 = """SELECT * 
            FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet`
            WHERE BASL_OUT_ID IN ('22102729766')
            AND DATA_DATE_PARTITION > '2020-07-29'
           """

df_tcd = sqlContext.sql(query1)
df_tcd.show(100,False)

In [None]:
query1 = """SELECT * 
            FROM parquet.`/data/prod_env/data/parsed_data/GY01/VC/GY01_FACT_CDM.parquet`
            WHERE BASL_OUT_ID IN ('22102729766')      
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(10,False)

In [None]:
query1 = """SELECT BASL_OUT_ID, CLEARING_DATE 
            FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet`
            WHERE BASL_OUT_ID IN ('22281753916','22265645223','22280744434','22262717822','22269064072','22269064081','22269064036','22260787289','22257609703','22269064145','22269064165','22280744443','22280744433','22280744361','22281753908','22280744369','22280744444','22280744385','22280744414','22280744423','22280744420','22287365507','22287365490','22287365506','22287365480','22287365359','22287365355','22287365353','22287365350','22287365349','22287365505','22287365510','22287365354','22287365348','22287365358','22287365356','22287365352','22287365513','22287365504','22287365509','22287365508','22287365351','22287365512','22287365357','22262717678','22262717708') 
           """

df_tcd = sqlContext.sql(query1)
df_tcd.show(100,False)

## IB03D

In [None]:
res_IB03D = "/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_2020.parquet"

queryIB03D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3,EDS_DMBTR_INS AS LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG,BASL.ZFBDT_AUDIT AS ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR,BASL.CPUDT,BASL.CPUTM


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL


WHERE BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND GJAHR = 2020
AND (BASL.CPUDT < '2020-08-26' OR (BASL.CPUDT = '2020-08-26' AND BASL.CPUTM < '024524'))
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 
AND BASL.BUKRS IN ('9PBL','9PGF','9PGP','9PMF','9PMQ','9PNC','9PPF','9PPM','9PRE','9PYT')



"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
dfIB03D= sqlContext.sql(queryIB03D)
dfIB03D.repartition(20).write.parquet(res_IB03D)

#queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")


In [None]:
# CONCATENATION DES DF 

In [None]:
ib03d_2020 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_2020.parquet")
ib03d_2019 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_2019.parquet")
ib04d_2018 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_2018.parquet")
ib04d_2016_2017 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_2016_2017.parquet")
ib04d_2014_2015 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_2014_2015.parquet")
ib04d_2011_2012_2013 = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_2011_2012_2013.parquet")

In [None]:
from functools import reduce
from pyspark.sql import DataFrame

dfs = [ib04d_2011_2012_2013,ib04d_2014_2015,ib04d_2016_2017,ib04d_2018,ib03d_2019,ib03d_2020]
df = reduce(DataFrame.unionAll, dfs)

In [None]:
df.count()

In [None]:
df.repartition(20).write.parquet('/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_concat_2608.parquet')

In [None]:
res_IB03D = "/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_concat_with_cdm.csv"

queryIB03D = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) AS STATUS,COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE,
BASL.ZUONR,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.ASSURE,
BASL.AZJ,BASL.BASL_OUT_ID,BASL.BEC_ERKENNUNG1,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BKTXT,BASL.BLART,BASL.BLDAT,
BASL.BNRB,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.DATE3, LADM_DMBTR,BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FS_BLNR1,
BASL.FS_BLNR2,BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,BASL.GSP,BASL.HBSNR,
BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KUNDENNR,BASL.LDGRP,BASL.MAY,BASL.MDCF,BASL.PAYID,BASL.PFS,
BASL.REFPGA,BASL.RISKLAND,BASL.RVA,BASL.RVVERTRAG,BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SEGMENT,BASL.SGTXT,
BASL.SNR,BASL.SUB_ACCT,BASL.TICKID, LADM_TWBTRG,BASL.VSNR,BASL.VTNRAB,BASL.VTNRRE,BASL.WERBERNR,
BASL.WT_WITHCD,BASL.YYCSG,BASL.YYDCH,BASL.YYMAY,BASL.YYPRG, ZFBDT,BASL.ZJAHR,BASL.ZZBSZ,BASL.ZZONR


FROM parquet.`/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_concat_2608.parquet` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID
AND CDM_NON_LETTRES.VALID_FROM < '2020-08-26 02:45:24' 
AND '2020-08-26 02:45:24' <= CDM_NON_LETTRES.VALID_TO   
AND BASL.BUDAT <= '2020-08-26 02:45:24'

LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
AND CDM.VALID_FROM < '2020-08-26 02:45:24' 
AND '2020-08-26 02:45:24' <= CDM.VALID_TO   
AND BASL.BUDAT <= '2020-08-26 02:45:24'


WHERE COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) IN ('O','R')
AND BASL.RACCT IN ('1140001001','2411201151','2411201186','2412001138','2412001139','2412001159','2412001271')
AND BASL.LDGRP IN (' ', 'L1')
AND BASL.ZZONR NOT IN ('REGRP1') 
AND BASL.BUKRS IN ('9PBL','9PGF','9PGP','9PMF','9PMQ','9PNC','9PPF','9PPM','9PRE','9PYT')

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    
    
}

dict_timestampFormat = {

}
# dfIB04D= sqlContext.sql(queryIB04D)
# dfIB04D.repartition(10).write.parquet(res_IB04D)

queryIB03D = runInfoMap2(spark, queryIB03D, dict_dateFormat, dict_timestampFormat, '.', res_IB03D, "csv", 1, True, ";")


In [None]:
ib03_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/IB03D/IB03D_concat_with_cdm.csv",header=True,sep=';')
ib03_res.registerTempTable("IB03_res")

In [None]:
ib03_res.count()

In [None]:
ib03_src = spark.read.csv("/data/dropbox/larcher/INFOMAPS/IB03D/LDM_PRD_FRA_IB03D_200826_024524.CSV",header=True,sep=';')
ib03_src.registerTempTable("IB03_src")

In [None]:
ib03_src.count()

In [None]:
ib03_res_bsl = ib03_res.select("BASL_OUT_ID")
ib03_src_bsl = ib03_src.select("BASL_OUT_ID")

In [None]:
ib03_res_bsl.subtract(ib03_src_bsl).count()

In [None]:
ib03_src_bsl.subtract(ib03_res_bsl).count()

In [None]:
query1 = """SELECT SUM(LADM_TWBTRG)
            FROM IB03_src
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(LADM_TWBTRG)
            FROM IB03_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(LADM_DMBTR)
            FROM IB03_src
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT CAST(SUM(LADM_DMBTR) AS DECIMAL(38,15))
            FROM IB03_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT EDS_DMBTR_INS 
            FROM BASL_AZF_DATE_CPT
            WHERE GJAHR = 2014 
            AND MONAT = 5
            AND RACCT = 2411201151
            AND BASL_OUT_ID = 4098291114
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

# VH03M

In [None]:
res_VH03M = "/data/dropbox/larcher/INFOMAPS/VH03M/VH03M_1709.parquet"

queryVH03M = """
SELECT /* BROADCAST(OWNER_BUKRS), BROADCAST(OWNER_RACCT), BROADCAST(OWNER_ACCOUNTS) */ 

BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,A_ALTACCOUNT,BASL.ABZ,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.AJ,BASL.ASSURE,
BASL.AWKEY,BASL.AZJ,BASL.AZP,
BASL.BASL_OUT_ID,BASL.BEC,BASL.BEC_ERKENNUNG1,BASL.BEC_ERKENNUNG2,
BASL.BEC_ERKENNUNG3,BASL.BEC_ERKENNUNG7,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BEWAR,BASL.BGZN,BASL.BKTXT,
BASL.BLART,BASL.BLDAT,BASL.BNRB,BASL.BSCHL,BASL.BUBSZ,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.BUZEI,BASL.CISIN,
BASL.CPUDT,BASL.CSG,BASL.DATE1,BASL.DATE2,BASL.DATE3,BASL.DATE4,EDS_DMBTR_INS AS LADM_DMBTR,
BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FMK2,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_BSCHL,BASL.FS_BUKRS,
BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_KOSTL,BASL.FS_LDGRP1,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,
BASL.GJAGJE,BASL.GJVJ,BASL.GPOS,BASL.GSA,BASL.GSAKAT,BASL.GSBER,BASL.GSP,BASL.HBSNR,BASL.HGPOS,
BASL.HWAER,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KOSTL,BASL.KUNDENNR,EDS_BASISBETRAG AS LADM_BASISBETRAG,
TAX_RATE AS LADM_TAX_RATE,EDS_MWSKZ AS LADM_MWSKZ,BASL.LANDL,BASL.LDGRP,BASL.LIFNR,BASL.MAY,BASL.MDCF,
BASL.MWSKZ,BASL.MWST_SATZ,BASL.PAYID,BASL.PERNR,BASL.PFS,BASL.PRG,BASL.PRODSCHL,BASL.PRODSCHL_FS,
BASL.PROJK,BASL.QTE_UC,BASL.REFPGA,BASL.RIP,BASL.RISK,BASL.RISKLAND,BASL.RVA,BASL.RVJAHR,BASL.RVVERTRAG,
BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SDARTGRP,BASL.SEGMENT,BASL.SGTXT,BASL.SHKZG,BASL.SNR,BASL.SUB_ACCT,
BASL.SUBRISK,BASL.TARIF,BASL.TAX_OPT,BASL.TAX_RATE,BASL.TCMU,BASL.TCODE,BASL.TICKID,BASL.TWBTRG,
EDS_TWBTRG AS LADM_TWBTRG,BASL.USNAM,BASL.VBUND,BASL.VMK,BASL.VORVNR,BASL.VSNR,BASL.VT_AZP_ORIGINAL,
BASL.VTNRAB,BASL.VTNRRE,BASL.VUNR,BASL.WAERS,BASL.WERBERNR,BASL.WITHT,BASL.WRBTR,BASL.WT_QBUIHH,
BASL.WT_QSSHH,BASL.WT_WITHCD,BASL.WWERT,BASL.XREF3,BASL.YYCSG,BASL.YYDCH,BASL.YYLOB,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZUBRINGER,BASL.ZUGJAHR,BASL.ZUGMONAT,BASL.ZZBSZ,BASL.ZZFMK1,
BASL.ZZFMK2,BASL.ZZONR,BASL.ZZVERD,BASL.ZZVMK


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_BUKRS ON OWNER_BUKRS.BUKRS = BASL.BUKRS
INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_RACCT ON OWNER_RACCT.RACCT = BASL.RACCT
INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_ACCOUNTS ON (OWNER_ACCOUNTS.ID_BUKRS = OWNER_BUKRS.ID_BUKRS AND OWNER_ACCOUNTS.ID_RACCT = OWNER_RACCT.ID_RACCT)

WHERE BASL.GJAHR = 2020
AND BASL.BUKRS IN ('9LFR','9RFR')
AND (BASL.CPUDT < '2020-09-02' OR (BASL.CPUDT = '2020-09-02' AND BASL.CPUTM < '140000'))
AND BASL.RACCT IN ('4414001041','4414001058','4414001059','4414001593','4414001594','4414001595','4414001597','4414001598')
AND BASL.MONAT IN (8,9)

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "WWERT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
     "VALID_FROM" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "VALID_TO" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
     "CLEARING_DATE" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    
    
    
}

dict_timestampFormat = {

}
dfVH03M= sqlContext.sql(queryVH03M)
dfVH03M.repartition(50).write.parquet(res_VH03M)

#queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")


In [None]:
df_non_lettres = spark.read.parquet("/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet")

In [None]:
df_historises = spark.read.parquet("/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet")

In [None]:
df_non_lettres.printSchema()

In [None]:
df_historises.printSchema()

In [None]:
res_VH03M = "/data/dropbox/larcher/INFOMAPS/VH03M/VH03M_1709.csv"

queryVH03M = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,

COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) AS STATUS, 
COALESCE(CDM.VALID_FROM,CDM_NON_LETTRES.VALID_FROM) AS VALID_FROM, 
COALESCE(CDM.VALID_TO,CDM_NON_LETTRES.VALID_TO) AS VALID_TO,
COALESCE(CDM.CLEARING_ID,'') AS CLEARING_ID,
COALESCE(CDM.CLEARING_DATE,'') AS CLEARING_DATE,
COALESCE(CDM.CLEARING_USERNAME,'') AS CLEARING_USERNAME,
COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE ,
COALESCE(CDM.REOPEN_USERNAME,'') AS REOPEN_USERNAME,

A_ALTACCOUNT,BASL.ABZ,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.AJ,BASL.ASSURE,
BASL.AWKEY,BASL.AZJ,BASL.AZP,BASL.BASISBETRAG,BASL.BASL_OUT_ID,BASL.BEC,BASL.BEC_ERKENNUNG1,BASL.BEC_ERKENNUNG2,
BASL.BEC_ERKENNUNG3,BASL.BEC_ERKENNUNG7,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BEWAR,BASL.BGZN,BASL.BKTXT,
BASL.BLART,BASL.BLDAT,BASL.BNRB,BASL.BSCHL,BASL.BUBSZ,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.BUZEI,BASL.CISIN,
BASL.CPUDT,BASL.CSG,BASL.DATE1,BASL.DATE2,BASL.DATE3,BASL.DATE4, LADM_DMBTR,
BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FMK2,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_BSCHL,BASL.FS_BUKRS,
BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_KOSTL,BASL.FS_LDGRP1,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,
BASL.GJAGJE,BASL.GJVJ,BASL.GPOS,BASL.GSA,BASL.GSAKAT,BASL.GSBER,BASL.GSP,BASL.HBSNR,BASL.HGPOS,
BASL.HWAER,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KOSTL,BASL.KUNDENNR,LADM_BASISBETRAG,
LADM_TAX_RATE,LADM_MWSKZ,BASL.LANDL,BASL.LDGRP,BASL.LIFNR,BASL.MAY,BASL.MDCF,
BASL.MWSKZ,BASL.MWST_SATZ,BASL.PAYID,BASL.PERNR,BASL.PFS,BASL.PRG,BASL.PRODSCHL,BASL.PRODSCHL_FS,
BASL.PROJK,BASL.QTE_UC,BASL.REFPGA,BASL.RIP,BASL.RISK,BASL.RISKLAND,BASL.RVA,BASL.RVJAHR,BASL.RVVERTRAG,
BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SDARTGRP,BASL.SEGMENT,BASL.SGTXT,BASL.SHKZG,BASL.SNR,BASL.SUB_ACCT,
BASL.SUBRISK,BASL.TARIF,BASL.TAX_OPT,BASL.TAX_RATE,BASL.TCMU,BASL.TCODE,BASL.TICKID,BASL.TWBTRG,
LADM_TWBTRG,BASL.USNAM,BASL.VBUND,BASL.VMK,BASL.VORVNR,BASL.VSNR,BASL.VT_AZP_ORIGINAL,
BASL.VTNRAB,BASL.VTNRRE,BASL.VUNR,BASL.WAERS,BASL.WERBERNR,BASL.WITHT,BASL.WRBTR,BASL.WT_QBUIHH,
BASL.WT_QSSHH,BASL.WT_WITHCD,BASL.WWERT,BASL.XREF3,BASL.YYCSG,BASL.YYDCH,BASL.YYLOB,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZUBRINGER,BASL.ZUGJAHR,BASL.ZUGMONAT,BASL.ZZBSZ,BASL.ZZFMK1,
BASL.ZZFMK2,BASL.ZZONR,BASL.ZZVERD,BASL.ZZVMK


FROM parquet.`/data/dropbox/larcher/INFOMAPS/VH03M/VH03M_without_cdm_septembre_update_basisbetrag.parquet` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID 
AND CDM_NON_LETTRES.VALID_FROM < '2020-09-02 14:00:00' 
AND '2020-09-02 14:00:00' <= CDM_NON_LETTRES.VALID_TO   
AND BASL.BUDAT <= '2020-09-02 14:00:00'

LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
AND CDM.VALID_FROM < '2020-09-02 14:00:00' 
AND '2020-09-02 14:00:00' <= CDM.VALID_TO  
AND CDM.VALID_TO > BASL.BUDAT  
AND BASL.BUDAT <= '2020-09-02 14:00:00'

WHERE COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) IS NOT NULL
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
     "VALID_FROM" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "VALID_TO" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
     "CLEARING_DATE" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
      "WWERT" : {
     "format" : "yyyyMMdd",
     "upper" : False   
     }
    
    
    
}

dict_timestampFormat = {

}

# dfVH03M= sqlContext.sql(queryVH03M)
# dfVH03M.repartition(50).write.parquet(res_VH03M)

queryVH03M = runInfoMap2(spark, queryVH03M, dict_dateFormat, dict_timestampFormat, '.', res_VH03M, "csv", 1, True, ";")


In [None]:
res_VH03M = "/data/dropbox/larcher/INFOMAPS/VH03M/VH03M_with_cdm_1709.csv"

queryVH03M = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,

COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) AS STATUS, 
COALESCE(CDM.VALID_FROM,CDM_NON_LETTRES.VALID_FROM) AS VALID_FROM, 
COALESCE(CDM.VALID_TO,CDM_NON_LETTRES.VALID_TO) AS VALID_TO,
COALESCE(CDM.CLEARING_ID,'') AS CLEARING_ID,
COALESCE(CDM.CLEARING_DATE,'') AS CLEARING_DATE,
COALESCE(CDM.CLEARING_USERNAME,'') AS CLEARING_USERNAME,
COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE ,
COALESCE(CDM.REOPEN_USERNAME,'') AS REOPEN_USERNAME,

A_ALTACCOUNT,BASL.ABZ,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.AJ,BASL.ASSURE,
BASL.AWKEY,BASL.AZJ,BASL.AZP,BASL.BASL_OUT_ID,BASL.BEC,BASL.BEC_ERKENNUNG1,BASL.BEC_ERKENNUNG2,
BASL.BEC_ERKENNUNG3,BASL.BEC_ERKENNUNG7,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BEWAR,BASL.BGZN,BASL.BKTXT,
BASL.BLART,BASL.BLDAT,BASL.BNRB,BASL.BSCHL,BASL.BUBSZ,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.BUZEI,BASL.CISIN,
BASL.CPUDT,BASL.CSG,BASL.DATE1,BASL.DATE2,BASL.DATE3,BASL.DATE4, LADM_DMBTR,
BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FMK2,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_BSCHL,BASL.FS_BUKRS,
BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_KOSTL,BASL.FS_LDGRP1,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,
BASL.GJAGJE,BASL.GJVJ,BASL.GPOS,BASL.GSA,BASL.GSAKAT,BASL.GSBER,BASL.GSP,BASL.HBSNR,BASL.HGPOS,
BASL.HWAER,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KOSTL,BASL.KUNDENNR,LADM_BASISBETRAG,
LADM_TAX_RATE,LADM_MWSKZ,BASL.LANDL,BASL.LDGRP,BASL.LIFNR,BASL.MAY,BASL.MDCF,
BASL.MWSKZ,BASL.MWST_SATZ,BASL.PAYID,BASL.PERNR,BASL.PFS,BASL.PRG,BASL.PRODSCHL,BASL.PRODSCHL_FS,
BASL.PROJK,BASL.QTE_UC,BASL.REFPGA,BASL.RIP,BASL.RISK,BASL.RISKLAND,BASL.RVA,BASL.RVJAHR,BASL.RVVERTRAG,
BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SDARTGRP,BASL.SEGMENT,BASL.SGTXT,BASL.SHKZG,BASL.SNR,BASL.SUB_ACCT,
BASL.SUBRISK,BASL.TARIF,BASL.TAX_OPT,BASL.TAX_RATE,BASL.TCMU,BASL.TCODE,BASL.TICKID,BASL.TWBTRG,
LADM_TWBTRG,BASL.USNAM,BASL.VBUND,BASL.VMK,BASL.VORVNR,BASL.VSNR,BASL.VT_AZP_ORIGINAL,
BASL.VTNRAB,BASL.VTNRRE,BASL.VUNR,BASL.WAERS,BASL.WERBERNR,BASL.WITHT,BASL.WRBTR,BASL.WT_QBUIHH,
BASL.WT_QSSHH,BASL.WT_WITHCD,BASL.WWERT,BASL.XREF3,BASL.YYCSG,BASL.YYDCH,BASL.YYLOB,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZUBRINGER,BASL.ZUGJAHR,BASL.ZUGMONAT,BASL.ZZBSZ,BASL.ZZFMK1,
BASL.ZZFMK2,BASL.ZZONR,BASL.ZZVERD,BASL.ZZVMK


FROM parquet.`/data/dropbox/larcher/INFOMAPS/VH03M/VH03M_1709.parquet` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID 
AND CDM_NON_LETTRES.VALID_FROM < '2020-09-02 14:00:00' 
AND '2020-09-02 14:00:00' <= CDM_NON_LETTRES.VALID_TO   


LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
AND CDM.VALID_FROM < '2020-09-02 14:00:00' 
AND '2020-09-02 14:00:00' <= CDM.VALID_TO  

WHERE COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) IS NOT NULL
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
     
     "CLEARING_DATE" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
      "WWERT" : {
     "format" : "yyyyMMdd",
     "upper" : False   
     }
    
    
    
}

dict_timestampFormat = {
    "VALID_FROM" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "VALID_TO" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
}

# dfVH03M= sqlContext.sql(queryVH03M)
# dfVH03M.repartition(50).write.parquet(res_VH03M)

queryVH03M = runInfoMap2(spark, queryVH03M, dict_dateFormat, dict_timestampFormat, '.', res_VH03M, "csv", 1, True, ";")


In [None]:
df_VH03M = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH03M/LDM_PRD_FRA_VH03M_200902_135314.CSV",header=True,sep=';')
df_VH03M.registerTempTable("VH03M_src")

In [None]:
df_VH03M.count()

In [None]:
df_VH03M_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH03M/VH03M_with_cdm_septembre_update_budat.csv",header=True,sep=';')
df_VH03M_res.registerTempTable("VH03M_res")

In [None]:
df_VH03M_res.count()

In [None]:
df_VH03M_res = df_VH03M_res.drop("KAPLA")

In [None]:
df_VH03M.printSchema()

In [None]:
df_VH03M_res.printSchema()

In [None]:
df_VH03M_basl = df_VH03M.select("BASL_OUT_ID")

In [None]:
df_VH03M_res_basl = df_VH03M_res.select("BASL_OUT_ID")

In [None]:
df_VH03M_basl.subtract(df_VH03M_res_basl).count()

In [None]:
df_VH03M_res_basl.subtract(df_VH03M_basl).count()

In [None]:
query1 = """SELECT DISTINCT WWERT 
            FROM VH03M_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(10,False)

In [None]:
query1 = """SELECT SUM(LDGRP)
            FROM VH03M_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(LADM_TWBTRG)
            FROM VH03M_src
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(LADM_TWBTRG)
            FROM VH03M_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(LADM_MWSKZ)
            FROM VH03M_src
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

In [None]:
query1 = """SELECT SUM(LADM_MWSKZ)
            FROM VH03M_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(1,False)

# VH01M

In [None]:
res_VH01M = "/data/dropbox/larcher/INFOMAPS/VH01M/VH01M_1709_2.parquet"

queryVH01M = """
SELECT /* BROADCAST(OWNER_BUKRS), BROADCAST(OWNER_RACCT), BROADCAST(OWNER_ACCOUNTS) */ 

BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,A_ALTACCOUNT,BASL.ABZ,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.AJ,BASL.ASSURE,
BASL.AWKEY,BASL.AZJ,BASL.AZP,
BASL.BASL_OUT_ID,BASL.BEC,BASL.BEC_ERKENNUNG1,BASL.BEC_ERKENNUNG2,
BASL.BEC_ERKENNUNG3,BASL.BEC_ERKENNUNG7,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BEWAR,BASL.BGZN,BASL.BKTXT,
BASL.BLART,BASL.BLDAT,BASL.BNRB,BASL.BSCHL,BASL.BUBSZ,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.BUZEI,BASL.CISIN,
BASL.CPUDT,BASL.CSG,BASL.DATE1,BASL.DATE2,BASL.DATE3,BASL.DATE4,EDS_DMBTR_INS AS LADM_DMBTR,
BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FMK2,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_BSCHL,BASL.FS_BUKRS,
BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_KOSTL,BASL.FS_LDGRP1,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,
BASL.GJAGJE,BASL.GJVJ,BASL.GPOS,BASL.GSA,BASL.GSAKAT,BASL.GSBER,BASL.GSP,BASL.HBSNR,BASL.HGPOS,
BASL.HWAER,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KOSTL,BASL.KUNDENNR,EDS_BASISBETRAG AS LADM_BASISBETRAG,
TAX_RATE AS LADM_TAX_RATE,EDS_MWSKZ AS LADM_MWSKZ,BASL.LANDL,BASL.LDGRP,BASL.LIFNR,BASL.MAY,BASL.MDCF,
BASL.MWSKZ,BASL.MWST_SATZ,BASL.PAYID,BASL.PERNR,BASL.PFS,BASL.PRG,BASL.PRODSCHL,BASL.PRODSCHL_FS,
BASL.PROJK,BASL.QTE_UC,BASL.REFPGA,BASL.RIP,BASL.RISK,BASL.RISKLAND,BASL.RVA,BASL.RVJAHR,BASL.RVVERTRAG,
BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SDARTGRP,BASL.SEGMENT,BASL.SGTXT,BASL.SHKZG,BASL.SNR,BASL.SUB_ACCT,
BASL.SUBRISK,BASL.TARIF,BASL.TAX_OPT,BASL.TAX_RATE,BASL.TCMU,BASL.TCODE,BASL.TICKID,BASL.TWBTRG,
EDS_TWBTRG AS LADM_TWBTRG,BASL.USNAM,BASL.VBUND,BASL.VMK,BASL.VORVNR,BASL.VSNR,BASL.VT_AZP_ORIGINAL,
BASL.VTNRAB,BASL.VTNRRE,BASL.VUNR,BASL.WAERS,BASL.WERBERNR,BASL.WITHT,BASL.WRBTR,BASL.WT_QBUIHH,
BASL.WT_QSSHH,BASL.WT_WITHCD,BASL.WWERT,BASL.XREF3,BASL.YYCSG,BASL.YYDCH,BASL.YYLOB,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZUBRINGER,BASL.ZUGJAHR,BASL.ZUGMONAT,BASL.ZZBSZ,BASL.ZZFMK1,
BASL.ZZFMK2,BASL.ZZONR,BASL.ZZVERD,BASL.ZZVMK


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_BUKRS ON OWNER_BUKRS.BUKRS = BASL.BUKRS
INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_RACCT ON OWNER_RACCT.RACCT = BASL.RACCT
INNER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-25` AS OWNER_ACCOUNTS ON (OWNER_ACCOUNTS.ID_BUKRS = OWNER_BUKRS.ID_BUKRS AND OWNER_ACCOUNTS.ID_RACCT = OWNER_RACCT.ID_RACCT)

WHERE BASL.GJAHR = 2020
AND (BASL.BUKRS LIKE '9%' OR BASL.BUKRS = 'NCAL')
AND (BASL.CPUDT < '2020-09-02' OR (BASL.CPUDT = '2020-09-02' AND BASL.CPUTM < '140000'))
AND BASL.RACCT = '2422001072'
AND BASL.MONAT = 8

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
     "VALID_FROM" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "VALID_TO" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
     "CLEARING_DATE" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
     "WWERT" : {
     "format" : "yyyyMMdd:HH:mm:ss",
     "upper" : False   
     }
    
    
    
}

dict_timestampFormat = {

}
dfVH01M= sqlContext.sql(queryVH01M)
dfVH01M.repartition(20).write.parquet(res_VH01M)

#queryIB04D = runInfoMap2(spark, queryIB04D, dict_dateFormat, dict_timestampFormat, '.', res_IB04D, "csv", 10, True, ";")


In [None]:
res_VH01M = "/data/dropbox/larcher/INFOMAPS/VH01M/VH01M_with_cdm_septembre.csv"

queryVH01M = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,

COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) AS STATUS, 
COALESCE(CDM.VALID_FROM,CDM_NON_LETTRES.VALID_FROM) AS VALID_FROM, 
COALESCE(CDM.VALID_TO,CDM_NON_LETTRES.VALID_TO) AS VALID_TO,
COALESCE(CDM.CLEARING_ID,'') AS CLEARING_ID,
COALESCE(CDM.CLEARING_USERNAME,'') AS CLEARING_USERNAME,
COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE ,
COALESCE(CDM.REOPEN_USERNAME,'') AS REOPEN_USERNAME,

A_ALTACCOUNT,BASL.ABZ,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.AJ,BASL.ASSURE,
BASL.AWKEY,BASL.AZJ,BASL.AZP,BASL.BASISBETRAG,BASL.BASL_OUT_ID,BASL.BEC,BASL.BEC_ERKENNUNG1,BASL.BEC_ERKENNUNG2,
BASL.BEC_ERKENNUNG3,BASL.BEC_ERKENNUNG7,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BEWAR,BASL.BGZN,BASL.BKTXT,
BASL.BLART,BASL.BLDAT,BASL.BNRB,BASL.BSCHL,BASL.BUBSZ,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.BUZEI,BASL.CISIN,
BASL.CPUDT,BASL.CSG,BASL.DATE1,BASL.DATE2,BASL.DATE3,BASL.DATE4, LADM_DMBTR,
BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FMK2,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_BSCHL,BASL.FS_BUKRS,
BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_KOSTL,BASL.FS_LDGRP1,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,
BASL.GJAGJE,BASL.GJVJ,BASL.GPOS,BASL.GSA,BASL.GSAKAT,BASL.GSBER,BASL.GSP,BASL.HBSNR,BASL.HGPOS,
BASL.HWAER,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KOSTL,BASL.KUNDENNR,LADM_BASISBETRAG,
LADM_TAX_RATE,LADM_MWSKZ,BASL.LANDL,BASL.LDGRP,BASL.LIFNR,BASL.MAY,BASL.MDCF,
BASL.MWSKZ,BASL.MWST_SATZ,BASL.PAYID,BASL.PERNR,BASL.PFS,BASL.PRG,BASL.PRODSCHL,BASL.PRODSCHL_FS,
BASL.PROJK,BASL.QTE_UC,BASL.REFPGA,BASL.RIP,BASL.RISK,BASL.RISKLAND,BASL.RVA,BASL.RVJAHR,BASL.RVVERTRAG,
BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SDARTGRP,BASL.SEGMENT,BASL.SGTXT,BASL.SHKZG,BASL.SNR,BASL.SUB_ACCT,
BASL.SUBRISK,BASL.TARIF,BASL.TAX_OPT,BASL.TAX_RATE,BASL.TCMU,BASL.TCODE,BASL.TICKID,BASL.TWBTRG,
LADM_TWBTRG,BASL.USNAM,BASL.VBUND,BASL.VMK,BASL.VORVNR,BASL.VSNR,BASL.VT_AZP_ORIGINAL,
BASL.VTNRAB,BASL.VTNRRE,BASL.VUNR,BASL.WAERS,BASL.WERBERNR,BASL.WITHT,BASL.WRBTR,BASL.WT_QBUIHH,
BASL.WT_QSSHH,BASL.WT_WITHCD,BASL.WWERT,BASL.XREF3,BASL.YYCSG,BASL.YYDCH,BASL.YYLOB,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZUBRINGER,BASL.ZUGJAHR,BASL.ZUGMONAT,BASL.ZZBSZ,BASL.ZZFMK1,
BASL.ZZFMK2,BASL.ZZONR,BASL.ZZVERD,BASL.ZZVMK


FROM parquet.`/data/dropbox/larcher/INFOMAPS/VH01M/VH01M_1709_2.parquet` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID
AND CDM_NON_LETTRES.VALID_FROM < '2020-09-02 14:00:00' 
AND '2020-09-02 14:00:00' <= CDM_NON_LETTRES.VALID_TO   
AND BASL.BUDAT <= '2020-09-02 14:00:00'

LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
AND CDM.VALID_FROM < '2020-09-02 14:00:00' 
AND '2020-09-02 14:00:00' <= CDM.VALID_TO  
AND CDM.VALID_TO > BASL.BUDAT  
AND BASL.BUDAT <= '2020-09-02 14:00:00'

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
     "VALID_FROM" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "VALID_TO" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
     "CLEARING_DATE" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
     "WWERT" : {
     "format" : "yyyyMMdd:HH:mm:ss",
     "upper" : False   
     }
    
    
}

dict_timestampFormat = {

}

# dfVH03M= sqlContext.sql(queryVH03M)
# dfVH03M.repartition(50).write.parquet(res_VH03M)

queryVH01M = runInfoMap2(spark, queryVH01M, dict_dateFormat, dict_timestampFormat, '.', res_VH01M, "csv", 1, True, ";")


In [None]:
res_VH01M = "/data/dropbox/larcher/INFOMAPS/VH01M/VH01M_with_cdm_septembre_1709.csv"

queryVH01M = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,

COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) AS STATUS, 
COALESCE(CDM.VALID_FROM,CDM_NON_LETTRES.VALID_FROM) AS VALID_FROM, 
COALESCE(CDM.VALID_TO,CDM_NON_LETTRES.VALID_TO) AS VALID_TO,
COALESCE(CDM.CLEARING_ID,'') AS CLEARING_ID,
COALESCE(CDM.CLEARING_DATE,'') AS CLEARING_DATE,
COALESCE(CDM.CLEARING_USERNAME,'') AS CLEARING_USERNAME,
COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE ,
COALESCE(CDM.REOPEN_USERNAME,'') AS REOPEN_USERNAME,

A_ALTACCOUNT,BASL.ABZ,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.AJ,BASL.ASSURE,
BASL.AWKEY,BASL.AZJ,BASL.AZP,BASL.BASL_OUT_ID,BASL.BEC,BASL.BEC_ERKENNUNG1,BASL.BEC_ERKENNUNG2,
BASL.BEC_ERKENNUNG3,BASL.BEC_ERKENNUNG7,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BEWAR,BASL.BGZN,BASL.BKTXT,
BASL.BLART,BASL.BLDAT,BASL.BNRB,BASL.BSCHL,BASL.BUBSZ,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.BUZEI,BASL.CISIN,
BASL.CPUDT,BASL.CSG,BASL.DATE1,BASL.DATE2,BASL.DATE3,BASL.DATE4, LADM_DMBTR,
BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FMK2,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_BSCHL,BASL.FS_BUKRS,
BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_KOSTL,BASL.FS_LDGRP1,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,
BASL.GJAGJE,BASL.GJVJ,BASL.GPOS,BASL.GSA,BASL.GSAKAT,BASL.GSBER,BASL.GSP,BASL.HBSNR,BASL.HGPOS,
BASL.HWAER,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KOSTL,BASL.KUNDENNR,LADM_BASISBETRAG,
LADM_TAX_RATE,LADM_MWSKZ,BASL.LANDL,BASL.LDGRP,BASL.LIFNR,BASL.MAY,BASL.MDCF,
BASL.MWSKZ,BASL.MWST_SATZ,BASL.PAYID,BASL.PERNR,BASL.PFS,BASL.PRG,BASL.PRODSCHL,BASL.PRODSCHL_FS,
BASL.PROJK,BASL.QTE_UC,BASL.REFPGA,BASL.RIP,BASL.RISK,BASL.RISKLAND,BASL.RVA,BASL.RVJAHR,BASL.RVVERTRAG,
BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SDARTGRP,BASL.SEGMENT,BASL.SGTXT,BASL.SHKZG,BASL.SNR,BASL.SUB_ACCT,
BASL.SUBRISK,BASL.TARIF,BASL.TAX_OPT,BASL.TAX_RATE,BASL.TCMU,BASL.TCODE,BASL.TICKID,BASL.TWBTRG,
LADM_TWBTRG,BASL.USNAM,BASL.VBUND,BASL.VMK,BASL.VORVNR,BASL.VSNR,BASL.VT_AZP_ORIGINAL,
BASL.VTNRAB,BASL.VTNRRE,BASL.VUNR,BASL.WAERS,BASL.WERBERNR,BASL.WITHT,BASL.WRBTR,BASL.WT_QBUIHH,
BASL.WT_QSSHH,BASL.WT_WITHCD,BASL.WWERT,BASL.XREF3,BASL.YYCSG,BASL.YYDCH,BASL.YYLOB,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZUBRINGER,BASL.ZUGJAHR,BASL.ZUGMONAT,BASL.ZZBSZ,BASL.ZZFMK1,
BASL.ZZFMK2,BASL.ZZONR,BASL.ZZVERD,BASL.ZZVMK


FROM parquet.`/data/dropbox/larcher/INFOMAPS/VH01M/VH01M_1709_2.parquet` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID
AND CDM_NON_LETTRES.VALID_FROM < '2020-09-02 14:00:00' 
AND '2020-09-02 14:00:00' <= CDM_NON_LETTRES.VALID_TO   


LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
AND CDM.VALID_FROM < '2020-09-02 14:00:00' 
AND '2020-09-02 14:00:00' <= CDM.VALID_TO  


"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
     
     "CLEARING_DATE" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
     "WWERT" : {
     "format" : "yyyyMMdd:HH:mm:ss",
     "upper" : False   
     }
    
    
}

dict_timestampFormat = {
    "VALID_FROM" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "VALID_TO" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
}

# dfVH03M= sqlContext.sql(queryVH03M)
# dfVH03M.repartition(50).write.parquet(res_VH03M)

queryVH01M = runInfoMap2(spark, queryVH01M, dict_dateFormat, dict_timestampFormat, '.', res_VH01M, "csv", 1, True, ";")


In [None]:
res_VH01M = "/data/dropbox/larcher/INFOMAPS/VH01M/VH01M_test_basl_and_cdm.csv"

queryVH01M = """
SELECT BASL.BUKRS,BASL.GJAHR,BASL.MONAT,BASL.RACCT,

COALESCE(CDM.STATUS,CDM_NON_LETTRES.STATUS) AS STATUS, 
COALESCE(CDM.VALID_FROM,CDM_NON_LETTRES.VALID_FROM) AS VALID_FROM, 
COALESCE(CDM.VALID_TO,CDM_NON_LETTRES.VALID_TO) AS VALID_TO,
COALESCE(CDM.CLEARING_ID,'') AS CLEARING_ID,
COALESCE(CDM.CLEARING_DATE,'') AS CLEARING_DATE,
COALESCE(CDM.CLEARING_USERNAME,'') AS CLEARING_USERNAME,
COALESCE(CDM.REOPEN_DATE,'') AS REOPEN_DATE ,
COALESCE(CDM.REOPEN_USERNAME,'') AS REOPEN_USERNAME,

A_ALTACCOUNT,BASL.ABZ,BASL.ACCRESPID,'Z00Z' AS ACCRESPIDF,BASL.AJ,BASL.ASSURE,
BASL.AWKEY,BASL.AZJ,BASL.AZP,BASL.BASL_OUT_ID,BASL.BEC,BASL.BEC_ERKENNUNG1,BASL.BEC_ERKENNUNG2,
BASL.BEC_ERKENNUNG3,BASL.BEC_ERKENNUNG7,BASL.BELDAT,BASL.BELNR,BASL.BETRAGSART,BASL.BEWAR,BASL.BGZN,BASL.BKTXT,
BASL.BLART,BASL.BLDAT,BASL.BNRB,BASL.BSCHL,BASL.BUBSZ,BASL.BUDAT,BASL.BUPER,BASL.BUPRT,BASL.BUZEI,BASL.CISIN,
BASL.CPUDT,BASL.CSG,BASL.DATE1,BASL.DATE2,BASL.DATE3,BASL.DATE4, EDS_DMBTR_INS AS LADM_DMBTR,
BASL.ENTRC,BASL.ESTIMATE,BASL.FMK1,BASL.FMK2,BASL.FS_BLNR1,BASL.FS_BLNR2,BASL.FS_BSCHL,BASL.FS_BUKRS,
BASL.FS_DCH,BASL.FS_HWAERS,BASL.FS_KOSTL,BASL.FS_LDGRP1,BASL.FS_VBUND1,BASL.FS_VBUND2,BASL.FS_WAERS,
BASL.GJAGJE,BASL.GJVJ,BASL.GPOS,BASL.GSA,BASL.GSAKAT,BASL.GSBER,BASL.GSP,BASL.HBSNR,BASL.HGPOS,
BASL.HWAER,BASL.ID_IRCA,BASL.INKASSOART,BASL.KHD,BASL.KOSTL,BASL.KUNDENNR,EDS_BASISBETRAG AS LADM_BASISBETRAG,
TAX_RATE AS LADM_TAX_RATE,EDS_MWSKZ AS LADM_MWSKZ,BASL.LANDL,BASL.LDGRP,BASL.LIFNR,BASL.MAY,BASL.MDCF,
BASL.MWSKZ,BASL.MWST_SATZ,BASL.PAYID,BASL.PERNR,BASL.PFS,BASL.PRG,BASL.PRODSCHL,BASL.PRODSCHL_FS,
BASL.PROJK,BASL.QTE_UC,BASL.REFPGA,BASL.RIP,BASL.RISK,BASL.RISKLAND,BASL.RVA,BASL.RVJAHR,BASL.RVVERTRAG,
BASL.SAP_WAERS,BASL.SCHADENOM,BASL.SDARTGRP,BASL.SEGMENT,BASL.SGTXT,BASL.SHKZG,BASL.SNR,BASL.SUB_ACCT,
BASL.SUBRISK,BASL.TARIF,BASL.TAX_OPT,BASL.TAX_RATE,BASL.TCMU,BASL.TCODE,BASL.TICKID,BASL.TWBTRG,
EDS_TWBTRG AS LADM_TWBTRG,BASL.USNAM,BASL.VBUND,BASL.VMK,BASL.VORVNR,BASL.VSNR,BASL.VT_AZP_ORIGINAL,
BASL.VTNRAB,BASL.VTNRRE,BASL.VUNR,BASL.WAERS,BASL.WERBERNR,BASL.WITHT,BASL.WRBTR,BASL.WT_QBUIHH,
BASL.WT_QSSHH,BASL.WT_WITHCD,BASL.WWERT,BASL.XREF3,BASL.YYCSG,BASL.YYDCH,BASL.YYLOB,BASL.YYMAY,
BASL.YYPRG,BASL.ZFBDT,BASL.ZJAHR,BASL.ZUBRINGER,BASL.ZUGJAHR,BASL.ZUGMONAT,BASL.ZZBSZ,BASL.ZZFMK1,
BASL.ZZFMK2,BASL.ZZONR,BASL.ZZVERD,BASL.ZZVMK


FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL
LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_non_lettre.parquet` AS CDM_NON_LETTRES ON BASL.BASL_OUT_ID = CDM_NON_LETTRES.BASL_OUT_ID
AND CDM_NON_LETTRES.VALID_FROM < '2020-09-02 14:00:00' 
AND '2020-09-02 14:00:00' <= CDM_NON_LETTRES.VALID_TO   


LEFT JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/GY01/cdm_mvmt_historise.parquet` AS CDM ON BASL.BASL_OUT_ID = CDM.BASL_OUT_ID             
AND CDM.VALID_FROM < '2020-09-02 14:00:00' 
AND '2020-09-02 14:00:00' <= CDM.VALID_TO  


"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "REOPEN_DATE" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
     
     "CLEARING_DATE" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
     "WWERT" : {
     "format" : "yyyyMMdd:HH:mm:ss",
     "upper" : False   
     }
    
    
}

dict_timestampFormat = {
    "VALID_FROM" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "VALID_TO" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
}

# dfVH03M= sqlContext.sql(queryVH03M)
# dfVH03M.repartition(50).write.parquet(res_VH03M)

queryVH01M = runInfoMap2(spark, queryVH01M, dict_dateFormat, dict_timestampFormat, '.', res_VH01M, "csv", 1, True, ";")


In [None]:
df_VH01M = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH01M/LDM_PRD_FRA_VH01M_200804_142003.CSV",header=True,sep=';')
df_VH01M.registerTempTable("VH01M_src")

In [None]:
df_VH01M.count()

In [None]:
query1 = """SELECT DISTINCT BUDAT
df_query_VH01 = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH01M/VH01M_with_cdm_septembre.csv",header=True,sep=';')
df_query_VH01.registerTempTable("VH01M_res")

In [None]:
df_query_VH01.count()

In [None]:
query1 = """SELECT DISTINCT BUDAT
            FROM VH01M_src
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(50,False)

In [None]:
query1 = """SELECT DISTINCT BUDAT
            FROM VH01M_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(50,False)

In [None]:
df_VH01M_basl = df_VH01M.select("BASL_OUT_ID")
df_VH01M_res_basl = df_VH01M_res.select("BASL_OUT_ID")


## ANALYSE VH01M 

In [None]:
df_VH01M = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH01M/LDM_PRD_FRA_VH01M_200902_145318.CSV",header=True,sep=';')
df_VH01M.registerTempTable("VH01M_src")

In [None]:
df_VH01M.count()

In [None]:
df_VH01M_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH01M/VH01M_with_cdm_septembre_badat_edited.csv/",header=True,sep=';')
df_VH01M_res.registerTempTable("VH01M_res")

In [None]:
df_VH01M_res.count()

In [None]:
BUKRS	RACCT	AZJ	PAYID	SCHADENOM	SNR	ZZONR	GSAKAT	PRODSCHL	SUBRISK	FMK2	BUDAT	CPUDT	LADM_BASISBETRAG	TAX_RATE	LADM_TWBTRG


In [None]:
df_VH01M_res = df_VH01M_res.select("BUKRS","RACCT","AZJ","PAYID","SCHADENOM","SNR","ZZONR","GSAKAT","PRODSCHL","SUBRISK","FMK2","BUDAT","CPUDT","LADM_BASISBETRAG","TAX_RATE","LADM_TWBTRG")

In [None]:
df_VH01M.drop("BUDAT","CPUDT","LADM_TWBTRG").subtract(df_VH01M_res.select("BUKRS","RACCT","AZJ","PAYID","SCHADENOM","SNR","ZZONR","GSAKAT","PRODSCHL","SUBRISK","FMK2","LADM_BASISBETRAG","TAX_RATE")).count()

In [None]:
df_VH01M.subtract(df_VH01M_res).count()

In [None]:
df_VH01M_res.select("BUDAT","CPUDT").show(1)

In [None]:
df_VH01M.select("BUDAT","CPUDT").show(1)

In [None]:
query1 = """SELECT SUM(LADM_TWBTRG) 
            FROM VH01M_src
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(50,False)

In [None]:
query1 = """SELECT SUM(LADM_TWBTRG) 
            FROM VH01M_res
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(50,False)

In [None]:
df_VH01M.repartition(1).write.parquet("/data/dropbox/larcher/INFOMAPS/VH01M/LDM_PRD_FRA_VH01M_200902_145318.parquet")

In [None]:
df_VH01M_parquet = spark.read.parquet("/data/dropbox/larcher/INFOMAPS/VH01M/LDM_PRD_FRA_VH01M_200902_145318.parquet")

In [None]:
df_VH01M_parquet.count()

In [None]:
res_VH01M = "/data/dropbox/larcher/INFOMAPS/VH01M/join_for_basloutid.parquet"

queryVH01M = """

SELECT src.BUKRS, src.RACCT, src.SCHADENOM, src.SNR, src.PRODSCHL, src.BUDAT, src.CPUDT, src.LADM_BASISBETRAG, BASL_OUT_ID


FROM parquet.`/data/dropbox/larcher/INFOMAPS/VH01M/LDM_PRD_FRA_VH01M_200902_145318.parquet` AS src
INNER JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL ON src.BUKRS = BASL.BUKRS AND src.RACCT = BASL.RACCT AND src.SCHADENOM = BASL.SCHADENOM 
AND src.SNR = BASL.SNR AND src.PRODSCHL = BASL.PRODSCHL AND src.BUDAT = BASL.BUDAT AND src.CPUDT = BASL.CPUDT AND src.LADM_BASISBETRAG = BASL.EDS_BASISBETRAG 

"""
# dfVH03M= sqlContext.sql(queryVH03M)
# dfVH03M.repartition(50).write.parquet(res_VH03M)

df_VH03M = sqlContext.sql(queryVH01M)
df_VH03M.repartition(1).write.parquet(res_VH01M)

# ANALYSE VH03M

In [None]:
query1 = """SELECT * 
            FROM parquet.`/data/prod_env/data/parsed_data/GY04/VC/GY04_SKB1.parquet`
            WHERE BASL_OUT_ID IN ('22324233943')      
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(10,False)

In [None]:
query1 = """SELECT * 
            FROM parquet.`/data/prod_env/data/parsed_data/GY01/VC/GY01_FACT_CDM.parquet`
            WHERE BASL_OUT_ID IN ('22324233943')      
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(10,False)

In [None]:
df_query_VH01 = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH03M/VH03M_with_cdm2.csv",header=True,sep=';')
df_query_VH01.createOrReplaceTempView("table_VH01_src")

In [None]:
df_VH01M = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH03M/LDM_PRD_FRA_VH03M_200804_132001.CSV",header=True,sep=';')
df_VH01M.createOrReplaceTempView("table_VH01_res")

In [None]:
df_VH03M_basl.subtract(df_VH03M_res_basl).count()

In [None]:
res_VH01 = "/data/dropbox/larcher/INFOMAPS/VH01M/VH01M_with_cdm2.csv"
df_query_VH01 = spark.read.csv(res_VH01, header=True, sep=";")
df_query_VH01 = df_query_VH01.drop("ACCRESPID").drop("ACCRESPIDF").drop("CLEARING_ID").drop("IDBATCHAUTO").drop("RATA_TAX_RATIO")
# df_query_SKB = df_query_SKB.withColumn("LADM_TWBTRG", df_query_SKB["LADM_TWBTRG"].cast(DecimalType(38, 5))) \
#                         .withColumn("LADM_DMBTR", df_query_SKB["LADM_DMBTR"].cast(DecimalType(38, 5)))
eds_VH01_columns = df_query_VH01.columns
df_query_VH01 = df_query_VH01.fillna("NULL_VALUE", subset=eds_VH01_columns)
df_query_VH01.createOrReplaceTempView("table_query_SKB")

In [None]:
res_VH01 = "/data/dropbox/larcher/INFOMAPS/VH03M/LDM_PRD_FRA_VH03M_200804_132001.CSV"
df_VH01M = spark.read.csv(res_VH01, header=True, sep=";")
df_VH01M = df_VH01M.drop("ACCRESPID").drop("ACCRESPIDF").drop("CLEARING_ID").drop("IDBATCHAUTO").drop("RATA_TAX_RATIO").drop("CLEARING_DATE")
# df_query_SKB = df_query_SKB.withColumn("LADM_TWBTRG", df_query_SKB["LADM_TWBTRG"].cast(DecimalType(38, 5))) \
#                         .withColumn("LADM_DMBTR", df_query_SKB["LADM_DMBTR"].cast(DecimalType(38, 5)))
eds_VH01_columns = df_VH01M.columns
df_VH01M = df_VH01M.fillna("NULL_VALUE", subset=eds_VH01_columns)
df_VH01M.createOrReplaceTempView("table_query_SKB")

In [None]:
csv_test = "/data/dropbox/larcher/INFOMAPS/VH03M/ANALYSES/analyse_mismatch"

df_query_VH01 = df_query_VH01.subtract(df_VH01M)


eds_VH01_columns = df_query_VH01.columns

df_query_VH01_columns_renamed = df_query_VH01


for eds_VH01_column in eds_VH01_columns:
    df_query_VH01_columns_renamed = df_query_VH01_columns_renamed.withColumnRenamed(eds_VH01_column, "EDS_" + eds_VH01_column)

df_VH01_join_eds_src = df_query_VH01_columns_renamed.join(df_VH01M, df_query_VH01_columns_renamed["EDS_BASL_OUT_ID"] == df_VH01M["BASL_OUT_ID"], how='left outer')

# for eds_VH01_column in eds_VH01_columns:
#     df_VH01_join_eds_src = df_VH01_join_eds_src.withColumn("FLAG_" + eds_VH01_column, when(df_VH01_join_eds_src["EDS_" + eds_VH01_column]==df_VH01_join_eds_src[eds_VH01_column], "True").otherwise("False"))

#df_VH01_join_eds_src.repartition(1).write.csv(csv_test, sep=";", header=True)


In [None]:
src_FR_WGL_K23 = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH03M/LDM_PRD_FRA_VH03M_200804_132001.CSV",header=True,sep=';')

In [None]:
c = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH03M/VH03M_with_cdm2.csv",header=True,sep=';')

In [None]:
src_FR_WGL_K23 = "/data/dropbox/larcher/INFOMAPS/VH03M/LDM_PRD_FRA_VH03M_200902_135314.CSV"
res_FR_WGL_K23 = "/data/dropbox/larcher/INFOMAPS/VH03M/VH03M_with_cdm_septembre_budat_edited/VH03M_with_cdm_septembre_budat_edited.csv"

In [None]:
df_VH01M_src = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH03M/LDM_PRD_FRA_VH03M_200902_135314.CSV",header=True,sep=';')
df_VH01M_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/VH03M/VH03M_with_cdm_septembre_budat_edited/VH03M_with_cdm_septembre_budat_edited.csv",header=True,sep=';')

In [None]:
df_VH01M_src.subtract(df_VH01M_res).count()

In [None]:
df_VH01M_res.subtract(df_VH01M_src).count()

In [None]:
df_src_FR_WGL_K23 = spark.read.csv(src_FR_WGL_K23, header=True, sep=";")

src_WGL_K23_columns = df_src_FR_WGL_K23.columns
df_src_FR_WGL_K23 = df_src_FR_WGL_K23.fillna("NULL_VALUE", subset=src_WGL_K23_columns)

#df_src_FR_WGL_K23.registerTempTable("table_src_FR_WGL_K23")

In [None]:
df_query_FR_WGL_K23 = spark.read.csv(res_FR_WGL_K23, header=True, sep=";")

eds_WGL_K23_columns = df_query_FR_WGL_K23.columns
df_query_FR_WGL_K23 = df_query_FR_WGL_K23.fillna("NULL_VALUE", subset=eds_WGL_K23_columns)

#df_query_FR_WGL_K23.createOrReplaceTempView("table_query_FR_WGL_K23")

In [None]:
csv_test = "/data/dropbox/larcher/INFOMAPS/VH03M/ANALYSES/mismatch_septembre_edited_2.csv"

eds_WGL_K23_columns = df_query_FR_WGL_K23.columns

df_query_FR_WGL_K23_columns_renamed = df_query_FR_WGL_K23

for eds_WGL_K23_column in eds_WGL_K23_columns:
    df_query_FR_WGL_K23_columns_renamed = df_query_FR_WGL_K23_columns_renamed.withColumnRenamed(eds_WGL_K23_column, "EDS_" + eds_WGL_K23_column)

df_WGL_K23_join_eds_src = df_query_FR_WGL_K23_columns_renamed.join(df_src_FR_WGL_K23, df_query_FR_WGL_K23_columns_renamed["EDS_BASL_OUT_ID"] == df_src_FR_WGL_K23["BASL_OUT_ID"], how='left_outer')

#df_WGL_K23_join_eds_src.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/VH03M/ANALYSES/champs_eds_resultat.csv", sep=";", header=True)

for eds_WGL_K23_column in eds_WGL_K23_columns:
    df_WGL_K23_join_eds_src = df_WGL_K23_join_eds_src.withColumn("FLAG_" + eds_WGL_K23_column, when(df_WGL_K23_join_eds_src["EDS_" + eds_WGL_K23_column]==df_WGL_K23_join_eds_src[eds_WGL_K23_column], "True").otherwise("False"))

df_WGL_K23_join_eds_src.repartition(1).write.csv(csv_test, sep=";", header=True)


In [None]:
query1 = """SELECT DISTINCT BUDAT
            FROM VH01M_src
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(50,False)

In [None]:
df_query_VH01.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/VH03M/ANALYSES/analyse_df_query_VH01", sep=";", header=True)
df_query_VH01_columns_renamed.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/VH03M/ANALYSES/df_query_VH01_columns_renamed", sep=";", header=True)

In [None]:
df_VH01_join_eds_src.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/VH03M/ANALYSES/df_VH01_join_eds_srctest3", sep=";", header=True)

In [None]:
eds_VH01_columns = df_query_VH01.columns

## CL05M

In [None]:
res_CL05M = "/data/dropbox/larcher/INFOMAPS/CL05M/CL05M_2.csv"

queryCL05M = """
SELECT BUKRS,GJAHR,MONAT,RACCT,AJ,AZJ,BASL_OUT_ID,BEC_ERKENNUNG3,BELDAT,BELNR,BKTXT,BLART,BUBSZ,BUDAT,BUPER, BUPRT,DATE1,
DATE2, EDS_DMBTR_INS AS LADM_DMBTR, FMK1,FS_BLNR1,FS_DCH,FS_HWAERS,FS_VBUND1,FS_WAERS,GSP,KHD,KUNDENNR,MAY,PRODSCHL,RISK,
RISKLAND,SAP_WAERS,SGTXT,SNR,SUBRISK,TICKID, TWBTRG, EDS_TWBTRG AS LADM_TWBTRG, VSNR,WERBERNR,WITHT,YYMAY,ZFBDT_AUDIT as ZFBDT,
ZZBSZ,ZZONR 

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL

WHERE GJAHR = 2020
AND MONAT = 8
AND BUKRS = '9PFR'
AND RACCT IN ('6111101048','6111201049','6111108008','6111208009','6111104008','6111204009','5111001008','5111001009','5111001018','5111001019','5111001028','5111001029','5111001038','5111001039','7870007019','7882130000','2412001100') 
AND ZZONR LIKE 'CLE%'
AND LDGRP IN (' ', 'L1')
AND (CPUDT < '2020-09-07' OR (CPUDT = '2020-09-07' AND CPUTM < '090004'))
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    }
}

dict_timestampFormat = {

}

# dfCL05M= sqlContext.sql(queryCL05M)
# dfCL05M.repartition(1).write.parquet(res_CL05M)

queryCL05M = runInfoMap2(spark, queryCL05M, dict_dateFormat, dict_timestampFormat, '.', res_CL05M, "csv", 1, True, ";")


In [None]:
df_CL05M = spark.read.csv("/data/dropbox/larcher/INFOMAPS/CL05M/CL05M_with_cpudt.csv",header=True,sep=';')

In [None]:
df_CL05M.count()

In [None]:
## GA5 

In [None]:
res_GA5 = "/data/dropbox/larcher/FLUX_RETOURS/GA5/TEST/GA5_2.csv"

queryGA5 = """
SELECT
    FS_BUKRS,
    BUDAT,FS_VBUND1,FMK1,FMK2,tb.PRODSCHL,FS_DCH,
    SUM(TWBTRG) AS SUM_TWBTRG,
    FS_WAERS,GSA,tb.GSAKAT,VT_AZP_ORIGINAL,RVA,RIP,DATE4,MDCF,ABZ,BUPRT,RISK,tb.SUBRISK,
    QTE_UC,ZZONR,ACCRESPID,BSCHL,FS_LDGRP1,BLART,tb.RACCT,A_ALTACCOUNT,GSBER,YYLOB,
    YYCSG,VBUND,ZZVMK,YYPRG,YYDCH,KOSTL,
    '' AS KAPLA,ENTRC,GSP, MAX(BASL_OUT_ID), MIN(BASL_OUT_ID)
    
FROM parquet.`/data/prod_env/data/parsed_data/GY/VC/GY_BASL_720.parquet/` AS tb
LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-09-11` AS tbu ON tbu.BUKRS = tb.BUKRS
LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-09-11` AS tr ON tr.RACCT = tb.RACCT
LEFT JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-09-11` AS ta ON (tbu.ID_BUKRS = ta.ID_BUKRS AND tr.ID_RACCT = ta.ID_RACCT)

WHERE DATA_DATE_PARTITION IN ('2020-09-13', '2020-09-12','2020-09-11')
    AND FS_BUKRS RLIKE '(^601|^621|^622|^9L|^9P|^9R)'
    AND (FS_LDGRP1 IS NULL OR FS_LDGRP1 IN ('L1', 'I1'))
    AND A_GRPCPTE_RACCT = 'ZRES'
    AND (ABZ IS NOT NULL OR ABZ != ' ')
    AND (tb.GSAKAT IS NOT NULL OR tb.GSAKAT != ' ')
    AND tb.RACCT RLIKE '(^5|^6)'
    AND BASL_OUT_ID > '22418811345'
    AND (CPUDT < '2020-09-13' OR (CPUDT = '2020-09-13' AND CPUTM < '073200'))
GROUP BY
    FS_BUKRS,
    BUDAT,FS_VBUND1,FMK1,FMK2,tb.PRODSCHL,FS_DCH,
    FS_WAERS,GSA,tb.GSAKAT,VT_AZP_ORIGINAL,RVA,RIP,DATE4,MDCF,ABZ,BUPRT,RISK,tb.SUBRISK,
    QTE_UC,ZZONR,ACCRESPID,BSCHL,FS_LDGRP1,BLART,tb.RACCT,A_ALTACCOUNT,GSBER,YYLOB,
    YYCSG,VBUND,ZZVMK,YYPRG,YYDCH,KOSTL,
    ENTRC,GSP

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    }
}

dict_timestampFormat = {

}

# dfCL05M= sqlContext.sql(queryCL05M)
# dfCL05M.repartition(1).write.parquet(res_CL05M)

queryGA5 = runInfoMap2(spark, queryGA5, dict_dateFormat, dict_timestampFormat, '.', res_GA5, "csv", 1, True, ";")


In [None]:
## analyse source 12/09 2h et 7h 

In [None]:
df_GA5_2h = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/GA5/TEST/LDM_PRD_FRA_GA5DA_200912_020514.CSV",header=True,sep=';')

In [None]:
df_GA5_2h.count()

In [None]:
df_GA5_7h = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/GA5/TEST/LDM_PRD_FRA_GA5DA_200912_070534.CSV",header=True,sep=';')

In [None]:
df_GA5_7h.count()

In [None]:
df_GA5_2h_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20200912_20200912020110_GA5DA_0004.CSV",header=True,sep=';')
df_GA52_7h_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20200912_20200912073155_GA5DA_0004.CSV",header=True,sep=';')

In [None]:
print(df_GA5_2h_res.count())
print(df_GA52_7h_res.count())

In [None]:
df_GA5_2h.drop("KAPLA").subtract(df_GA5_2h_res.drop("KAPLA")).count()

In [None]:
df_GA5_7h.drop("KAPLA").subtract(df_GA52_7h_res.drop("KAPLA")).count()

In [None]:
df_GA5_7h_notebook.count()

In [None]:
## analyse source 13/09 2h et 7h

In [None]:
df_GA5_1309_2h = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/GA5/TEST/LDM_PRD_FRA_GA5DA_200913_020554.CSV",header=True,sep=';')

In [None]:
df_GA5_1309_7h = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/GA5/TEST/LDM_PRD_FRA_GA5DA_200913_070615.CSV",header=True,sep=';')

In [None]:
print(df_GA5_1309_2h.count())
print(df_GA5_1309_7h.count())

In [None]:
df_GA5_1309_2h_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20200913_20200913020135_GA5DA_0004.CSV",header=True,sep=';')
df_GA5_1309_7h_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20200913_20200913073211_GA5DA_0004.CSV",header=True,sep=';')

In [None]:
print(df_GA5_1309_2h_res.count())
print(df_GA5_1309_7h_res.count())

In [None]:
df_GA5_1309_2h.drop("KAPLA").subtract(df_GA5_1309_2h_res.drop("KAPLA")).count()

In [None]:
## analyse test notebook 13/09 à 7h

In [None]:
df_GA5_7h_notebook = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/GA5/TEST/GA5_2.csv",header=True,sep=';')

In [None]:
df_GA5_7h_notebook.count()

In [None]:
df_GA5_7h_notebook.select("MAX(BASL_OUT_ID)","MIN(BASL_OUT_ID)").show(10)

In [None]:
basl_max = df_GA5_7h_notebook.agg({"MAX(BASL_OUT_ID)": "max"}).collect()[0][0]
basl_min = df_GA5_7h_notebook.agg({"MIN(BASL_OUT_ID)": "min"}).collect()[0][0]
print(basl_max)
print(basl_min)

In [None]:
## analyse source 14/09 2h et 7h

In [None]:
df_GA5_1409_2h = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/GA5/TEST/LDM_PRD_FRA_GA5DA_200914_020635.CSV",header=True,sep=';')

In [None]:
df_GA5_1409_2h.count()

In [None]:
df_GA5_1309_7h_res.drop("KAPLA").subtract(df_GA5_1409_2h.drop("KAPLA")).count()

In [None]:
## analyse source 15/09 2h et 7h

In [None]:
df_GA5_1509_2h = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20200915_20200915020301_GA5DA_0004.CSV",header=True,sep=';')

In [None]:
df_GA5_1509_2h.count()

In [None]:
df_GA5_1509_7h = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20200915_20200915073203_GA5DA_0004.CSV",header=True,sep=';')

In [None]:
df_GA5_1509_7h.count()

In [None]:
from datetime import datetime, timedelta

In [None]:
today = datetime.now()
myQuery = "test"
delta = timedelta(days=1)

lst_dates = [(today - delta).strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d')]
str_dates = "', '".join(lst_dates)
myQuery = myQuery.replace("t",str_dates)

In [None]:
(today - delta).strftime('%Y-%m-%d')

In [None]:
myQuery

In [None]:
import time
from datetime import date
today = datetime.now()
delta = timedelta(days=1)

In [None]:
print(today)
print(delta)

In [None]:
(today - delta).strftime('%Y-%m-%d')

In [None]:
myQuery = "'%list_dates' -- %data_date_delta_mode" 
if "specific_dates" in dict_config_query["imfrName"]["query_values"]:

    for elt in dict_config_query["imfrName"]["query_values"]["specific_dates"]:
        #### today
        if dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["value"] == "today":
            myQuery = myQuery.replace(dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["regex"], today.strftime('%Y-%m-%d'))

        #### yesterday_and_today
        if dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["value"] == "yesterday_and_today":
            today = datetime.now()
            lst_dates = [(today - delta).strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d')]
            str_dates = "', '".join(lst_dates)
            myQuery = myQuery.replace(dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["regex"], str_dates)

        #### from_one_to_ten_working_days_included_saturday
        if dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["value"] == "from_one_to_ten_working_days_included_saturday":
            lst_dates = []
            while len(lst_dates) < 10:
                if cal.is_working_day(start_date) or (start_date.strftime("%A") == "Saturday" and start_date not in holidays):
                    lst_dates.append(start_date.strftime('%Y-%m-%d'))
                start_date += delta
            str_dates = "', '".join(lst_dates)
            myQuery = myQuery.replace(dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["regex"], str_dates)

        #### third_and_fourth_sunday_of_current_month
        if dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["value"] == "third_and_fourth_sunday_of_current_month":
            lst_dates = [Calendar.get_nth_weekday_in_month(thisYear, thisMonth, SUN, 3).strftime('%Y-%m-%d'), Calendar.get_nth_weekday_in_month(thisYear, thisMonth, SUN, 4).strftime('%Y-%m-%d')]
            str_dates = "', '".join(lst_dates)
            myQuery = myQuery.replace(dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["regex"], str_dates)

        ##################### Filters for delta_mode
        #### daily_except_sunday
        if dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["value"] == "daily_except_sunday":
            lst_dates = [(today - timedelta(days=2)).strftime('%Y-%m-%d'), (today - delta).strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d')]
            str_dates = "', '".join(lst_dates)
            myQuery = myQuery.replace(dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["regex"], str_dates)


        if dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["value"] == "CPUDT_J+4_M-1":
            today = date.today()
            day_cpudt = (today - relativedelta(months=1))
            start_date = date(day_cpudt.year, day_cpudt.month, 1)
            date_cpudt = cal.add_working_days(start_date, 4)
            myQuery = myQuery.replace(dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["regex"],date_cpudt.strftime('%Y-%m-%d'))



        if dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["value"] ==  "three_last_days":
            data_date = []
            today = time.strftime('%Y-%m-%d')
            yesterday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d')
            the_day_before_yesterday = (date.today() - timedelta(days=2)).strftime('%Y-%m-%d')
            data_date.append(today)
            data_date.append(yesterday)
            data_date.append(the_day_before_yesterday)
            data_date = str(data_date).replace("[", "").replace("]", "")
            myQuery = myQuery.replace(dict_config_query["imfrName"]["query_values"]["specific_dates"][elt]["regex"], data_date)


In [None]:
print(myQuery)

In [None]:
dict_config_query = {
    "imfrName": {
    "inputFile": "config/queries/query_DWADA.txt",
    "year": "",
    "month": "",
    "paths_list": "parsed_data/GY/VC/GY_BASL_720.parquet/|parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/|parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/|parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/",
    "years_relative_list": "",
    "months_relative_list": "",
    "datePart_list": "current2|current3|current4",
    "budates_list": "",
    "varmap": "P1|P2|D1|P3|D2|P4|D3",
    "outputLocation": "out/",
    "archiveLocation": "out/archive/GY_imfrs/",
    "outputFormat": "CSV",
    "codes_sa": {
      "GY06": "BI"
    },
    "archiveNamingConvention": "<codePipeline>_<codeSASrc>_<envName>_<codeSADest>_<envName>_<date>_<datetime>_<imfrName>_0004.ZIP",
    "fileNamingConvention": "<codeSASrc>_<envName>_<codeSADest>_<envName>_<date>_<datetime>_<imfrName>_0004.<outputFormat>",
    "flagNamingConvention": "<codePipeline>_<codeSASrc>_<envName>_<codeSADest>_<envName>_<date>_<datetime>_flag.TXT",
    "header": "True",
    "sep": ";",
    "dateFormat": {
      "*": {
        "format": "yyyyMMdd",
        "upper": False
      }
    },
    "timestampFormat": {
      "*": {
        "format": "yyyyMMdd:HH:mm:ss",
        "upper": True
      }
    },
    "decimalSeparator": ".",
    "query_values": {
      "specific_dates": {
        "0": {
          "value": "three_last_days",
          "regex": "%data_date_delta_mode"
        },
          "1": {
          "value": "yesterday_and_today",
          "regex": "%list_dates"
        }
          
      },
      "delta_mode": {
        "aggregation": False,
        "regex_select": ", %deltamode",
        "regex_where": "%basl"
      }
    },
    "decimalFormat": {},
    "sftp_transfer": False
  }
}

In [None]:
dict_config_query["imfrName"]["inputFile"]

In [None]:
## tests 

In [None]:
import numpy as np
import time
from datetime import datetime

In [None]:
2020-09-18 10:35:07.828849	SKB1Q
2020-09-18 10:35:07.969765	SKB1Q

In [None]:
a = ['2020-09-18 10:35:07.828849','2020-09-18 10:35:07.969765']

In [None]:
TS_EVENT_POSTGRE_STATUT_EN_COURS = np.array(a)

In [None]:
TS_EVENT_POSTGRE_STATUT_EN_COURS.min()

In [None]:
now = datetime(2020,9,18,10,35,7,828849)
print(now)

In [None]:
if len(TS_EVENT_POSTGRE_STATUT_EN_COURS) == 1 or ((len(TS_EVENT_POSTGRE_STATUT_EN_COURS) != 1) and (TS_EVENT_POSTGRE_STATUT_EN_COURS.min() == now)):
    print("ok")

In [None]:
res_MEP = "/data/dropbox/larcher/FLUX_RETOURS/MEP/MEP.csv"

queryCL05M = """
SELECT BUKRS,GJAHR,MONAT,RACCT,AJ,AZJ,BASL_OUT_ID,BEC_ERKENNUNG3,BELDAT,BELNR,BKTXT,BLART,BUBSZ,BUDAT,BUPER, BUPRT,DATE1,
DATE2, EDS_DMBTR_INS AS LADM_DMBTR, FMK1,FS_BLNR1,FS_DCH,FS_HWAERS,FS_VBUND1,FS_WAERS,GSP,KHD,KUNDENNR,MAY,PRODSCHL,RISK,
RISKLAND,SAP_WAERS,SGTXT,SNR,SUBRISK,TICKID, TWBTRG, EDS_TWBTRG AS LADM_TWBTRG, VSNR,WERBERNR,WITHT,YYMAY,ZFBDT_AUDIT as ZFBDT,
ZZBSZ,ZZONR 

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL

WHERE GJAHR = 2020
AND MONAT = 8
AND BUKRS = '9PFR'
AND RACCT IN ('6111101048','6111201049','6111108008','6111208009','6111104008','6111204009','5111001008','5111001009','5111001018','5111001019','5111001028','5111001029','5111001038','5111001039','7870007019','7882130000','2412001100') 
AND ZZONR LIKE 'CLE%'
AND LDGRP IN (' ', 'L1')
AND (CPUDT < '2020-09-07' OR (CPUDT = '2020-09-07' AND CPUTM < '090004'))
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    }
}

dict_timestampFormat = {

}

# dfCL05M= sqlContext.sql(queryCL05M)
# dfCL05M.repartition(1).write.parquet(res_CL05M)

queryMEP = runInfoMap2(spark, queryMEP, dict_dateFormat, dict_timestampFormat, '.', res_CL05M, "csv", 1, True, ";")


In [None]:
res_SKB = "/data/dropbox/larcher/FLUX_RETOURS/SKB/SKB_5.csv"

querySKB = """
SELECT
    ABZ,ACCRESPID,AJ,ASSURE,AWKEY,AZJ,AZP,
    BASISBETRAG,BEC,BEC_ERKENNUNG1,
    BEC_ERKENNUNG2,BEC_ERKENNUNG3,BEC_ERKENNUNG7,BELDAT,BELNR,BETRAGSART,
    BEWAR,BGZN,BKTXT,BLART,BLDAT,BNRB,BSCHL,BUBSZ,BUDAT,tb.BUKRS,BUPER,BUPRT,BUZEI,
    CISIN,CPUDT,CSG,
    DATE1,DATE2,DATE3,DATE4,DMBTR,
    ENTRC,ESTIMATE,
    FMK1,FMK2,
    FS_BLNR1,FS_BLNR2,FS_BSCHL,FS_BUKRS,FS_DCH,FS_HWAERS,FS_KOSTL,FS_LDGRP1,
    EDS_MWSKZ AS LADM_MWSKZ,
    FS_VBUND1,FS_VBUND2,FS_WAERS,
    GJAGJE,GJAHR,GJVJ,GPOS,GSA,GSAKAT,GSBER,GSP,
    HBSNR,HGPOS,HWAER,
    ID_IRCA,INKASSOART,
    KHD,KOSTL,KUNDENNR,
    LANDL,LDGRP,LIFNR,
    MAY,MDCF,MONAT,MWSKZ,MWST_SATZ,
    PAYID,PERNR,PFS,PRG,PRODSCHL,PRODSCHL_FS,PROJK,
    QTE_UC,
    tb.RACCT,REFPGA,RIP,RISK,RISKLAND,RVA,RVJAHR,RVVERTRAG,
    SAP_WAERS,SCHADENOM,SDARTGRP,SEGMENT,SGTXT,SHKZG,SNR,SUB_ACCT,SUBRISK,
    TARIF,TAX_OPT,TAX_RATE,TCMU,TCODE,TICKID,TWBTRG,
    USNAM,
    VBUND,VMK,VORVNR,VSNR,VT_AZP_ORIGINAL,VTNRAB,VTNRRE,VUNR,
    WAERS,WERBERNR,WITHT,WRBTR,WT_QBUIHH,WT_QSSHH,WT_WITHCD,WWERT,
    XREF3,
    YYCSG,YYDCH,YYLOB,YYMAY,YYPRG,
    ZFBDT_AUDIT AS ZFBDT,ZJAHR,ZUBRINGER,ZUGJAHR,ZUGMONAT,ZZBSZ,ZZFMK1,ZZFMK2,ZZONR,ZZVERD,ZZVMK,
    A_ALTACCOUNT,
    BASL_OUT_ID,
    'Z00Z' AS ACCRESPIDF,
    '' AS CLEARING_ID,
    '' AS IDBATCHAUTO,
    EDS_TWBTRG AS LADM_TWBTRG,
    TAX_RATE AS LADM_TAX_RATE,
    '' AS RATA_TAX_RATIO,
    EDS_BASISBETRAG AS LADM_BASISBETRAG,
    EDS_WT_QSSHH AS LADM_WT_QSSHH,
    EDS_DMBTR_INS AS LADM_DMBTR
FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS tb
LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-06-25` tr ON (tb.RACCT = tr.RACCT)
LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-06-25` tbu ON (tb.BUKRS = tbu.BUKRS)
LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-06-25` ta ON ((tr.ID_RACCT = ta.ID_RACCT) AND (tbu.ID_BUKRS = ta.ID_BUKRS))

WHERE GJAHR = 2020
    AND MONAT = 9
    AND BUDAT IN ('2020-09-20','2020-09-27')
    AND ZZONR != 'FINSYS'
    
    AND (CPUDT < '2020-09-21' OR (CPUDT = '2020-09-21' AND CPUTM < '080000'))
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "ZFBDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    
    },
    
     "WWERT" : {
     "format" : "ddMMMyyyy:HH:mm:ss",
     "upper" : True   
     }
      
}

dict_timestampFormat = {
  

}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

querySKB= runInfoMap2(spark, querySKB, dict_dateFormat, dict_timestampFormat, '.', res_SKB, "csv", 1, True, ";")

In [None]:
##REA

In [None]:
df_REA = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20200921_20200921203154_READA_0004.CSV", header=True, sep=";")

In [None]:
df_REA = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20200921_20200921203154_READA_0004.CSV", header=True, sep=";")
print("nombre lignes source : " + str(df_REA.count()))
basl_max_source = df_REA.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = df_REA.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))

In [None]:
res_REA = "/data/dropbox/larcher/FLUX_RETOURS/REA/REA_test.csv"

queryREA = """
SELECT BASL_OUT_ID

FROM parquet.`/data/prod_env/data/parsed_data/GY/VC/GY_BASL_720.parquet/`
WHERE DATA_DATE_PARTITION IN ('2020-09-21','2020-09-20','2020-09-19')
AND FS_BUKRS LIKE '601%'
AND PFS LIKE 'RA%'
AND BEC_ERKENNUNG1 IN ('OACACQREP','OACACQRET','OACCHAGIE','OACCOMACC','OACCOSPAA','OACCOSPAN','OACCRECON','OACCRELIB',
'OACENTPRI','OACENTSIN','OACESTCOA','OACESTENP','OACESTPRA','OACESTRCA','OACESTSIA','OACESTSIE','OACESTSIR','OACESTSPR',
'OACESTSTA','OACGARCON','OACGARLIB','OACINTESP','OACPARRES','OACPRACEM','OACPREVRE','OACPRIACC','OACPRIANU','OACPRINAC',
'OACPRINOA','OACPROGIE','OACPRSIAP','OACRACAFF','OACRECACC','OACRECCLO','OACRETSIN','OACSINACC','OACSINAPA','OACSINTAR',
'OACSORPRI','OACTECFIN','ORCACCAFF','ORCACCGRP','ORCACCRES','ORCACCVIR','ORCCESAFF','ORCCESGRP','ORCCESRES','ORCCESVIR',
'OREACQCLO','OREACQREP','ORECOMACQ','ORECOMADM','ORECOMREC','OREDETCON','OREDETLIB','OREENTPRI','OREENTSIN','OREESTACQ',
'OREESTADM','OREESTCOR','OREESTENP','OREESTENS''OREESTPAP','OREESTPAR','OREESTPAS','OREESTPRE','OREESTREC','OREESTSTE',
'OREESTTAR','OREINTESP','OREPARPRE','OREPARPRI','OREPARREC','OREPARRES','OREPARSIN','OREPERDEP','OREPRICLO','OREPRODEP',
'OREPROPRI','OREPROSIN','ORESINAAC','ORESINCLO','ORESINNMA','ORESINOMA','ORESINTAR','ORESORPRI','ORESORSIN','ORETAXDET',
'ORETECFIN','ORERENCLO','OREFRGCLO','OREPARFRG','OREPARREN','ORERECCLO','ORENNMCLO','OREPAECLO','OREPAACLO')
AND BASL_OUT_ID > '22516946697'
AND (CPUDT < '2020-09-21' OR (CPUDT = '2020-09-21' AND CPUTM < '203500'))
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "ZFBDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    
    },
    
     "WWERT" : {
     "format" : "ddMMMyyyy:HH:mm:ss",
     "upper" : True   
     }
      
}

dict_timestampFormat = {
  

}
dfREA= sqlContext.sql(queryREA)
dfREA.repartition(10).write.parquet(res_REA)

#querySKB= runInfoMap2(spark, querySKB, dict_dateFormat, dict_timestampFormat, '.', res_SKB, "csv", 1, True, ";")

In [None]:
df_REA = spark.read.parquet("/data/dropbox/larcher/FLUX_RETOURS/REA/REA_test.csv")
print("nombre lignes source : " + str(df_REA.count()))
basl_max_source = df_REA.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = df_REA.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))

In [None]:
res_blackline = "/data/dropbox/larcher/FLUX_RETOURS/blackline/blackline2.csv"

queryblackline = """
SELECT BUKRS, RACCT, RVA, ZZONR, LDGRP, SAP_WAERS, SUM(EDS_TWBTRG) AS LADM_TWBTRG

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL

WHERE BUKRS IN ('1238','1582','9004','9012','9023','9053','9109','9122','9136','9163','9238','9LDJ','9LDV','9LFR','9LGF','9LGP','9LIL','9LKM','9LMC','9LMF','9LMQ',
'9LNC','9LPF','9LPM','9LRE','9LYT','9PBE','9PBL','9PDV','9PFR','9PGB','9PGF','9PGP','9PIL','9PMF','9PMG','9PML','9PMQ','9PNC','9PNL','9PPF','9PPM','9PRE','9PTG',
'9PTN','9PUS','9PYT','NGAA','9LBL','9LDZ','9LHT','9LMA')

AND BASL.LDGRP IN (' ', 'L1')
AND ZZONR <> 'REGRP1' OR ZZONR IS NULL
AND BUDAT <= '2020-09-30' 
AND RACCT BETWEEN '1000000000' AND '4999999999'

AND (CPUDT < '2020-09-11' OR (CPUDT = '2020-09-11' AND CPUTM < '000000'))

GROUP BY BUKRS, RACCT, RVA, ZZONR, LDGRP, SAP_WAERS
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    
    
}

dict_timestampFormat = {

}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryblackline= runInfoMap2(spark, queryblackline, dict_dateFormat, dict_timestampFormat, '.', res_blackline, "csv", 1, True, ";")

In [None]:
SELECT /*  BROADCAST(b), BROADCAST(r), BROADCAST(ahc) */  
FS_BUKRS,PFS,BUDAT,BELDAT,BEC_ERKENNUNG1,BEC_ERKENNUNG7,FS_BSCHL,FS_VBUND1,FS_VBUND2,FMK1,FMK2,PRODSCHL,VMK,KHD,FS_DCH,MAY, CSG,BKTXT,SGTXT,TWBTRG,FS_WAERS,FS_BLNR1,VSNR,GSA,GSAKAT,TARIF,KUNDENNR,INKASSOART,BUBSZ,BGZN,SNR,RISKLAND,ZUGJAHR,ZUGMONAT, 
FS_BLNR2,FS_HWAERS,VT_AZP_ORIGINAL,RVA,RIP,DATE4,XREF3,GSBER,PAYID,GSP,REFPGA,TICKID,MDCF,ABZ,WERBERNR,GJAGJE,GJVJ,WITHT, WT_QSSHH,ZFBDT,BUPRT,MWSKZ,DATE1,DATE2,DATE3,RISK,SUBRISK,QTE_UC,ASSURE,ZZONR,KOSTL,WAERS,basl.BUKRS,BLDAT,BSCHL,BEWAR,LDGRP,VBUND, 
YYLOB,YYPRG,YYDCH,YYCSG,DMBTR,BELNR,CPUDT,basl.RACCT,ahc.A_ALTACCOUNT,SHKZG,BLART,LIFNR,ZZBSZ,BASL_OUT_ID,basl.CHANGE_DATE,HBSNR,VTNRAB, VTNRRE,BEC_ERKENNUNG3    

FROM parquet.`/data/prod_env/data/parsed_data/GY/VC/GY_BASL_720.parquet/` AS basl LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-09-22` r ON (basl.RACCT = r.RACCT) 
LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-B
OIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-09-22` ahc ON ((r.ID_RACCT = ahc.ID_RACCT) AND (b.ID_BUKRS = ahc.ID_BUKRS))  

 WHERE DATA_DATE_PARTITION IN ('2020-09-23', '2020-09-22', '2020-09-21') 
 AND (SUBSTRING (FS_BUKRS,1,3) IN ('601','621','622') 
 OR FS_BUKRS LIKE '9L%' OR FS_BUKRS LIKE '9P%' OR FS_BUKRS LIKE '9R%') 
 AND (FS_LDGRP1 = 'L1' OR FS_LDGRP1 IS NULL) AND A_GRPCPTE_RACCT = 'ZRES' 
 AND SUBSTRING(basl.RACCT,1,3) NOT IN ('TAX','DAS','CMU') AND MDCF IN ('PRV','EMP','RET') 
 AND GSP NOT IN ('870') AND GSA IN ('1','2','6') 

 AND RISK NOT LIKE '8%'  AND ABZ NOT IN ('V4A5','V40Z','V44Z','V486','I450','I650','I840','I031','V013','I050','I085','I400','I420', 'I620','I422','I622',
 'V050','V085','V400','V420','V425','V231','V23D','I101','I191','I211','I429','I709','I791', 'I795','V113','V42D','V70D','I111','V08D','V19B','V29S','V291','V40G','V79D','V76A','V7I5','I043','I713','I676',
 'I655', 'I848','I842','I651','I675','I044','I714','V48B','I845','V48F','V48D','V009','V707','V48C','V48H','V48G','V027','V719','I040')  

AND SUBSTRING(ABZ,1,1) NOT IN ('M','F') AND SUBSTRING(ABZ,1,2) NOT IN ('VT','00') 

AND SUBSTRING(ABZ,1,3) NOT IN ('V74')  

AND RVA IN ('136','137','138','139','141','142','143','144','145','146','165','205','268','269','270','271','273','275','276', '277','278','281','283','320','384','430','501','503','504','582','646',
 '648','666','762','763','764','765','826','831','832', '869','894','895','896','897','951','952','954','962','964','965','966','967','969','981','982','983','984','988','989') 

AND (CPUDT < '2020-09-23' OR (CPUDT = '2020-09-23' AND CPUTM < '043000')) 
AND BASL_OUT_ID > '22532074268'  


In [None]:
res_ydr = "/data/dropbox/larcher/FLUX_RETOURS/YDR/YDR_test_MEP_2.csv"

queryydr = """
SELECT /*  BROADCAST(b), BROADCAST(r), BROADCAST(ahc) */  
FS_BUKRS,PFS,BUDAT,BELDAT,BEC_ERKENNUNG1,BEC_ERKENNUNG7,FS_BSCHL,FS_VBUND1,FS_VBUND2,FMK1,FMK2,PRODSCHL,VMK,KHD,FS_DCH,MAY, CSG,BKTXT,SGTXT,TWBTRG,FS_WAERS,FS_BLNR1,VSNR,GSA,GSAKAT,TARIF,KUNDENNR,INKASSOART,BUBSZ,BGZN,SNR,RISKLAND,ZUGJAHR,ZUGMONAT, 
FS_BLNR2,FS_HWAERS,VT_AZP_ORIGINAL,RVA,RIP,DATE4,XREF3,GSBER,PAYID,GSP,REFPGA,TICKID,MDCF,ABZ,WERBERNR,GJAGJE,GJVJ,WITHT, WT_QSSHH,ZFBDT,BUPRT,MWSKZ,DATE1,DATE2,DATE3,RISK,SUBRISK,QTE_UC,ASSURE,ZZONR,KOSTL,WAERS,basl.BUKRS,BLDAT,BSCHL,BEWAR,LDGRP,VBUND, 
YYLOB,YYPRG,YYDCH,YYCSG,DMBTR,BELNR,CPUDT,basl.RACCT,ahc.A_ALTACCOUNT,SHKZG,BLART,LIFNR,ZZBSZ,BASL_OUT_ID,basl.CHANGE_DATE,HBSNR,VTNRAB, VTNRRE,BEC_ERKENNUNG3    

FROM parquet.`/data/prod_env/data/parsed_data/GY/VC/GY_BASL_720.parquet/` AS basl 
LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-09-22` r ON (basl.RACCT = r.RACCT) 
LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-09-22` b ON (b.BUKRS = basl.BUKRS)
LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-09-22` ahc ON ((r.ID_RACCT = ahc.ID_RACCT) AND (b.ID_BUKRS = ahc.ID_BUKRS))  

 WHERE DATA_DATE_PARTITION IN ('2020-09-23', '2020-09-22', '2020-09-21') 
 AND (SUBSTRING (FS_BUKRS,1,3) IN ('601','621','622') 
 OR FS_BUKRS LIKE '9L%' OR FS_BUKRS LIKE '9P%' OR FS_BUKRS LIKE '9R%') 
 AND (FS_LDGRP1 = 'L1' OR FS_LDGRP1 IS NULL) AND A_GRPCPTE_RACCT = 'ZRES' 
 AND SUBSTRING(basl.RACCT,1,3) NOT IN ('TAX','DAS','CMU') AND MDCF IN ('PRV','EMP','RET') 
 AND GSP NOT IN ('870') AND GSA IN ('1','2','6') 

 AND RISK NOT LIKE '8%'  AND ABZ NOT IN ('V4A5','V40Z','V44Z','V486','I450','I650','I840','I031','V013','I050','I085','I400','I420', 'I620','I422','I622',
 'V050','V085','V400','V420','V425','V231','V23D','I101','I191','I211','I429','I709','I791', 'I795','V113','V42D','V70D','I111','V08D','V19B','V29S','V291','V40G','V79D','V76A','V7I5','I043','I713','I676',
 'I655', 'I848','I842','I651','I675','I044','I714','V48B','I845','V48F','V48D','V009','V707','V48C','V48H','V48G','V027','V719','I040')  

AND SUBSTRING(ABZ,1,1) NOT IN ('M','F') AND SUBSTRING(ABZ,1,2) NOT IN ('VT','00') 

AND SUBSTRING(ABZ,1,3) NOT IN ('V74')  

AND RVA IN ('136','137','138','139','141','142','143','144','145','146','165','205','268','269','270','271','273','275','276', '277','278','281','283','320','384','430','501','503','504','582','646',
 '648','666','762','763','764','765','826','831','832', '869','894','895','896','897','951','952','954','962','964','965','966','967','969','981','982','983','984','988','989') 

AND (CPUDT < '2020-09-23' OR (CPUDT = '2020-09-23' AND CPUTM < '043000')) 
AND BASL_OUT_ID > '22532074268'  

"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "yyyyMMdd",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    
    
}

dict_timestampFormat = {
    "CHANGE_DATE": {
        "format": "yyyyMMdd:HH:mm:ss",
        "upper": False
      }
}
#dfydr= sqlContext.sql(queryydr)
#dfydr.repartition(10).write.parquet(res_ydr)

queryydr= runInfoMap2(spark, queryydr, dict_dateFormat, dict_timestampFormat, '.', res_ydr, "csv", 1, True, ";")

In [None]:
df_ydr_src.printSchema()

In [None]:
df_ydr_src = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/LOT13/LDM_PRD_FRA_YDRDA_200923_041708.CSV", header=True, sep=";")
print("nombre lignes source : " + str(df_ydr_src.count()))
basl_max_source = df_ydr_src.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = df_ydr_src.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))

In [None]:
df_ydr_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20200923_20200923043201_YDRDA_0004.CSV", header=True, sep=";")
print("nombre lignes source : " + str(df_ydr_res.count()))
basl_max_source = df_ydr_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = df_ydr_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
df_ydr_notbook = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/YDR/YDR_test_MEP_2.csv", header=True, sep=";")
print("nombre lignes extract : " + str(df_ydr_notbook.count()))
basl_max_extract = df_ydr_notbook.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = df_ydr_notbook.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))

In [None]:
df_ydr_src.drop("CHANGE_DATE").subtract(df_ydr_notbook.drop("CHANGE_DATE")).count()

In [None]:
df_ydr_src.select("FS_BUKRS").distinct().show(100)

In [None]:
df_ydr_notbook.select("FS_BUKRS").distinct().show(100)

In [None]:
.subtract(df_ydr_src.drop("CHANGE_DATE")).show(1)

In [None]:
df_ydr_notbook.filter("FS_BUKRS IN ('9RFR','622001')").count()

In [None]:
851585 - 32962

In [None]:
df_ydr_notbook.drop("CHANGE_DATE").subtract(df_ydr_notbook.drop("CHANGE_DATE")).count()

In [None]:
df_ydr_src.drop("CHANGE_DATE").filter("BASL_OUT_ID = '22533307895'").show(1)

In [None]:
res_blackline = "/data/dropbox/larcher/FLUX_RETOURS/blackline/blackline_cdm.csv"

queryblackline = """
SELECT BUKRS, RACCT, RVA, ZZONR, LDGRP, SAP_WAERS, SUM(EDS_TWBTRG) AS LADM_TWBTRG

FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/` AS BASL

WHERE BUKRS IN ('1238','1582','9004','9012','9023','9053','9109','9122','9136','9163','9238','9LDJ',
'9LDV','9LFR','9LGF','9LGP','9LIL','9LKM','9LMC','9LMF','9LMQ','9LNC','9LPF','9LPM','9LRE','9LYT',
'9PBE','9PBL','9PDV','9PFR','9PGB','9PGF','9PGP','9PIL','9PMF','9PMG','9PML','9PMQ','9PNC','9PNL',
'9PPF','9PPM','9PRE','9PTG','9PTN','9PUS','9PYT','NGAA','9LBL','9LDZ','9LHT','9LMA')

AND RACCT BETWEEN '1000000000' AND '4999999999'

AND BASL.LDGRP IN (' ', 'L1')
AND ZZONR <> 'REGRP1' OR ZZONR IS NULL
AND BUDAT <= '2020-09-30' 

AND (CPUDT < '2020-09-11' OR (CPUDT = '2020-09-11' AND CPUTM < '000000'))

GROUP BY BUKRS, RACCT, RVA, ZZONR, LDGRP, SAP_WAERS
"""

dict_dateFormat = {
    "BELDAT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    "BLDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : True
    },
    "BUDAT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "CPUDT" : {
        "format" : "ddMMMyyyy:HH:mm:ss",
        "upper" : False
    },
    "ZFBDT" : {
        "format" : "yyyyMMdd",
        "upper" : False
    },
    
    
}

dict_timestampFormat = {

}
#dfAD01M= sqlContext.sql(queryYDR)
#dfAD01M.repartition(10).write.parquet(dfYDR)

queryblackline= runInfoMap2(spark, queryblackline, dict_dateFormat, dict_timestampFormat, '.', res_blackline, "csv", 1, True, ";")

In [None]:
df_test_asac = spark.read.parquet("/data/prod_env/data/parsed_data/SA01/VC/SA01_SAVPERS.parquet")

In [None]:
df_test_asac.registerTempTable("SAVPERS")

In [None]:
query1 = """SELECT COUNT(*), DATA_DATE_PARTITION
            FROM SAVPERS
            GROUP BY DATA_DATE_PARTITION
            ORDER BY DATA_DATE_PARTITION
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(100,False)

In [None]:
## OX 

In [None]:
ox_21_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/source/LDM_PRD_FRA_OXCDA_200921_221537.CSV", header=True, sep=";")
print("nombre lignes source : " + str(ox_21_source.count()))
basl_max_source = ox_21_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = ox_21_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
ox_21_res = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/GY06_PRD_BI_PRD_20200921_20200921220419_OXCDA_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(ox_21_res.count()))
basl_max_extract = ox_21_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = ox_21_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))

In [None]:
ox_23_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/source/LDM_PRD_FRA_OXCDA_200923_221800.CSV", header=True, sep=";")
print("nombre lignes source : " + str(ox_23_source.count()))
basl_max_source = ox_23_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = ox_23_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
ox_23_res = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/GY06_PRD_BI_PRD_20200923_20200923220659_OXCDA_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(ox_23_res.count()))
basl_max_extract = ox_23_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = ox_23_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))

In [None]:
# ox_23_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/source/LDM_PRD_FRA_OXCDA_200923_221800.CSV", header=True, sep=";")
# print("nombre lignes source : " + str(ox_23_source.count()))
# basl_max_source = ox_23_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = ox_23_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
ox_22_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20200922_20200922220505_OXCDA_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(ox_22_res.count()))
basl_max_extract = ox_22_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = ox_22_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))

In [None]:
ox_24_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/source/LDM_PRD_FRA_OXCDA_200924_221935.CSV", header=True, sep=";")
print("nombre lignes source : " + str(ox_24_source.count()))
basl_max_source = ox_24_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = ox_24_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
ox_24_res = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/GY06_PRD_BI_PRD_20200924_20200924220443_OXCDA_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(ox_24_res.count()))
basl_max_extract = ox_24_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = ox_24_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))

In [None]:
## SKB 

In [None]:
skb_22_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/source/LDM_PRD_FRA_IDBATCHAUTO_200922__1_of_1.CSV", header=True, sep=";")
print("nombre lignes source : " + str(skb_22_source.count()))
basl_max_source = skb_22_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = skb_22_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
skb_22_res = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/GY06_PRD_BI_PRD_20200922_20200922080942_SKB1Q_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(skb_22_res.count()))
basl_max_extract = skb_22_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = skb_22_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))

In [None]:
skb_23_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/source/LDM_PRD_FRA_IDBATCHAUTO_200923__1_of_1.CSV", header=True, sep=";")
print("nombre lignes source : " + str(skb_23_source.count()))
basl_max_source = skb_23_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = skb_23_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
skb_23_res = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/GY06_PRD_GY0100_PRD_20200923_20200923081004_SKB1Q_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(skb_23_res.count()))
basl_max_extract = skb_23_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = skb_23_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))

In [None]:
skb_24_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/source/LDM_PRD_FRA_IDBATCHAUTO_200924__1_of_1.CSV", header=True, sep=";")
print("nombre lignes source : " + str(skb_24_source.count()))
basl_max_source = skb_24_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = skb_24_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
skb_24_res = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/GY06_PRD_BI_PRD_20200924_20200924081011_SKB1Q_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(skb_24_res.count()))
basl_max_extract = skb_24_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = skb_24_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))

In [None]:
skb_24_res.registerTempTable("skb_24_res")

In [None]:
skb_25_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/source/LDM_PRD_FRA_IDBATCHAUTO_200925__1_of_1.CSV", header=True, sep=";")
print("nombre lignes source : " + str(skb_25_source.count()))
basl_max_source = skb_25_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = skb_25_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
skb_25_res = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/MEP/new/OX/GY06_PRD_GY0100_PRD_20200925_20200925081020_SKB1Q_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(skb_25_res.count()))
basl_max_extract = skb_25_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = skb_25_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))

In [None]:
skb_25_res.registerTempTable("skb_25_res")

In [None]:
## IE01D- 28
IE01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IE01D/LDM_PRD_FRA_IE01D_200928_042310.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IE01D_source.count()))
basl_max_source = IE01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IE01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IE01D_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/GY05_PRD_GY0609_PRD_20200928_20200928033317_IE01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IE01D_res.count()))
basl_max_extract = IE01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IE01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


In [None]:
print(IE01D_source.filter("LDGRP NOT IN('K4','M3')").count())
print(IE01D_res.filter("LDGRP NOT IN('K4','M3')").count())


In [None]:
IE01D_source.select("LDGRP").distinct().show(50)
IE01D_res.select("LDGRP").distinct().show(50)

In [None]:
IE01D_source.printSchema()

In [None]:
df_query_SKB = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/GY05_PRD_GY0609_PRD_20200928_20200928033317_IE01D_0004.CSV", header=True, sep=";")
df_src_SKB = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IE01D/LDM_PRD_FRA_IE01D_200928_042310.CSV", header=True, sep=";")
df_query_SKB = df_query_SKB.select("BASL_OUT_ID","ACCRESPID","LADM_DMBTR","LADM_TWBTRG")
df_src_SKB = df_src_SKB.select("BASL_OUT_ID","ACCRESPID","LADM_DMBTR","LADM_TWBTRG")

In [None]:
csv_test = "/data/dropbox/larcher/INFOMAPS/IE01D/ecarts_flag2.csv"

df_query_SKB = df_query_SKB.subtract(df_src_SKB)

eds_SKB_columns = df_query_SKB.columns

df_query_SKB_columns_renamed = df_query_SKB

for eds_SKB_column in eds_SKB_columns:
    df_query_SKB_columns_renamed = df_query_SKB_columns_renamed.withColumnRenamed(eds_SKB_column, "EDS_" + eds_SKB_column)

df_SKB_join_eds_src = df_query_SKB_columns_renamed.join(df_src_SKB, df_query_SKB_columns_renamed["EDS_BASL_OUT_ID"] == df_src_SKB["BASL_OUT_ID"], how='left_outer')

for eds_SKB_column in eds_SKB_columns:
    df_SKB_join_eds_src = df_SKB_join_eds_src.withColumn("FLAG_" + eds_SKB_column, when(df_SKB_join_eds_src["EDS_" + eds_SKB_column]==df_SKB_join_eds_src[eds_SKB_column], "True").otherwise("False"))

df_SKB_join_eds_src.repartition(1).write.csv(csv_test, sep=";", header=True)


In [None]:
## IE01D - 29
IE01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IE01D/LDM_PRD_FRA_IE01D_200929_042453.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IE01D_source.count()))
basl_max_source = IE01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IE01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IE01D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0609_PRD_20200929_20200929034503_IE01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IE01D_res.count()))
basl_max_extract = IE01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IE01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


In [None]:
IE01D_source.subtract(IE01D_res).count()

In [None]:
print(IE01D_source.filter("LDGRP NOT IN('K4','M3')").count())
print(IE01D_res.filter("LDGRP NOT IN('K4','M3')").count())

In [None]:
# exclu accrespid et accrespidF, ldgrp = K4 et M3
IE01D_source.filter("LDGRP NOT IN('K4','M3')").drop("ACCRESPID","ACCRESPIDF").subtract(IE01D_res.filter("LDGRP NOT IN('K4','M3')").drop("ACCRESPID","ACCRESPIDF")).count()

In [None]:
# exclu: LADM_DMBTR et LADM_TWBTRG accrespid et accrespidF, ldgrp = K4 et M3
IE01D_source.filter("LDGRP NOT IN('K4','M3')").drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG").subtract(IE01D_res.filter("LDGRP NOT IN('K4','M3')").drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG")).count()

In [None]:
# exclu: LADM_DMBTR et LADM_TWBTRG accrespid et accrespidF, ldgrp = K4 et M3 et 
    IE01D_source.filter("LDGRP NOT IN('K4','M3')").drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG","MDCF").subtract(IE01D_res.filter("LDGRP NOT IN('K4','M3')").drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG","MDCF")).count()

In [None]:
# exclu: LADM_DMBTR et LADM_TWBTRG accrespid et accrespidF, ldgrp = K4 et M3 et 
df_ecart = IE01D_source.filter("LDGRP NOT IN('K4','M3')").select("BASL_OUT_ID","ACCRESPID").subtract(IE01D_res.filter("LDGRP NOT IN('K4','M3')").select("BASL_OUT_ID","ACCRESPID"))

In [None]:
df_ecart.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/IE01D/ecarts.csv")

In [None]:
IE01D_res.select("BASL_OUT_ID","ACCRESPID").filter("BASL_OUT_ID = 21012911793").show()

In [None]:
IE01D_source.filter("LDGRP NOT IN('K4','M3')").agg({"LADM_DMBTR": "sum"}).collect()[0][0]

In [None]:
IE01D_res.filter("LDGRP NOT IN('K4','M3')").agg({"LADM_DMBTR": "sum"}).collect()[0][0]

In [None]:
IE01D_source.filter("LDGRP NOT IN('K4','M3')").agg({"LADM_TWBTRG": "sum"}).collect()[0][0]

In [None]:
IE01D_res.filter("LDGRP NOT IN('K4','M3')").agg({"LADM_TWBTRG": "sum"}).collect()[0][0]

In [None]:
df_ecart = IE01D_source.filter("LDGRP NOT IN('K4','M3')").drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG","MDCF").subtract(IE01D_res.filter("LDGRP NOT IN('K4','M3')").drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG","MDCF")).show(1)

In [None]:
IE01D_res.drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG").filter("BASL_OUT_ID = '21098618408'").show()

In [None]:
IE01D_source.filter("LDGRP NOT IN('K4','M3')").select("BASL_OUT_ID").subtract(IE01D_res.filter("LDGRP NOT IN('K4','M3')").select("BASL_OUT_ID")).count()

In [55]:
## IE01D - 30
IE01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IE01D/LDM_PRD_FRA_IE01D_200930_042615.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IE01D_source.count()))
basl_max_source = IE01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IE01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IE01D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0609_PRD_20200930_20200930033850_IE01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IE01D_res.count()))
basl_max_extract = IE01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IE01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 1473680
basl out id max source : 22559415105
basl out id min source : 20219541098
nombre lignes extract : 1471518
basl out id max extract : 22559415105
basl out id min extract : 20219541098


In [5]:
IE01D_source_corrige = IE01D_source.filter("LDGRP NOT IN('K4','M3')")
IE01D_res_corrige = IE01D_res.filter("LDGRP NOT IN('K4','M3')")

In [6]:
print(IE01D_source_corrige.count())
print(IE01D_res_corrige.count())

59489
59489


In [15]:
IE01D_source_corrige.drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG","LADM_DMBTR","MDCF","SUBRISK").subtract(IE01D_res_corrige.drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG","LADM_DMBTR","MDCF","SUBRISK")).count()

0

In [16]:
IE01D_res_corrige.drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG","LADM_DMBTR","MDCF","SUBRISK").subtract(IE01D_source_corrige.drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG","LADM_DMBTR","MDCF","SUBRISK")).count()

0

In [17]:
IE01D_source_corrige.registerTempTable("ie01_src")

In [18]:
IE01D_res_corrige.registerTempTable("ie01_res")

In [50]:
query = """
SELECT BUKRS, LDGRP,ZZONR, CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15)), CAST(SUM(LADM_DMBTR) AS DECIMAL(38,15))
FROM ie01_res
GROUP BY BUKRS, LDGRP, ZZONR
"""
df_tcd = sqlContext.sql(query)
df_tcd.show(5,False)
#df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/IE01D/tcd_res.csv,",header=True,sep=';')

+-----+-----+------+--------------------------------------------------------+-------------------------------------------------------+
|BUKRS|LDGRP|ZZONR |CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|CAST(sum(CAST(LADM_DMBTR AS DOUBLE)) AS DECIMAL(38,15))|
+-----+-----+------+--------------------------------------------------------+-------------------------------------------------------+
|9LFR |L1   |TPICOL|-8.0733457E-8                                           |-8.0733457E-8                                          |
|9LFR |L1   |TPRCOL|3.46972E-10                                             |3.46972E-10                                            |
|9PFR |L1   |INVMAN|0E-15                                                   |0E-15                                                  |
|9LFR |L1   |IMMSYS|6.3795596E-8                                            |6.3795596E-8                                           |
|9LFR |L1   |VINHB2|-1.829449E-9                              

In [None]:
query = """
SELECT BUKRS,RACCT, ZZONR,SUM(DM)
FROM cm01d_src
GROUP BY RACCT, ZZONR, MDCF
"""
df_tcd = sqlContext.sql(query)
df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/CM01D/tcd.csv,",header=True,sep=';')

In [26]:
query = """
SELECT SUM(LADM_DMBTR)
FROM ie01_src
"""
df_tcd = sqlContext.sql(query)
df_tcd.show(1)
#df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/CM01D/tcd.csv,",header=True,sep=';')

+-------------------------------+
|sum(CAST(LADM_DMBTR AS DOUBLE))|
+-------------------------------+
|             0.5499965474009514|
+-------------------------------+



In [27]:
query = """
SELECT SUM(LADM_DMBTR)
FROM ie01_res
"""
df_tcd = sqlContext.sql(query)
df_tcd.show(1)
#df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/CM01D/tcd.csv,",header=True,sep=';')

+-------------------------------+
|sum(CAST(LADM_DMBTR AS DOUBLE))|
+-------------------------------+
|           -1.28376996144652...|
+-------------------------------+



In [30]:
query = """
SELECT SUM(LADM_TWBTRG)
FROM ie01_src
"""
df_tcd = sqlContext.sql(query)
df_tcd.show(1,False)
#df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/CM01D/tcd.csv,",header=True,sep=';')

+--------------------------------+
|sum(CAST(LADM_TWBTRG AS DOUBLE))|
+--------------------------------+
|0.5499965373892337              |
+--------------------------------+



In [29]:
query = """
SELECT SUM(LADM_TWBTRG)
FROM ie01_res
"""
df_tcd = sqlContext.sql(query)
df_tcd.show(1)
#df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/CM01D/tcd.csv,",header=True,sep=';')

+--------------------------------+
|sum(CAST(LADM_TWBTRG AS DOUBLE))|
+--------------------------------+
|            -1.27474777400493...|
+--------------------------------+



In [22]:
df_tcd.show(277)

+-----+----------+------+--------------------------------+-------------------------------+
|BUKRS|     RACCT| ZZONR|sum(CAST(LADM_TWBTRG AS DOUBLE))|sum(CAST(LADM_DMBTR AS DOUBLE))|
+-----+----------+------+--------------------------------+-------------------------------+
| 9LFR|5230001001|IMMSYS|              -7549717.540000001|             -7549717.540000001|
| 9LFR|7888888888|IMMSYS|                      9843131.74|                     9843131.74|
| 9PFR|4580007040|IMMSYS|              -965956.0800000001|             -965956.0800000001|
| 9LFR|1110011502|IMMSYS|              424.00999999999476|             424.00999999999476|
| 9LFR|3752040000|TPICOL|                         2873.99|                        2873.99|
| 9LFR|6211002530|TPICOL|                         6382.26|                        6382.26|
| 9PFR|6620001000|IMMSYS|                      4350814.49|                     4350814.49|
| 9LFR|6211001100|VINHB2|            1.7238075549999997E7|           1.7238075549999997E7|

+-------------------------------------------------------+
|CAST(sum(CAST(EDS_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+-------------------------------------------------------+
|-145801548.739959240000000                             |
+-------------------------------------------------------+



In [14]:
IE01D_res_corrige.drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG","LADM_DMBTR","MDCF","SUBRISK").filter("BASL_OUT_ID = 21098618423").show(1)

+-----+-----+-----+----------+-------+----+------+------------------+----+-----------+--------------------+--------------------+--------------+--------+----------+----------+--------+-----+------------------+----+-----+-----+--------+------+--------+--------+----+-----+-----+-----+-----+--------+----+--------------------+--------+--------+------+---------+---------+---------+------+---+-----+-----+-------+----------+---+-----+--------+-----+----+-----+---+-----+--------+------+----+--------+---+------+---------+---------+---------+-------+--------------------+----+--------+-------+------------+--------+------+----+------+----------+------+------+----+--------+-----+-----+-----+-----+-----+-----+-------+-----+------+------+
|BUKRS|GJAHR|MONAT|     RACCT|  ZUONR| ABZ|ASSURE|             AWKEY| AZJ|BASL_OUT_ID|                 BEC|      BEC_ERKENNUNG1|BEC_ERKENNUNG3|  BELDAT|     BELNR|BETRAGSART|   BKTXT|BLART|             BLDAT|BNRB|BSCHL|BUBSZ|   BUDAT| BUPER|   BUPRT|   CPUDT| CSG|DAT

In [None]:
21098618408|

+-------------------------------------------------------+
|CAST(sum(CAST(EDS_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+-------------------------------------------------------+
|-145801548.739959240000000                             |
+-------------------------------------------------------+



In [None]:
print(IE01D_source.filter("LDGRP NOT IN('K4','M3')").count())
print(IE01D_res.filter("LDGRP NOT IN('K4','M3')").count())

In [None]:
## IE01D - 1
IE01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IE01D/LDM_PRD_FRA_IE01D_201001_042752.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IE01D_source.count()))
basl_max_source = IE01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IE01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IE01D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0609_PRD_20201001_20201001041038_IE01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IE01D_res.count()))
basl_max_extract = IE01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IE01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


In [None]:
## IE01D - 1
IE01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IE01D/LDM_PRD_FRA_IE01D_201001_042752.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IE01D_source.count()))
basl_max_source = IE01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IE01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IE01D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0609_PRD_20201001_20201001041038_IE01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IE01D_res.count()))
basl_max_extract = IE01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IE01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


In [None]:
## IE03D

In [None]:
## IE03D - 
IE03D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IE03D/LDM_PRD_FRA_IE03D_200928_032303.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IE03D_source.count()))
basl_max_source = IE03D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IE03D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IE03D_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/GY05_PRD_GY0609_PRD_20200928_20200928033228_IE03D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IE03D_res.count()))
basl_max_extract = IE03D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IE03D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


In [None]:
## IE03D - 
IE03D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IE03D/LDM_PRD_FRA_IE03D_200929_032447.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IE03D_source.count()))
basl_max_source = IE03D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IE03D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IE03D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0609_PRD_20200929_20200929035937_IE03D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IE03D_res.count()))
basl_max_extract = IE03D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IE03D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


In [None]:
IE03D_source.select("BASL_OUT_ID").subtract(IE03D_res.select("BASL_OUT_ID")).count()

In [None]:
IE03D_source.subtract(IE03D_res).count()

In [None]:
# exclu accrespid et accrespidF, ldgrp = K4 et M3
IE03D_source.drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG").subtract(IE03D_res.drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG")).count()

In [None]:
IE03D_source.drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG").subtract(IE03D_res.drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG")).show(1)

In [None]:
IE03D_res.drop("ACCRESPID","ACCRESPIDF","LADM_DMBTR","LADM_TWBTRG").filter('BASL_OUT_ID = "20952657933"').show()

In [73]:
## IE03D - 
IE03D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IE03D/LDM_PRD_FRA_IE03D_200930_032608.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IE03D_source.count()))
basl_max_source = IE03D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IE03D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IE03D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0609_PRD_20200930_20200930034800_IE03D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IE03D_res.count()))
basl_max_extract = IE03D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IE03D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 359564
basl out id max source : 22559392549
basl out id min source : 20219541100
nombre lignes extract : 359564
basl out id max extract : 22559392549
basl out id min extract : 20219541100


In [74]:
IE03D_source.registerTempTable("ie03_src")
IE03D_res.registerTempTable("ie03_res")

In [77]:
query = """
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
FROM ie03_src
"""
df_tcd = sqlContext.sql(query)
df_tcd.show(1,False)
#df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/CM01D/tcd.csv,",header=True,sep=';')

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-70097143.759938720000000                               |
+--------------------------------------------------------+



In [78]:
query = """
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
FROM ie03_res
"""
df_tcd = sqlContext.sql(query)
df_tcd.show(1,False)
#df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/CM01D/tcd.csv,",header=True,sep=';')

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-70097143.819614410000000                               |
+--------------------------------------------------------+



In [79]:
query = """
SELECT CAST(SUM(LADM_DMBTR) AS DECIMAL(38,15))
FROM ie03_src
"""
df_tcd = sqlContext.sql(query)
df_tcd.show(1,False)
#df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/CM01D/tcd.csv,",header=True,sep=';')

+-------------------------------------------------------+
|CAST(sum(CAST(LADM_DMBTR AS DOUBLE)) AS DECIMAL(38,15))|
+-------------------------------------------------------+
|-581149.932716272500000                                |
+-------------------------------------------------------+



In [81]:
query = """
SELECT CAST(SUM(LADM_DMBTR) AS DECIMAL(38,15))
FROM ie03_res
"""
df_tcd = sqlContext.sql(query)
df_tcd.show(1,False)
#df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/CM01D/tcd.csv,",header=True,sep=';')

+-------------------------------------------------------+
|CAST(sum(CAST(LADM_DMBTR AS DOUBLE)) AS DECIMAL(38,15))|
+-------------------------------------------------------+
|-581149.939995782400000                                |
+-------------------------------------------------------+



In [58]:
IE03D_source.subtract(IE03D_res).count()

213808

In [72]:
IE03D_source.drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG","LADM_DMBTR","MDCF","SUBRISK","VTNRRE","SGTXT","VUNR").subtract(IE03D_res.drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG","LADM_DMBTR","MDCF","SUBRISK","VTNRRE","SGTXT",'VUNR')).count()

0

In [68]:
df = IE03D_source.drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG","LADM_DMBTR","MDCF","SUBRISK","VTNRRE","SGTXT").subtract(IE03D_res.drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG","LADM_DMBTR","MDCF","SUBRISK","VTNRRE","SGTXT"))

In [69]:
df.show(1)

+-----+-----+-----+----------+--------+----+------+------------------+----+-----------+------+--------------+--------------+--------+----------+----------+--------+-----+--------+----+-----+-----+--------+------+-----+--------+----+-----+-----+-----+-----+--------+----+-----------+-----------+--------+------+---------+---------+---------+------+---+-----+-----+-------+----------+---+-----+---------+-----+----+-----+---+----+--------+------+----+--------+---+------+---------+---------+---------+-------+-----------+--------+----------------+--------+------+----+----------+--------------+------+----+--------+-----+-----+-----+-----+-----+-----+-------+-----+------+------+
|BUKRS|GJAHR|MONAT|     RACCT|   ZUONR| ABZ|ASSURE|             AWKEY| AZJ|BASL_OUT_ID|   BEC|BEC_ERKENNUNG1|BEC_ERKENNUNG3|  BELDAT|     BELNR|BETRAGSART|   BKTXT|BLART|   BLDAT|BNRB|BSCHL|BUBSZ|   BUDAT| BUPER|BUPRT|   CPUDT| CSG|DATE1|DATE2|DATE3|DATE4|ESTIMATE|FMK1|   FS_BLNR1|   FS_BLNR2|FS_BUKRS|FS_DCH|FS_LDGRP1|FS

In [71]:
IE03D_res.drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG","LADM_DMBTR","MDCF","SUBRISK","VTNRRE","SGTXT").filter("BASL_OUT_ID = '21605341755'").show()

+-----+-----+-----+----------+--------+----+------+------------------+----+-----------+------+--------------+--------------+--------+----------+----------+--------+-----+--------+----+-----+-----+--------+------+-----+--------+----+-----+-----+-----+-----+--------+----+-----------+-----------+--------+------+---------+---------+---------+------+---+-----+-----+-------+----------+---+-----+---------+-----+----+-----+---+----+--------+------+----+--------+---+------+---------+---------+---------+-------+-----------+--------+----------------+--------+------+----+----------+--------------+------+----+--------+-----+-----+-----+-----+-----+-----+-------+-----+------+------+
|BUKRS|GJAHR|MONAT|     RACCT|   ZUONR| ABZ|ASSURE|             AWKEY| AZJ|BASL_OUT_ID|   BEC|BEC_ERKENNUNG1|BEC_ERKENNUNG3|  BELDAT|     BELNR|BETRAGSART|   BKTXT|BLART|   BLDAT|BNRB|BSCHL|BUBSZ|   BUDAT| BUPER|BUPRT|   CPUDT| CSG|DATE1|DATE2|DATE3|DATE4|ESTIMATE|FMK1|   FS_BLNR1|   FS_BLNR2|FS_BUKRS|FS_DCH|FS_LDGRP1|FS

In [56]:
## IE03D - 
IE03D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IE03D/LDM_PRD_FRA_IE03D_201001_032746.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IE03D_source.count()))
basl_max_source = IE03D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IE03D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IE03D_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/GY05_PRD_GY0609_PRD_20200928_20200928033228_IE03D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IE03D_res.count()))
basl_max_extract = IE03D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IE03D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 360995
basl out id max source : 22562312399
basl out id min source : 20219541100
nombre lignes extract : 359289
basl out id max extract : 22544905422
basl out id min extract : 20219541100


In [None]:
# AE01D

In [None]:
## AE01D 30
AE01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/AE01D/LDM_PRD_FRA_AE01D_200930_072619.CSV", header=True, sep=";")
print("nombre lignes source : " + str(AE01D_source.count()))
basl_max_source = AE01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = AE01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
AE01D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0613_PRD_20200930_20200930041157_AE01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IE03D_res.count()))
basl_max_extract = AE01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = AE01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


In [136]:
## AE01D 1
AE01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/AE01D/LDM_PRD_FRA_AE01D_201001_072758.CSV", header=True, sep=";")
print("nombre lignes source : " + str(AE01D_source.count()))
basl_max_source = AE01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = AE01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
AE01D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0613_PRD_20201001_20201001044216_AE01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(AE01D_res.count()))
basl_max_extract = AE01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = AE01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 586770
basl out id max source : 22565110129
basl out id min source : 20270979927
nombre lignes extract : 24869
basl out id max extract : 22562288255
basl out id min extract : 20271098860


In [None]:
AE01D_source.select("LDGRP").distinct().show()

In [None]:
AE01D_res.select("LDGRP").distinct().show()

In [None]:
AE01D_source.select("BLART").distinct().show()

In [None]:
AE01D_res.select("BLART").distinct().show()

In [144]:
df_ae01d_corrige = AE01D_source.filter("(LDGRP NOT IN('I1','M1','K4','M3') OR LDGRP IS NULL) AND (BLART NOT IN ('PV','ZA'))")

In [146]:
df_ae01d_corrige.registerTempTable("df_ae01d")

In [151]:
AE01D_res.registerTempTable("df_ae01d_res")

In [149]:
query = """
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,12))
from df_ae01d
"""
df= sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,12))|
+--------------------------------------------------------+
|-74210044.240000550000                                  |
+--------------------------------------------------------+



In [152]:
query = """
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,12))
from df_ae01d_res
"""
df= sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,12))|
+--------------------------------------------------------+
|-74210044.160000160000                                  |
+--------------------------------------------------------+



In [150]:
query = """
SELECT CAST(SUM(LADM_DMBTR) AS DECIMAL(38,12))
from df_ae01d
"""
df= sqlContext.sql(query)
df.show(1,False)

+-------------------------------------------------------+
|CAST(sum(CAST(LADM_DMBTR AS DOUBLE)) AS DECIMAL(38,12))|
+-------------------------------------------------------+
|-74314736.500000600000                                 |
+-------------------------------------------------------+



In [153]:
query = """
SELECT CAST(SUM(LADM_DMBTR) AS DECIMAL(38,12))
from df_ae01d_res
"""
df= sqlContext.sql(query)
df.show(1,False)

+-------------------------------------------------------+
|CAST(sum(CAST(LADM_DMBTR AS DOUBLE)) AS DECIMAL(38,12))|
+-------------------------------------------------------+
|-74314736.420000120000                                 |
+-------------------------------------------------------+



In [142]:
AE01D_source.groupBy("LDGRP","BLART").count().show(50)

+-----+-----+------+
|LDGRP|BLART| count|
+-----+-----+------+
|   M3|   MI|   752|
|   L1|   ZA| 79888|
|   K4|   MX|   383|
| null|   MX|  1371|
|   L1|   MG|  4900|
|   I1|   PS|   160|
|   M1|   MI|   148|
| null|   MI|  4102|
|   M3|   MX|    33|
|   L1|   MX|     8|
|   I1|   ZA| 86486|
|   I1|   PV| 97804|
| null|   MR|  4947|
|   M1|   ZA| 90062|
|   L1|   MI|   313|
|   M1|   MR|   148|
| null|   MG|  7018|
|   K4|   MG|  7989|
|   M3|   MG|     8|
|   K4|   MI|  1270|
|   M1|   MG|     8|
|   M1|   MX|     4|
|   L1|   MR|   323|
|   K4|   MR|  3758|
|   M3|   MR|   752|
| null|   KN|  1696|
|   L1|   PV| 90412|
| null|   ZP|   111|
|   L1|   PS|    80|
|   M1|   PV|101836|
+-----+-----+------+



# CL05M

In [202]:
CL05M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/LDM_PRD_FRA_CL01M_201007_093810.CSV", header=True, sep=";")
print("nombre lignes source : " + str(CL05M_source.count()))
basl_max_source = CL05M_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = CL05M_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
CL05M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0607_PRD_20201007_20201007104206_CL01M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(CL05M_res.count()))
basl_max_extract = CL05M_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = CL05M_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))

nombre lignes source : 411841
basl out id max source : 22601193049
basl out id min source : 22280878150
nombre lignes extract : 409821
basl out id max extract : 22601193049
basl out id min extract : 22280878150


In [215]:
CL05M_res.registerTempTable("cl05m_res")

In [218]:
CL05M_source.registerTempTable("cl05m_src")

# CL05M

In [203]:
df_ecarts = CL05M_source.select("BASL_OUT_ID","BUKRS","GJAHR","MONAT", "RACCT").subtract(CL05M_res.select("BASL_OUT_ID","BUKRS","GJAHR","MONAT", "RACCT"))

In [204]:
df_ecarts.registerTempTable("cl01m_ecarts")

In [214]:
CL05M_source.filter("BASL_OUT_ID NOT IN ('22529554024','22529554358','22529560960','22529561102','22529561250','22529700523','22565067073','22565067317','22565099821','22565099907','22565100107','22565100149','22565108575','22565108605','22529554182','22529560928','22529561152','22529561172','22529700647','22529708629','22565067103','22565067215','22565081353','22529561186','22529700303','22529700517','22529708711','22529708843','22565067287','22565067417','22565081183','22565081297','22565100103','22565100109','22565100181','22565100245','22565108609','22529561230','22529700271','22565081101','22565100373','22565108723','22529554242','22529561058','22529700369','22529700565','22529700721','22565067383','22565067501','22565081243','22565081331','22565099891','22529553898','22529553998','22529561016','22529561120','22529700465','22529700507','22565067199','22565081309','22565100095','22565100199','22565108405','22565108637','22565108813','22529554218','22529708907','22565067271','22565067637','22565108667','22529554286','22529700351','22529700437','22529708601','22529708859','22565067109','22565067299','22565081009','22565081131','22565100339','22565108515','22529553866','22529554342','22529561140','22529708829','22565080989','22565100141','22565100197','22529553968','22529554126','22529560930','22529560952','22529561270','22529700655','22529708717','22565067463','22565067621','22565081139','22565108645','22529561276','22529708681','22529708863','22565067083','22565067089','22565080969','22565081113','22565099829','22565099989','22565100187','22565100331','22529554084','22529700691','22529700713','22529708603','22565081371','22565099853','22565100067','22565100087','22529554270','22529700735','22565067071','22565067131','22565067523','22565080965','22565108539','22529553922','22529554284','22529561106','22529708655','22529708707','22565080999','22565081025','22565081077','22565081245','22565081279','22565100093','22565108409','22565108805','22529554240','22529708747','22529709015','22565067139','22565067473','22565067653','22565081369','22565099981','22565100201','22565100211','22565100227','22565108693','22529554154','22529554262','22529554300','22529561042','22529700547','22529700557','22529700607','22565067447','22565067531','22565067613','22565081053','22565100281','22529553896','22529553960','22529553990','22529553996','22529560996','22529700335','22529700399','22529708833','22565067097','22565067223','22565099833','22565100129','22529561060','22529709017','22565067327','22565067601','22565081085','22565100089','22565100121','22565108589','22565108639','22565108715','22529561130','22529700611','22529700771','22565067309','22565067555','22565081333','22565099987','22565100113','22529561086','22529561088','22565067177','22565099903','22565108477','22565108829','22529553906','22529554292','22529560958','22529700471','22529709031','22565081205','22529553978','22529561212','22529561254','22529700571','22529708641','22565067095','22565067279','22565067455','22565081147','22565100065','22565108549','22529554122','22529554232','22529554256','22529560868','22529700527','22529700609','22565067509','22565081149','22565108497','22529560884','22529700339','22529708897','22565067411','22529553892','22529553976','22529554098','22529561008','22565067313','22565067431','22565081213','22565108481','22565108619','22565108775','22529554000','22529554268','22529554334','22529700439','22529700573','22529700673','22529700759','22529708905','22529709001','22565081079','22565081163','22565099941','22565100329','22529554206','22529560998','22529700293','22529700651','22529700705','22529708933','22565067385','22565081325','22565081387','22529554174','22529554338','22529560916','22529561020','22529700479','22529700627','22565067101','22565067373','22565100041','22529554250','22529700541','22529700575','22529708775','22565067479','22565067487','22565081215','22565100365','22565108743','22529554324','22529708967','22565067261','22565067395','22565081023','22565081145','22565099841','22565100137','22565108529','22565108643','22529553894','22529554366','22529561018','22529700593','22529700645','22529708703','22529709033','22565067307','22565067561','22565081125','22565099875','22565099927','22565100159','22565100219','22529553948','22529554296','22529700301','22529700325','22529700417','22529700513','22529708845','22529708853','22565067351','22565081033','22565108531','22565108851','22529553936','22529554124','22529700285','22529700405','22529708613','22565067341','22529554094','22529554222','22529700773','22529708647','22529708889','22529709037','22565067405','22565067453','22565081171','22565099975','22565108629','22529553916','22529554298','22529708849','22565067225','22565067345','22565099837','22565099919','22565100207','22565100305','22565108463','22529553962','22529554026','22529561166','22529561196','22529700501','22529700689','22529708693','22529708861','22529708965','22565067227','22565081141','22565081201','22565081277','22565081345','22529560872','22529700553','22565067181','22565067231','22565067649','22565100355','22565100381','22565108597','22565099825','22529700309','22529700483','22529700503','22529700679','22529700687','22529708883','22565067475','22565108683','22565108761','22529561128','22565067505','22565100213','22565100311','22565108545','22565108661','22529554392','22529700329','22529700401','22529700537','22529708631','22565067187','22565081323','22565099925','22565108439','22565108655','22529553918','22529700487','22529700621','22529700725','22529700763','22565067275','22565067335','22565067409','22565067493','22565080949','22565080977','22565100203','22565108729','22529554082','22529554244','22529554370','22529561014','22529700745','22529700747','22565067091','22565067293','22565081311','22565099913','22529554120','22529700767','22529708605','22529708767','22529708811','22529708887','22565067393','22565081097','22565081317','22565099887','22565100071','22565100139','22565100193','22565100289','22565100361','22565108513','22529554302','22529554314','22529561174','22529700387','22529708735','22529708831','22529708979','22565067379','22565081071','22565081167','22565081281','22565100271','22565108543','22529554246','22529554326','22529554346','22529554396','22529560984','22529561066','22529561100','22529561154','22529561314','22529700539','22565067407','22565081063','22565100313','22565100385','22565108777','22529554320','22529700259','22529700311','22565081225','22565081275','22565100105','22565100309','22565108505','22565108517','22565108649','22565108807','22565108843','22529561052','22529708667','22529708765','22529708779','22565067283','22565080979','22565108845','22529554076','22529561274','22529561280','22529708671','22565108423','22565108841','22529554254','22529554386','22529561112','22529708689','22565067157','22565100259','22565108471','22529700467','22529700775','22529708985','22565067235','22565099943','22565100005','22565100055','22565100301','22529554104','22529561000','22529561076','22529700449','22529708617','22565099997','22565100351','22529561022','22529561148','22529561160','22529561238','22529700403','22565067495','22565067623','22565081247','22565100343','22565108621','22565108819','22529554192','22529561298','22529708701','22529708879','22565067597','22565081069','22565081127','22565081381','22565100265','22565100307','22565108419','22529554070','22529554204','22529561006','22529561176','22529700249','22529700685','22529700777','22529709013','22565067355','22565067361','22565081037','22565081271','22565100023','22565108691','22529553888','22529554290','22529561004','22529561206','22529561264','22529700275','22565067155','22565080983','22529553934','22529700509','22529708733','22565080985','22565081055','22565081289','22565099957','22529700681','22529708807','22565067251','22565067367','22565081067','22565100223','22565100325','22529554142','22529561240','22529561252','22529700433','22529708715','22529708997','22529709041','22565067195','22565067435','22565067533','22565099911','22565108581','22529554170','22529560888','22529560942','22529700319','22529700637','22529708939','22529708993','22565081235','22565108547','22565108751','22565108827','22565067161','22565067179','22565081339','22565100097','22565108551','22565108825','22529561080','22529561266','22529700413','22529700587','22565067239','22565081221','22565108503','22565108681','22565108719','22529554156','22529700355','22529700545','22529709023','22565067519','22565099867','22565100017','22565108489','22529554108','22529554112','22529561114','22529700295','22529700427','22565067269','22565067461','22565100273','22565108521','22529553952','22529553980','22529554312','22529561188','22565067249','22565067477','22565067503','22565067627','22565100335','22529553914','22529553924','22529554188','22529560870','22529560890','22529561044','22529700363','22529700583','22529700661','22529708621','22529708795','22529709021','22565067219','22565067301','22565067567','22529553886','22529708841','22565067303','22565067423','22565081103','22565099843','22565099921','22565099963','22565100241','22529553942','22565081015','22565081283','22565081341','22565099823','22565099893','22565100387','22529554030','22529554278','22529700697','22565067569','22565081155','22565081189','22565100341','22565108685','22529553900','22529700257','22529708723','22529708753','22529708839','22565100075','22565108573','22529554020','22529554172','22529554212','22529554340','22529561170','22529700419','22529700625','22529708743','22529708913','22565067415','22565081017','22565081051','22565081143','22565099861','22565100021','22565100269','22565108623','22565108627','22565108853','22529554150','22529554348','22529700313','22529700391','22529700443','22529708741','22565081151','22565081351','22565100391','22565108535','22565108803','22529553944','22529554090','22529554308','22529700653','22529708885','22565067169','22565080941','22565099885','22565099889','22565099945','22565100297','22565108541','22529554102','22529708609','22529708649','22529708947','22529708999','22565067115','22565067399','22565067581','22565081251','22565081267','22565100083','22565108483','22529553964','22529554046','22529554202','22529554274','22529554388','22529700353','22529708903','22565067255','22565100369','22529554100','22529700457','22529700525','22529708877','22565067349','22565067499','22565080943','22565100131','22565100345','22565108431','22529561312','22529700577','22565067077','22565067145','22565067149','22565067343','22565067625','22565080947','22565100147','22529553970','22529561074','22529700491','22529708597','22529708949','22565067599','22565080939','22565081347','22565100135','22565100275','22529561182','22529708683','22529709007','22565067369','22565099953','22565108493','22565108811','22529560918','22529560980','22529561126','22529700435','22529708611','22565067079','22565067619','22565081335','22529554006','22529554050','22529554072','22529700585','22529708805','22529709019','22565081109','22565081357','22565108647','22565108821','22529554200','22529554238','22529554352','22529560992','22529700595','22565067445','22565067525','22565099897','22565108713','22565108831','22529553880','22529554260','22529700367','22529708769','22565067337','22565067425','22565080967','22565081013','22565081043','22565099871','22565100237','22529553992','22529700343','22529700555','22565067553','22565081301','22565100359','22565108625','22565108791','22529554168','22529554210','22529700755','22529708951','22565067641','22565099917','22565099977','22565100069','22565108501','22565067117','22565081179','22565099923','22565100115','22565100283','22529554106','22529700359','22529700569','22529700589','22529708797','22529708817','22529708935','22565067401','22565067647','22565099973','22565108835','22529554140','22529554216','22529560914','22529700549','22529700703','22529708803','22529708837','22529708987','22529709009','22565067325','22565067347','22565081095','22565081273','22565081355','22565100183','22565100261','22565108709','22529554004','22529561184','22529700267','22529708651','22529708943','22565067471','22565067535','22565067609','22565100303','22565100337','22565108607','22529554138','22529554294','22529554354','22529560908','22529561204','22529700331','22529700397','22529700407','22529700717','22529700733','22529708963','22565067233','22565067365','22565067497','22565081219','22565099993','22565100229','22565100397','22529700251','22529700521','22529708957','22565067137','22529554230','22529554306','22529561198','22529708619','22529708901','22565067305','22565067439','22565067507','22565067539','22565067607','22565100287','22565108591','22529700603','22529700743','22565081197','22565100153','22565108487','22565108653','22529561104','22529708777','22529708789','22565067339','22565081305','22565081363','22565100315','22565108511','22529700447','22529700515','22529700643','22565081041','22565099863','22565100013','22565100239','22529554044','22529554062','22529554224','22529700349','22565067135','22565067459','22565099991','22565108601','22565108677','22529553926','22529561304','22529708809','22529708915','22529708929','22529709035','22565081115','22565099909','22565100353','22565108425','22529560892','22529561068','22529708669','22529708881','22565067391','22565081169','22565100285','22565108757','22529554178','22529561200','22529708659','22529708825','22565067329','22565067357','22565080953','22565080975','22565081385','22565099983','22565100019','22565108697','22565108785','22529561202','22529700373','22529700421','22529700529','22529700683','22529700769','22529708945','22565067387','22565099859','22565108755','22529560912','22529561012','22529561162','22529700719','22529700723','22529708713','22529708921','22565067141','22565081039','22565081135','22565081265','22565100143','22565100209','22529554048','22529554180','22529554266','22529561040','22529561158','22529700631','22529708607','22529708627','22529708705','22565067397','22565067451','22565081045','22565081181','22565081313','22565108801','22565108849','22529700493','22529708773','22565067107','22565067127','22565067247','22565081337','22565081365','22565081377','22565099827','22565108679','22529553954','22529554116','22529554208','22529560940','22529561084','22529700281','22529700461','22529700613','22529708691','22565067099','22565067211','22565067633','22565100395','22529554080','22529561026','22529700669','22565099951','22565100161','22565108633','22565108689','22529553932','22529560964','22529700445','22529700667','22529708819','22565081233','22565100389','22565108669','22529554078','22529554092','22529700263','22529700677','22529708685','22565067559','22565081087','22565081093','22565108421','22565108747','22529554008','22529554316','22529554394','22529708835','22529709011','22529709025','22565067153','22565081047','22565081105','22565100061','22565108485','22565108525','22529554328','22529554376','22529560978','22529700519','22565100029','22565100057','22565100253','22565108673','22565108703','22565108781','22529561070','22529700255','22529700429','22529700757','22529708745','22565067129','22565067165','22565067295','22565099931','22565108665','22565108731','22529553902','22529554056','22529700315','22529700665','22529708989','22565067253','22565067311','22565099967','22565108599','22565108773','22529554114','22529554148','22529554382','22529708643','22565081361','22565108437','22529553930','22529554066','22529560962','22529561234','22529561262','22529700357','22565067263','22565081253','22565100379','22565108499','22565108523','22565108749','22529554272','22529561232','22529700297','22529700379','22529700381','22565067587','22565067593','22565081257','22565100291','22565100377','22565108631','22529560948','22529561046','22529561096','22529700365','22529700693','22529708697','22529708917','22565067359','22565067363','22565081099','22565108443','22565108699','22529553920','22529554136','22529560874','22529560880','22529561306','22529709027','22565067085','22565067125','22565067389','22565081117','22565100175','22565108815','22529554130','22529700333','22529700701','22529708731','22529709003','22565067381','22565067537','22565067617','22565081091','22565108839','22529553870','22529554350','22529561290','22529561292','22529700287','22529700617','22529708699','22529708983','22565067217','22565067241','22565067333','22565099939','22529553910','22529554368','22529561168','22529700383','22529700385','22529708759','22565067449','22529554132','22529554228','22529561178','22529561224','22529700605','22565067113','22565081175','22565099851','22565108787','22529560936','22529560966','22529700671','22529708725','22529708729','22565067087','22565067209','22565081123','22565081137','22565099817','22565108695','22565108721','22529561282','22529700247','22529708709','22565067321','22565067481','22565067635','22565081005','22565081315','22565100039','22565100319','22565108705','22529554384','22529700543','22529708665','22565067193','22565067207','22565067577','22565081223','22565099961','22565100079','22565108595','22529553974','22529553988','22529700749','22529708727','22529708871','22565081203','22565081241','22565099857','22529554176','22529554198','22529554318','22529560886','22529561284','22529700273','22529700561','22529700761','22529708891','22565067285','22565081157','22565108577','22565108617','22529554186','22565067111','22565081059','22565100037','22565108847','22529560938','22529560994','22529561288','22529700475','22529708751','22565099869','22565108635','22529561048','22529561220','22529700323','22529708867','22565067159','22565067371','22565067605','22565081119','22565081177','22565100119','22565108797','22529553890','22529560944','22565067289','22565108571','22565108641','22565108793','22529553938','22529554074','22529561180','22529700277','22529700375','22529700485','22529708771','22529708851','22529708919','22565081295','22565100185','22565100215','22565100383','22565108727','22529553956','22529561244','22529708923','22565067133','22565067549','22565067645','22565108615','22565108711','22565108809','22529700709','22529708961','22565067257','22565067319','22565067437','22565081287','22565081303','22565081373','22565100189','22565100263','22565100321','22565100393','22529554010','22529561210','22529700751','22529708653','22565080957','22565081161','22565081327','22565099979','22565100117','22565100191','22565108753','22529561024','22529561118','22529700279','22565067469','22565080987','22565081031','22565081187','22565081261','22565100015','22529554058','22529554336','22529561246','22529708639','22529708757','22529708799','22529708857','22565067427','22565099965','22565108429','22565108467','22565108475','22529560932','22529560988','22529708635','22565067643','22565100051','22565100163','22565108433','22565108587','22565108651','22565108765','22565108769','22565108837','22529553884','22529554144','22529554234','22529561082','22529561116','22529561226','22529700455','22529708941','22565067511','22565067595','22565081107','22565081227','22565099839','22565100001','22565100177','22565108509','22565108593','22529560866','22529561236','22529700265','22529700531','22529700641','22529708633','22529708981','22565080937','22565100007','22565100101','22565100357','22529554236','22529560924','22529561038','22529700347','22529700393','22529708793','22565067591','22565067629','22565080995','22565081121','22565081383','22565099901','22565099905','22565100155','22565108675','22529554162','22529560974','22529700269','22529700599','22529708687','22529708977','22565067167','22565067433','22565067443','22565067521','22565081003','22565099879','22565100077','22565100375','22565108833','22529553878','22529561034','22529700649','22565067315','22565067331','22565067589','22565080955','22565099847','22565100009','22565100247','22565100255','22565108739','22529554360','22529560976','22529561056','22565067119','22565067441','22565067547','22565067573','22565080993','22565081159','22565100221','22565108473','22529554012','22529708623','22529708675','22565067143','22565067651','22565080961','22565081319','22565099815','22565099969','22565108527','22565108585','22529554052','22529554152','22529700245','22529700345','22529700481','22529708869','22565067541','22565081011','22565081231','22565081367','22565099819','22565108555','22565108611','22529553868','22529554128','22529560954','22529561032','22529561108','22529700261','22529700431','22529708625','22565067201','22565067483','22565080981','22565099855','22565099895','22565100233','22565108565','22529560986','22565067583','22565081133','22565081375','22565108435','22565108657','22529560894','22529561190','22529561258','22529700511','22565067543','22565099959','22565100249','22565108823','22529554226','22529561214','22529561268','22529708847','22529709029','22565067171','22565100073','22565100195','22565100367','22565108553','22565108745','22529561294','22529700495','22529708813','22529708855','22529708873','22565067485','22565081007','22565081029','22565108413','22565108767','22529554002','22529554164','22529561072','22529561156','22529561228','22529700597','22529700639','22529700657','22529708909','22565081083','22565099949','22565100047','22565108407','22529554160','22529554374','22565067123','22565067213','22565081057','22565108465','22529561256','22529700289','22529700663','22529708737','22529708801','22529708865','22529708973','22565100333','22565108479','22565108579','22529553874','22529553972','22529560906','22529561002','22529561064','22529561278','22529708927','22565067281','22565081001','22565081209','22565081293','22565108453','22565108741','22529560898','22529560970','22529560972','22529700389','22529708721','22529708787','22565081193','22565099835','22565099873','22565099929','22565108461','22529553912','22529554310','22529554390','22529561050','22529561192','22529561208','22529700497','22565067191','22565100151','22565100171','22565108817','22529560900','22529708781','22565067603','22565100027','22565100033','22565100081','22565100127','22529561150','22529700453','22529700489','22529700533','22529708895','22565081019','22565081329','22565100235','22565100267','22565100293','22565100347','22565108449','22565108455','22565108469','22529554034','22529560878','22529560920','22529700601','22529700659','22529708969','22529708995','22565100257','22565108789','22529554040','22529560882','22529561054','22565067265','22565080951','22565081207','22565099999','22565100251','22529554068','22529554276','22529560902','22529560926','22529700253','22529700477','22529708755','22565067189','22565067579','22565081129','22565099865','22565100145','22565100371','22529554248','22529700411','22529700469','22565081321','22565100123','22565108779','22529554038','22529554060','22529700337','22565067203','22565067491','22565080997','22565081211','22529708815','22565081199','22565099985','22565108411','22565108459','22529554096','22529561216','22529561296','22529700327','22529700505','22529700629','22529708955','22565080963','22565081195','22565081237','22565081291','22565081307','22565099899','22565099935','22565100045','22529554016','22529554086','22529554378','22529561260','22529700415','22529700737','22529708971','22565067297','22565081081','22565099849','22565100173','22565108559','22529700371','22529700441','22529708911','22565081021','22565108427','22565108495','22529553928','22529554036','22529554220','22529700591','22529700741','22565067403','22565067615','22565099955','22565108567','22565108671','22565108701','22529554018','22529554032','22529554252','22529561036','22529700395','22529700451','22565067075','22565067229','22565067565','22565081075','22565081299','22565100059','22565108737','22565108783','22529553882','22529553986','22529554014','22529554088','22529554110','22529561132','22529700567','22529708679','22529709005','22565080945','22565081217','22565081239','22565100011','22529554190','22529560876','22529560956','22529561248','22529561272','22529700707','22529708925','22565067323','22565100035','22565100111','22565100217','22529553984','22529700307','22565067173','22565067183','22565067267','22565099831','22565100003','22565100091','22529560990','22529561194','22565067259','22565067545','22565081027','22565108659','22565108733','22529561092','22565067151','22565067375','22565081263','22565100323','22565108457','22529554146','22529560910','22529561030','22529561138','22529561308','22529700291','22529700299','22529700619','22529708875','22565067429','22565067457','22565067465','22565067527','22565099845','22565100099','22529561098','22529700559','22529709039','22565067377','22565067467','22565067611','22565081165','22565100327','22565108533','22529700765','22529708615','22529709043','22565067197','22565067273','22565067585','22565100031','22565108771','22565108795','22529554282','22529560946','22529708785','22565081269','22565108507','22529553958','22529554118','22529554194','22529561094','22529561142','22529700535','22529700731','22529708661','22565100053','22565100277','22565108663','22529554158','22529554258','22529560950','22529708695','22565067205','22565080973','22565100225','22529553946','22529554288','22529561028','22529561218','22529700473','22565099877','22565100363','22565108569','22529554304','22529560968','22529700579','22529700581','22565067147','22565067243','22565080991','22565081073','22565081379','22565100025','22529554362','22529561144','22529700463','22529700739','22529708677','22565067421','22565081089','22565099947','22565100063','22565100349','22565108735','22529554214','22529700341','22529708637','22529708991','22565067631','22565081229','22565081285','22529553966','22529561062','22529561110','22529561136','22529561310','22529700563','22565067121','22565067237','22565067353','22565067489','22565067575','22565080971','22565081035','22565081049','22565081259','22565108415','22529554042','22529561122','22529561286','22529700551','22529553876','22529561242','22529700321','22529700409','22529708599','22529708761','22529708893','22565067529','22565067639','22565099883','22565100165','22565108725','22529554280','22529708645','22529708821','22565081343','22565100043','22565100179','22565108603','22565108717','22529554380','22529561124','22529561302','22529700283','22529700499','22529700615','22529708937','22529708975','22565067551','22565067571','22565108519','22565108537','22565108855','22529554372','22529561146','22529700695','22529700711','22529700715','22565081349','22565099933','22565099995','22565108759','22529554022','22529554064','22529554184','22529554264','22529700361','22565067081','22565081359','22529553940','22529554196','22529554344','22529560896','22529700459','22529700753','22529708673','22529708749','22529708791','22529709045','22565067513','22565080959','22565081065','22565081249','22565100205','22565108451','22565108687','22529700727','22529708959','22565067413','22565081111','22565099937','22565100231','22565100243','22565108447','22565108557','22565108707','22529554166','22529554364','22529560982','22529700623','22529700635','22565081061','22565100317','22529560922','22529700633','22565067093','22529553872','22529554134','22529554330','22529554332','22529554356','22529561090','22529561300','22529708719','22529708827','22529708953','22565067175','22565067277','22565067419','22565081153','22565099915','22565100085','22565108491','22565108561','22529553994','22529561010','22529561164','22529700317','22529700699','22529700729','22529708823','22565067517','22565100125','22565100169','22565100279','22565108583','22565108799','22529554028','22529554322','22529554398','22529561134','22529561222','22529700423','22529700675','22529708739','22529708763','22529708783','22565067221','22565067515','22565081191','22565081255','22565100157','22565100295','22565108563','22529553904','22529553908','22529700305','22529708657','22565067163','22565067291','22565067563','22565081173','22565081185','22565099971','22565100133','22529553982','22529560904','22529560934','22529700377','22529700425','22529708899','22529708931','22565067105','22565067245','22565067557','22565100049','22565100167','22565108417','22565108441','22565108445','22529553950','22529554054','22529561078','22529708663','22565067185','22565099881','22565100299','22565108613','22565108763')").subtract(CL05M_res).count()

231233

In [176]:
df_ecarts.limit(100).toPandas()

Unnamed: 0,BASL_OUT_ID,BUKRS,GJAHR,MONAT,RACCT
0,22529554020,9PNC,2020,9,5111001009
1,22529554046,9PNC,2020,9,5111001009
2,22529554118,9PNC,2020,9,5111001018
3,22529554258,9PNC,2020,9,5111001028
4,22529561218,9PPF,2020,9,5111001028
...,...,...,...,...,...
95,22529700737,9PNC,2020,9,5111001038
96,22565100015,9PNC,2020,9,5111001009
97,22565100107,9PNC,2020,9,5111001018
98,22565108513,9PPF,2020,9,5111001009


In [222]:
query = """
select CAST(SUM(TWBTRG) AS DECIMAL(38,15))
FROM cl05m_src
WHERE BASL_OUT_ID NOT IN ('22529554024','22529554358','22529560960','22529561102','22529561250','22529700523','22565067073','22565067317','22565099821','22565099907','22565100107','22565100149','22565108575','22565108605','22529554182','22529560928','22529561152','22529561172','22529700647','22529708629','22565067103','22565067215','22565081353','22529561186','22529700303','22529700517','22529708711','22529708843','22565067287','22565067417','22565081183','22565081297','22565100103','22565100109','22565100181','22565100245','22565108609','22529561230','22529700271','22565081101','22565100373','22565108723','22529554242','22529561058','22529700369','22529700565','22529700721','22565067383','22565067501','22565081243','22565081331','22565099891','22529553898','22529553998','22529561016','22529561120','22529700465','22529700507','22565067199','22565081309','22565100095','22565100199','22565108405','22565108637','22565108813','22529554218','22529708907','22565067271','22565067637','22565108667','22529554286','22529700351','22529700437','22529708601','22529708859','22565067109','22565067299','22565081009','22565081131','22565100339','22565108515','22529553866','22529554342','22529561140','22529708829','22565080989','22565100141','22565100197','22529553968','22529554126','22529560930','22529560952','22529561270','22529700655','22529708717','22565067463','22565067621','22565081139','22565108645','22529561276','22529708681','22529708863','22565067083','22565067089','22565080969','22565081113','22565099829','22565099989','22565100187','22565100331','22529554084','22529700691','22529700713','22529708603','22565081371','22565099853','22565100067','22565100087','22529554270','22529700735','22565067071','22565067131','22565067523','22565080965','22565108539','22529553922','22529554284','22529561106','22529708655','22529708707','22565080999','22565081025','22565081077','22565081245','22565081279','22565100093','22565108409','22565108805','22529554240','22529708747','22529709015','22565067139','22565067473','22565067653','22565081369','22565099981','22565100201','22565100211','22565100227','22565108693','22529554154','22529554262','22529554300','22529561042','22529700547','22529700557','22529700607','22565067447','22565067531','22565067613','22565081053','22565100281','22529553896','22529553960','22529553990','22529553996','22529560996','22529700335','22529700399','22529708833','22565067097','22565067223','22565099833','22565100129','22529561060','22529709017','22565067327','22565067601','22565081085','22565100089','22565100121','22565108589','22565108639','22565108715','22529561130','22529700611','22529700771','22565067309','22565067555','22565081333','22565099987','22565100113','22529561086','22529561088','22565067177','22565099903','22565108477','22565108829','22529553906','22529554292','22529560958','22529700471','22529709031','22565081205','22529553978','22529561212','22529561254','22529700571','22529708641','22565067095','22565067279','22565067455','22565081147','22565100065','22565108549','22529554122','22529554232','22529554256','22529560868','22529700527','22529700609','22565067509','22565081149','22565108497','22529560884','22529700339','22529708897','22565067411','22529553892','22529553976','22529554098','22529561008','22565067313','22565067431','22565081213','22565108481','22565108619','22565108775','22529554000','22529554268','22529554334','22529700439','22529700573','22529700673','22529700759','22529708905','22529709001','22565081079','22565081163','22565099941','22565100329','22529554206','22529560998','22529700293','22529700651','22529700705','22529708933','22565067385','22565081325','22565081387','22529554174','22529554338','22529560916','22529561020','22529700479','22529700627','22565067101','22565067373','22565100041','22529554250','22529700541','22529700575','22529708775','22565067479','22565067487','22565081215','22565100365','22565108743','22529554324','22529708967','22565067261','22565067395','22565081023','22565081145','22565099841','22565100137','22565108529','22565108643','22529553894','22529554366','22529561018','22529700593','22529700645','22529708703','22529709033','22565067307','22565067561','22565081125','22565099875','22565099927','22565100159','22565100219','22529553948','22529554296','22529700301','22529700325','22529700417','22529700513','22529708845','22529708853','22565067351','22565081033','22565108531','22565108851','22529553936','22529554124','22529700285','22529700405','22529708613','22565067341','22529554094','22529554222','22529700773','22529708647','22529708889','22529709037','22565067405','22565067453','22565081171','22565099975','22565108629','22529553916','22529554298','22529708849','22565067225','22565067345','22565099837','22565099919','22565100207','22565100305','22565108463','22529553962','22529554026','22529561166','22529561196','22529700501','22529700689','22529708693','22529708861','22529708965','22565067227','22565081141','22565081201','22565081277','22565081345','22529560872','22529700553','22565067181','22565067231','22565067649','22565100355','22565100381','22565108597','22565099825','22529700309','22529700483','22529700503','22529700679','22529700687','22529708883','22565067475','22565108683','22565108761','22529561128','22565067505','22565100213','22565100311','22565108545','22565108661','22529554392','22529700329','22529700401','22529700537','22529708631','22565067187','22565081323','22565099925','22565108439','22565108655','22529553918','22529700487','22529700621','22529700725','22529700763','22565067275','22565067335','22565067409','22565067493','22565080949','22565080977','22565100203','22565108729','22529554082','22529554244','22529554370','22529561014','22529700745','22529700747','22565067091','22565067293','22565081311','22565099913','22529554120','22529700767','22529708605','22529708767','22529708811','22529708887','22565067393','22565081097','22565081317','22565099887','22565100071','22565100139','22565100193','22565100289','22565100361','22565108513','22529554302','22529554314','22529561174','22529700387','22529708735','22529708831','22529708979','22565067379','22565081071','22565081167','22565081281','22565100271','22565108543','22529554246','22529554326','22529554346','22529554396','22529560984','22529561066','22529561100','22529561154','22529561314','22529700539','22565067407','22565081063','22565100313','22565100385','22565108777','22529554320','22529700259','22529700311','22565081225','22565081275','22565100105','22565100309','22565108505','22565108517','22565108649','22565108807','22565108843','22529561052','22529708667','22529708765','22529708779','22565067283','22565080979','22565108845','22529554076','22529561274','22529561280','22529708671','22565108423','22565108841','22529554254','22529554386','22529561112','22529708689','22565067157','22565100259','22565108471','22529700467','22529700775','22529708985','22565067235','22565099943','22565100005','22565100055','22565100301','22529554104','22529561000','22529561076','22529700449','22529708617','22565099997','22565100351','22529561022','22529561148','22529561160','22529561238','22529700403','22565067495','22565067623','22565081247','22565100343','22565108621','22565108819','22529554192','22529561298','22529708701','22529708879','22565067597','22565081069','22565081127','22565081381','22565100265','22565100307','22565108419','22529554070','22529554204','22529561006','22529561176','22529700249','22529700685','22529700777','22529709013','22565067355','22565067361','22565081037','22565081271','22565100023','22565108691','22529553888','22529554290','22529561004','22529561206','22529561264','22529700275','22565067155','22565080983','22529553934','22529700509','22529708733','22565080985','22565081055','22565081289','22565099957','22529700681','22529708807','22565067251','22565067367','22565081067','22565100223','22565100325','22529554142','22529561240','22529561252','22529700433','22529708715','22529708997','22529709041','22565067195','22565067435','22565067533','22565099911','22565108581','22529554170','22529560888','22529560942','22529700319','22529700637','22529708939','22529708993','22565081235','22565108547','22565108751','22565108827','22565067161','22565067179','22565081339','22565100097','22565108551','22565108825','22529561080','22529561266','22529700413','22529700587','22565067239','22565081221','22565108503','22565108681','22565108719','22529554156','22529700355','22529700545','22529709023','22565067519','22565099867','22565100017','22565108489','22529554108','22529554112','22529561114','22529700295','22529700427','22565067269','22565067461','22565100273','22565108521','22529553952','22529553980','22529554312','22529561188','22565067249','22565067477','22565067503','22565067627','22565100335','22529553914','22529553924','22529554188','22529560870','22529560890','22529561044','22529700363','22529700583','22529700661','22529708621','22529708795','22529709021','22565067219','22565067301','22565067567','22529553886','22529708841','22565067303','22565067423','22565081103','22565099843','22565099921','22565099963','22565100241','22529553942','22565081015','22565081283','22565081341','22565099823','22565099893','22565100387','22529554030','22529554278','22529700697','22565067569','22565081155','22565081189','22565100341','22565108685','22529553900','22529700257','22529708723','22529708753','22529708839','22565100075','22565108573','22529554020','22529554172','22529554212','22529554340','22529561170','22529700419','22529700625','22529708743','22529708913','22565067415','22565081017','22565081051','22565081143','22565099861','22565100021','22565100269','22565108623','22565108627','22565108853','22529554150','22529554348','22529700313','22529700391','22529700443','22529708741','22565081151','22565081351','22565100391','22565108535','22565108803','22529553944','22529554090','22529554308','22529700653','22529708885','22565067169','22565080941','22565099885','22565099889','22565099945','22565100297','22565108541','22529554102','22529708609','22529708649','22529708947','22529708999','22565067115','22565067399','22565067581','22565081251','22565081267','22565100083','22565108483','22529553964','22529554046','22529554202','22529554274','22529554388','22529700353','22529708903','22565067255','22565100369','22529554100','22529700457','22529700525','22529708877','22565067349','22565067499','22565080943','22565100131','22565100345','22565108431','22529561312','22529700577','22565067077','22565067145','22565067149','22565067343','22565067625','22565080947','22565100147','22529553970','22529561074','22529700491','22529708597','22529708949','22565067599','22565080939','22565081347','22565100135','22565100275','22529561182','22529708683','22529709007','22565067369','22565099953','22565108493','22565108811','22529560918','22529560980','22529561126','22529700435','22529708611','22565067079','22565067619','22565081335','22529554006','22529554050','22529554072','22529700585','22529708805','22529709019','22565081109','22565081357','22565108647','22565108821','22529554200','22529554238','22529554352','22529560992','22529700595','22565067445','22565067525','22565099897','22565108713','22565108831','22529553880','22529554260','22529700367','22529708769','22565067337','22565067425','22565080967','22565081013','22565081043','22565099871','22565100237','22529553992','22529700343','22529700555','22565067553','22565081301','22565100359','22565108625','22565108791','22529554168','22529554210','22529700755','22529708951','22565067641','22565099917','22565099977','22565100069','22565108501','22565067117','22565081179','22565099923','22565100115','22565100283','22529554106','22529700359','22529700569','22529700589','22529708797','22529708817','22529708935','22565067401','22565067647','22565099973','22565108835','22529554140','22529554216','22529560914','22529700549','22529700703','22529708803','22529708837','22529708987','22529709009','22565067325','22565067347','22565081095','22565081273','22565081355','22565100183','22565100261','22565108709','22529554004','22529561184','22529700267','22529708651','22529708943','22565067471','22565067535','22565067609','22565100303','22565100337','22565108607','22529554138','22529554294','22529554354','22529560908','22529561204','22529700331','22529700397','22529700407','22529700717','22529700733','22529708963','22565067233','22565067365','22565067497','22565081219','22565099993','22565100229','22565100397','22529700251','22529700521','22529708957','22565067137','22529554230','22529554306','22529561198','22529708619','22529708901','22565067305','22565067439','22565067507','22565067539','22565067607','22565100287','22565108591','22529700603','22529700743','22565081197','22565100153','22565108487','22565108653','22529561104','22529708777','22529708789','22565067339','22565081305','22565081363','22565100315','22565108511','22529700447','22529700515','22529700643','22565081041','22565099863','22565100013','22565100239','22529554044','22529554062','22529554224','22529700349','22565067135','22565067459','22565099991','22565108601','22565108677','22529553926','22529561304','22529708809','22529708915','22529708929','22529709035','22565081115','22565099909','22565100353','22565108425','22529560892','22529561068','22529708669','22529708881','22565067391','22565081169','22565100285','22565108757','22529554178','22529561200','22529708659','22529708825','22565067329','22565067357','22565080953','22565080975','22565081385','22565099983','22565100019','22565108697','22565108785','22529561202','22529700373','22529700421','22529700529','22529700683','22529700769','22529708945','22565067387','22565099859','22565108755','22529560912','22529561012','22529561162','22529700719','22529700723','22529708713','22529708921','22565067141','22565081039','22565081135','22565081265','22565100143','22565100209','22529554048','22529554180','22529554266','22529561040','22529561158','22529700631','22529708607','22529708627','22529708705','22565067397','22565067451','22565081045','22565081181','22565081313','22565108801','22565108849','22529700493','22529708773','22565067107','22565067127','22565067247','22565081337','22565081365','22565081377','22565099827','22565108679','22529553954','22529554116','22529554208','22529560940','22529561084','22529700281','22529700461','22529700613','22529708691','22565067099','22565067211','22565067633','22565100395','22529554080','22529561026','22529700669','22565099951','22565100161','22565108633','22565108689','22529553932','22529560964','22529700445','22529700667','22529708819','22565081233','22565100389','22565108669','22529554078','22529554092','22529700263','22529700677','22529708685','22565067559','22565081087','22565081093','22565108421','22565108747','22529554008','22529554316','22529554394','22529708835','22529709011','22529709025','22565067153','22565081047','22565081105','22565100061','22565108485','22565108525','22529554328','22529554376','22529560978','22529700519','22565100029','22565100057','22565100253','22565108673','22565108703','22565108781','22529561070','22529700255','22529700429','22529700757','22529708745','22565067129','22565067165','22565067295','22565099931','22565108665','22565108731','22529553902','22529554056','22529700315','22529700665','22529708989','22565067253','22565067311','22565099967','22565108599','22565108773','22529554114','22529554148','22529554382','22529708643','22565081361','22565108437','22529553930','22529554066','22529560962','22529561234','22529561262','22529700357','22565067263','22565081253','22565100379','22565108499','22565108523','22565108749','22529554272','22529561232','22529700297','22529700379','22529700381','22565067587','22565067593','22565081257','22565100291','22565100377','22565108631','22529560948','22529561046','22529561096','22529700365','22529700693','22529708697','22529708917','22565067359','22565067363','22565081099','22565108443','22565108699','22529553920','22529554136','22529560874','22529560880','22529561306','22529709027','22565067085','22565067125','22565067389','22565081117','22565100175','22565108815','22529554130','22529700333','22529700701','22529708731','22529709003','22565067381','22565067537','22565067617','22565081091','22565108839','22529553870','22529554350','22529561290','22529561292','22529700287','22529700617','22529708699','22529708983','22565067217','22565067241','22565067333','22565099939','22529553910','22529554368','22529561168','22529700383','22529700385','22529708759','22565067449','22529554132','22529554228','22529561178','22529561224','22529700605','22565067113','22565081175','22565099851','22565108787','22529560936','22529560966','22529700671','22529708725','22529708729','22565067087','22565067209','22565081123','22565081137','22565099817','22565108695','22565108721','22529561282','22529700247','22529708709','22565067321','22565067481','22565067635','22565081005','22565081315','22565100039','22565100319','22565108705','22529554384','22529700543','22529708665','22565067193','22565067207','22565067577','22565081223','22565099961','22565100079','22565108595','22529553974','22529553988','22529700749','22529708727','22529708871','22565081203','22565081241','22565099857','22529554176','22529554198','22529554318','22529560886','22529561284','22529700273','22529700561','22529700761','22529708891','22565067285','22565081157','22565108577','22565108617','22529554186','22565067111','22565081059','22565100037','22565108847','22529560938','22529560994','22529561288','22529700475','22529708751','22565099869','22565108635','22529561048','22529561220','22529700323','22529708867','22565067159','22565067371','22565067605','22565081119','22565081177','22565100119','22565108797','22529553890','22529560944','22565067289','22565108571','22565108641','22565108793','22529553938','22529554074','22529561180','22529700277','22529700375','22529700485','22529708771','22529708851','22529708919','22565081295','22565100185','22565100215','22565100383','22565108727','22529553956','22529561244','22529708923','22565067133','22565067549','22565067645','22565108615','22565108711','22565108809','22529700709','22529708961','22565067257','22565067319','22565067437','22565081287','22565081303','22565081373','22565100189','22565100263','22565100321','22565100393','22529554010','22529561210','22529700751','22529708653','22565080957','22565081161','22565081327','22565099979','22565100117','22565100191','22565108753','22529561024','22529561118','22529700279','22565067469','22565080987','22565081031','22565081187','22565081261','22565100015','22529554058','22529554336','22529561246','22529708639','22529708757','22529708799','22529708857','22565067427','22565099965','22565108429','22565108467','22565108475','22529560932','22529560988','22529708635','22565067643','22565100051','22565100163','22565108433','22565108587','22565108651','22565108765','22565108769','22565108837','22529553884','22529554144','22529554234','22529561082','22529561116','22529561226','22529700455','22529708941','22565067511','22565067595','22565081107','22565081227','22565099839','22565100001','22565100177','22565108509','22565108593','22529560866','22529561236','22529700265','22529700531','22529700641','22529708633','22529708981','22565080937','22565100007','22565100101','22565100357','22529554236','22529560924','22529561038','22529700347','22529700393','22529708793','22565067591','22565067629','22565080995','22565081121','22565081383','22565099901','22565099905','22565100155','22565108675','22529554162','22529560974','22529700269','22529700599','22529708687','22529708977','22565067167','22565067433','22565067443','22565067521','22565081003','22565099879','22565100077','22565100375','22565108833','22529553878','22529561034','22529700649','22565067315','22565067331','22565067589','22565080955','22565099847','22565100009','22565100247','22565100255','22565108739','22529554360','22529560976','22529561056','22565067119','22565067441','22565067547','22565067573','22565080993','22565081159','22565100221','22565108473','22529554012','22529708623','22529708675','22565067143','22565067651','22565080961','22565081319','22565099815','22565099969','22565108527','22565108585','22529554052','22529554152','22529700245','22529700345','22529700481','22529708869','22565067541','22565081011','22565081231','22565081367','22565099819','22565108555','22565108611','22529553868','22529554128','22529560954','22529561032','22529561108','22529700261','22529700431','22529708625','22565067201','22565067483','22565080981','22565099855','22565099895','22565100233','22565108565','22529560986','22565067583','22565081133','22565081375','22565108435','22565108657','22529560894','22529561190','22529561258','22529700511','22565067543','22565099959','22565100249','22565108823','22529554226','22529561214','22529561268','22529708847','22529709029','22565067171','22565100073','22565100195','22565100367','22565108553','22565108745','22529561294','22529700495','22529708813','22529708855','22529708873','22565067485','22565081007','22565081029','22565108413','22565108767','22529554002','22529554164','22529561072','22529561156','22529561228','22529700597','22529700639','22529700657','22529708909','22565081083','22565099949','22565100047','22565108407','22529554160','22529554374','22565067123','22565067213','22565081057','22565108465','22529561256','22529700289','22529700663','22529708737','22529708801','22529708865','22529708973','22565100333','22565108479','22565108579','22529553874','22529553972','22529560906','22529561002','22529561064','22529561278','22529708927','22565067281','22565081001','22565081209','22565081293','22565108453','22565108741','22529560898','22529560970','22529560972','22529700389','22529708721','22529708787','22565081193','22565099835','22565099873','22565099929','22565108461','22529553912','22529554310','22529554390','22529561050','22529561192','22529561208','22529700497','22565067191','22565100151','22565100171','22565108817','22529560900','22529708781','22565067603','22565100027','22565100033','22565100081','22565100127','22529561150','22529700453','22529700489','22529700533','22529708895','22565081019','22565081329','22565100235','22565100267','22565100293','22565100347','22565108449','22565108455','22565108469','22529554034','22529560878','22529560920','22529700601','22529700659','22529708969','22529708995','22565100257','22565108789','22529554040','22529560882','22529561054','22565067265','22565080951','22565081207','22565099999','22565100251','22529554068','22529554276','22529560902','22529560926','22529700253','22529700477','22529708755','22565067189','22565067579','22565081129','22565099865','22565100145','22565100371','22529554248','22529700411','22529700469','22565081321','22565100123','22565108779','22529554038','22529554060','22529700337','22565067203','22565067491','22565080997','22565081211','22529708815','22565081199','22565099985','22565108411','22565108459','22529554096','22529561216','22529561296','22529700327','22529700505','22529700629','22529708955','22565080963','22565081195','22565081237','22565081291','22565081307','22565099899','22565099935','22565100045','22529554016','22529554086','22529554378','22529561260','22529700415','22529700737','22529708971','22565067297','22565081081','22565099849','22565100173','22565108559','22529700371','22529700441','22529708911','22565081021','22565108427','22565108495','22529553928','22529554036','22529554220','22529700591','22529700741','22565067403','22565067615','22565099955','22565108567','22565108671','22565108701','22529554018','22529554032','22529554252','22529561036','22529700395','22529700451','22565067075','22565067229','22565067565','22565081075','22565081299','22565100059','22565108737','22565108783','22529553882','22529553986','22529554014','22529554088','22529554110','22529561132','22529700567','22529708679','22529709005','22565080945','22565081217','22565081239','22565100011','22529554190','22529560876','22529560956','22529561248','22529561272','22529700707','22529708925','22565067323','22565100035','22565100111','22565100217','22529553984','22529700307','22565067173','22565067183','22565067267','22565099831','22565100003','22565100091','22529560990','22529561194','22565067259','22565067545','22565081027','22565108659','22565108733','22529561092','22565067151','22565067375','22565081263','22565100323','22565108457','22529554146','22529560910','22529561030','22529561138','22529561308','22529700291','22529700299','22529700619','22529708875','22565067429','22565067457','22565067465','22565067527','22565099845','22565100099','22529561098','22529700559','22529709039','22565067377','22565067467','22565067611','22565081165','22565100327','22565108533','22529700765','22529708615','22529709043','22565067197','22565067273','22565067585','22565100031','22565108771','22565108795','22529554282','22529560946','22529708785','22565081269','22565108507','22529553958','22529554118','22529554194','22529561094','22529561142','22529700535','22529700731','22529708661','22565100053','22565100277','22565108663','22529554158','22529554258','22529560950','22529708695','22565067205','22565080973','22565100225','22529553946','22529554288','22529561028','22529561218','22529700473','22565099877','22565100363','22565108569','22529554304','22529560968','22529700579','22529700581','22565067147','22565067243','22565080991','22565081073','22565081379','22565100025','22529554362','22529561144','22529700463','22529700739','22529708677','22565067421','22565081089','22565099947','22565100063','22565100349','22565108735','22529554214','22529700341','22529708637','22529708991','22565067631','22565081229','22565081285','22529553966','22529561062','22529561110','22529561136','22529561310','22529700563','22565067121','22565067237','22565067353','22565067489','22565067575','22565080971','22565081035','22565081049','22565081259','22565108415','22529554042','22529561122','22529561286','22529700551','22529553876','22529561242','22529700321','22529700409','22529708599','22529708761','22529708893','22565067529','22565067639','22565099883','22565100165','22565108725','22529554280','22529708645','22529708821','22565081343','22565100043','22565100179','22565108603','22565108717','22529554380','22529561124','22529561302','22529700283','22529700499','22529700615','22529708937','22529708975','22565067551','22565067571','22565108519','22565108537','22565108855','22529554372','22529561146','22529700695','22529700711','22529700715','22565081349','22565099933','22565099995','22565108759','22529554022','22529554064','22529554184','22529554264','22529700361','22565067081','22565081359','22529553940','22529554196','22529554344','22529560896','22529700459','22529700753','22529708673','22529708749','22529708791','22529709045','22565067513','22565080959','22565081065','22565081249','22565100205','22565108451','22565108687','22529700727','22529708959','22565067413','22565081111','22565099937','22565100231','22565100243','22565108447','22565108557','22565108707','22529554166','22529554364','22529560982','22529700623','22529700635','22565081061','22565100317','22529560922','22529700633','22565067093','22529553872','22529554134','22529554330','22529554332','22529554356','22529561090','22529561300','22529708719','22529708827','22529708953','22565067175','22565067277','22565067419','22565081153','22565099915','22565100085','22565108491','22565108561','22529553994','22529561010','22529561164','22529700317','22529700699','22529700729','22529708823','22565067517','22565100125','22565100169','22565100279','22565108583','22565108799','22529554028','22529554322','22529554398','22529561134','22529561222','22529700423','22529700675','22529708739','22529708763','22529708783','22565067221','22565067515','22565081191','22565081255','22565100157','22565100295','22565108563','22529553904','22529553908','22529700305','22529708657','22565067163','22565067291','22565067563','22565081173','22565081185','22565099971','22565100133','22529553982','22529560904','22529560934','22529700377','22529700425','22529708899','22529708931','22565067105','22565067245','22565067557','22565100049','22565100167','22565108417','22565108441','22565108445','22529553950','22529554054','22529561078','22529708663','22565067185','22565099881','22565100299','22565108613','22565108763')
"""
df = sqlContext.sql(query)
df.show(1,False)

+---------------------------------------------------+
|CAST(sum(CAST(TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+---------------------------------------------------+
|456056376.309994500000000                          |
+---------------------------------------------------+



In [223]:
query = """
select CAST(SUM(TWBTRG) AS DECIMAL(38,15))
FROM cl05m_res
"""
df = sqlContext.sql(query)
df.show(1,False)

+---------------------------------------------------+
|CAST(sum(CAST(TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+---------------------------------------------------+
|456056376.309996400000000                          |
+---------------------------------------------------+



In [None]:
query = """
select CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
FROM cl05m_res
WHERE BASL_OUT_ID NOT IN ('22529554024','22529554358','22529560960','22529561102','22529561250','22529700523','22565067073','22565067317','22565099821','22565099907','22565100107','22565100149','22565108575','22565108605','22529554182','22529560928','22529561152','22529561172','22529700647','22529708629','22565067103','22565067215','22565081353','22529561186','22529700303','22529700517','22529708711','22529708843','22565067287','22565067417','22565081183','22565081297','22565100103','22565100109','22565100181','22565100245','22565108609','22529561230','22529700271','22565081101','22565100373','22565108723','22529554242','22529561058','22529700369','22529700565','22529700721','22565067383','22565067501','22565081243','22565081331','22565099891','22529553898','22529553998','22529561016','22529561120','22529700465','22529700507','22565067199','22565081309','22565100095','22565100199','22565108405','22565108637','22565108813','22529554218','22529708907','22565067271','22565067637','22565108667','22529554286','22529700351','22529700437','22529708601','22529708859','22565067109','22565067299','22565081009','22565081131','22565100339','22565108515','22529553866','22529554342','22529561140','22529708829','22565080989','22565100141','22565100197','22529553968','22529554126','22529560930','22529560952','22529561270','22529700655','22529708717','22565067463','22565067621','22565081139','22565108645','22529561276','22529708681','22529708863','22565067083','22565067089','22565080969','22565081113','22565099829','22565099989','22565100187','22565100331','22529554084','22529700691','22529700713','22529708603','22565081371','22565099853','22565100067','22565100087','22529554270','22529700735','22565067071','22565067131','22565067523','22565080965','22565108539','22529553922','22529554284','22529561106','22529708655','22529708707','22565080999','22565081025','22565081077','22565081245','22565081279','22565100093','22565108409','22565108805','22529554240','22529708747','22529709015','22565067139','22565067473','22565067653','22565081369','22565099981','22565100201','22565100211','22565100227','22565108693','22529554154','22529554262','22529554300','22529561042','22529700547','22529700557','22529700607','22565067447','22565067531','22565067613','22565081053','22565100281','22529553896','22529553960','22529553990','22529553996','22529560996','22529700335','22529700399','22529708833','22565067097','22565067223','22565099833','22565100129','22529561060','22529709017','22565067327','22565067601','22565081085','22565100089','22565100121','22565108589','22565108639','22565108715','22529561130','22529700611','22529700771','22565067309','22565067555','22565081333','22565099987','22565100113','22529561086','22529561088','22565067177','22565099903','22565108477','22565108829','22529553906','22529554292','22529560958','22529700471','22529709031','22565081205','22529553978','22529561212','22529561254','22529700571','22529708641','22565067095','22565067279','22565067455','22565081147','22565100065','22565108549','22529554122','22529554232','22529554256','22529560868','22529700527','22529700609','22565067509','22565081149','22565108497','22529560884','22529700339','22529708897','22565067411','22529553892','22529553976','22529554098','22529561008','22565067313','22565067431','22565081213','22565108481','22565108619','22565108775','22529554000','22529554268','22529554334','22529700439','22529700573','22529700673','22529700759','22529708905','22529709001','22565081079','22565081163','22565099941','22565100329','22529554206','22529560998','22529700293','22529700651','22529700705','22529708933','22565067385','22565081325','22565081387','22529554174','22529554338','22529560916','22529561020','22529700479','22529700627','22565067101','22565067373','22565100041','22529554250','22529700541','22529700575','22529708775','22565067479','22565067487','22565081215','22565100365','22565108743','22529554324','22529708967','22565067261','22565067395','22565081023','22565081145','22565099841','22565100137','22565108529','22565108643','22529553894','22529554366','22529561018','22529700593','22529700645','22529708703','22529709033','22565067307','22565067561','22565081125','22565099875','22565099927','22565100159','22565100219','22529553948','22529554296','22529700301','22529700325','22529700417','22529700513','22529708845','22529708853','22565067351','22565081033','22565108531','22565108851','22529553936','22529554124','22529700285','22529700405','22529708613','22565067341','22529554094','22529554222','22529700773','22529708647','22529708889','22529709037','22565067405','22565067453','22565081171','22565099975','22565108629','22529553916','22529554298','22529708849','22565067225','22565067345','22565099837','22565099919','22565100207','22565100305','22565108463','22529553962','22529554026','22529561166','22529561196','22529700501','22529700689','22529708693','22529708861','22529708965','22565067227','22565081141','22565081201','22565081277','22565081345','22529560872','22529700553','22565067181','22565067231','22565067649','22565100355','22565100381','22565108597','22565099825','22529700309','22529700483','22529700503','22529700679','22529700687','22529708883','22565067475','22565108683','22565108761','22529561128','22565067505','22565100213','22565100311','22565108545','22565108661','22529554392','22529700329','22529700401','22529700537','22529708631','22565067187','22565081323','22565099925','22565108439','22565108655','22529553918','22529700487','22529700621','22529700725','22529700763','22565067275','22565067335','22565067409','22565067493','22565080949','22565080977','22565100203','22565108729','22529554082','22529554244','22529554370','22529561014','22529700745','22529700747','22565067091','22565067293','22565081311','22565099913','22529554120','22529700767','22529708605','22529708767','22529708811','22529708887','22565067393','22565081097','22565081317','22565099887','22565100071','22565100139','22565100193','22565100289','22565100361','22565108513','22529554302','22529554314','22529561174','22529700387','22529708735','22529708831','22529708979','22565067379','22565081071','22565081167','22565081281','22565100271','22565108543','22529554246','22529554326','22529554346','22529554396','22529560984','22529561066','22529561100','22529561154','22529561314','22529700539','22565067407','22565081063','22565100313','22565100385','22565108777','22529554320','22529700259','22529700311','22565081225','22565081275','22565100105','22565100309','22565108505','22565108517','22565108649','22565108807','22565108843','22529561052','22529708667','22529708765','22529708779','22565067283','22565080979','22565108845','22529554076','22529561274','22529561280','22529708671','22565108423','22565108841','22529554254','22529554386','22529561112','22529708689','22565067157','22565100259','22565108471','22529700467','22529700775','22529708985','22565067235','22565099943','22565100005','22565100055','22565100301','22529554104','22529561000','22529561076','22529700449','22529708617','22565099997','22565100351','22529561022','22529561148','22529561160','22529561238','22529700403','22565067495','22565067623','22565081247','22565100343','22565108621','22565108819','22529554192','22529561298','22529708701','22529708879','22565067597','22565081069','22565081127','22565081381','22565100265','22565100307','22565108419','22529554070','22529554204','22529561006','22529561176','22529700249','22529700685','22529700777','22529709013','22565067355','22565067361','22565081037','22565081271','22565100023','22565108691','22529553888','22529554290','22529561004','22529561206','22529561264','22529700275','22565067155','22565080983','22529553934','22529700509','22529708733','22565080985','22565081055','22565081289','22565099957','22529700681','22529708807','22565067251','22565067367','22565081067','22565100223','22565100325','22529554142','22529561240','22529561252','22529700433','22529708715','22529708997','22529709041','22565067195','22565067435','22565067533','22565099911','22565108581','22529554170','22529560888','22529560942','22529700319','22529700637','22529708939','22529708993','22565081235','22565108547','22565108751','22565108827','22565067161','22565067179','22565081339','22565100097','22565108551','22565108825','22529561080','22529561266','22529700413','22529700587','22565067239','22565081221','22565108503','22565108681','22565108719','22529554156','22529700355','22529700545','22529709023','22565067519','22565099867','22565100017','22565108489','22529554108','22529554112','22529561114','22529700295','22529700427','22565067269','22565067461','22565100273','22565108521','22529553952','22529553980','22529554312','22529561188','22565067249','22565067477','22565067503','22565067627','22565100335','22529553914','22529553924','22529554188','22529560870','22529560890','22529561044','22529700363','22529700583','22529700661','22529708621','22529708795','22529709021','22565067219','22565067301','22565067567','22529553886','22529708841','22565067303','22565067423','22565081103','22565099843','22565099921','22565099963','22565100241','22529553942','22565081015','22565081283','22565081341','22565099823','22565099893','22565100387','22529554030','22529554278','22529700697','22565067569','22565081155','22565081189','22565100341','22565108685','22529553900','22529700257','22529708723','22529708753','22529708839','22565100075','22565108573','22529554020','22529554172','22529554212','22529554340','22529561170','22529700419','22529700625','22529708743','22529708913','22565067415','22565081017','22565081051','22565081143','22565099861','22565100021','22565100269','22565108623','22565108627','22565108853','22529554150','22529554348','22529700313','22529700391','22529700443','22529708741','22565081151','22565081351','22565100391','22565108535','22565108803','22529553944','22529554090','22529554308','22529700653','22529708885','22565067169','22565080941','22565099885','22565099889','22565099945','22565100297','22565108541','22529554102','22529708609','22529708649','22529708947','22529708999','22565067115','22565067399','22565067581','22565081251','22565081267','22565100083','22565108483','22529553964','22529554046','22529554202','22529554274','22529554388','22529700353','22529708903','22565067255','22565100369','22529554100','22529700457','22529700525','22529708877','22565067349','22565067499','22565080943','22565100131','22565100345','22565108431','22529561312','22529700577','22565067077','22565067145','22565067149','22565067343','22565067625','22565080947','22565100147','22529553970','22529561074','22529700491','22529708597','22529708949','22565067599','22565080939','22565081347','22565100135','22565100275','22529561182','22529708683','22529709007','22565067369','22565099953','22565108493','22565108811','22529560918','22529560980','22529561126','22529700435','22529708611','22565067079','22565067619','22565081335','22529554006','22529554050','22529554072','22529700585','22529708805','22529709019','22565081109','22565081357','22565108647','22565108821','22529554200','22529554238','22529554352','22529560992','22529700595','22565067445','22565067525','22565099897','22565108713','22565108831','22529553880','22529554260','22529700367','22529708769','22565067337','22565067425','22565080967','22565081013','22565081043','22565099871','22565100237','22529553992','22529700343','22529700555','22565067553','22565081301','22565100359','22565108625','22565108791','22529554168','22529554210','22529700755','22529708951','22565067641','22565099917','22565099977','22565100069','22565108501','22565067117','22565081179','22565099923','22565100115','22565100283','22529554106','22529700359','22529700569','22529700589','22529708797','22529708817','22529708935','22565067401','22565067647','22565099973','22565108835','22529554140','22529554216','22529560914','22529700549','22529700703','22529708803','22529708837','22529708987','22529709009','22565067325','22565067347','22565081095','22565081273','22565081355','22565100183','22565100261','22565108709','22529554004','22529561184','22529700267','22529708651','22529708943','22565067471','22565067535','22565067609','22565100303','22565100337','22565108607','22529554138','22529554294','22529554354','22529560908','22529561204','22529700331','22529700397','22529700407','22529700717','22529700733','22529708963','22565067233','22565067365','22565067497','22565081219','22565099993','22565100229','22565100397','22529700251','22529700521','22529708957','22565067137','22529554230','22529554306','22529561198','22529708619','22529708901','22565067305','22565067439','22565067507','22565067539','22565067607','22565100287','22565108591','22529700603','22529700743','22565081197','22565100153','22565108487','22565108653','22529561104','22529708777','22529708789','22565067339','22565081305','22565081363','22565100315','22565108511','22529700447','22529700515','22529700643','22565081041','22565099863','22565100013','22565100239','22529554044','22529554062','22529554224','22529700349','22565067135','22565067459','22565099991','22565108601','22565108677','22529553926','22529561304','22529708809','22529708915','22529708929','22529709035','22565081115','22565099909','22565100353','22565108425','22529560892','22529561068','22529708669','22529708881','22565067391','22565081169','22565100285','22565108757','22529554178','22529561200','22529708659','22529708825','22565067329','22565067357','22565080953','22565080975','22565081385','22565099983','22565100019','22565108697','22565108785','22529561202','22529700373','22529700421','22529700529','22529700683','22529700769','22529708945','22565067387','22565099859','22565108755','22529560912','22529561012','22529561162','22529700719','22529700723','22529708713','22529708921','22565067141','22565081039','22565081135','22565081265','22565100143','22565100209','22529554048','22529554180','22529554266','22529561040','22529561158','22529700631','22529708607','22529708627','22529708705','22565067397','22565067451','22565081045','22565081181','22565081313','22565108801','22565108849','22529700493','22529708773','22565067107','22565067127','22565067247','22565081337','22565081365','22565081377','22565099827','22565108679','22529553954','22529554116','22529554208','22529560940','22529561084','22529700281','22529700461','22529700613','22529708691','22565067099','22565067211','22565067633','22565100395','22529554080','22529561026','22529700669','22565099951','22565100161','22565108633','22565108689','22529553932','22529560964','22529700445','22529700667','22529708819','22565081233','22565100389','22565108669','22529554078','22529554092','22529700263','22529700677','22529708685','22565067559','22565081087','22565081093','22565108421','22565108747','22529554008','22529554316','22529554394','22529708835','22529709011','22529709025','22565067153','22565081047','22565081105','22565100061','22565108485','22565108525','22529554328','22529554376','22529560978','22529700519','22565100029','22565100057','22565100253','22565108673','22565108703','22565108781','22529561070','22529700255','22529700429','22529700757','22529708745','22565067129','22565067165','22565067295','22565099931','22565108665','22565108731','22529553902','22529554056','22529700315','22529700665','22529708989','22565067253','22565067311','22565099967','22565108599','22565108773','22529554114','22529554148','22529554382','22529708643','22565081361','22565108437','22529553930','22529554066','22529560962','22529561234','22529561262','22529700357','22565067263','22565081253','22565100379','22565108499','22565108523','22565108749','22529554272','22529561232','22529700297','22529700379','22529700381','22565067587','22565067593','22565081257','22565100291','22565100377','22565108631','22529560948','22529561046','22529561096','22529700365','22529700693','22529708697','22529708917','22565067359','22565067363','22565081099','22565108443','22565108699','22529553920','22529554136','22529560874','22529560880','22529561306','22529709027','22565067085','22565067125','22565067389','22565081117','22565100175','22565108815','22529554130','22529700333','22529700701','22529708731','22529709003','22565067381','22565067537','22565067617','22565081091','22565108839','22529553870','22529554350','22529561290','22529561292','22529700287','22529700617','22529708699','22529708983','22565067217','22565067241','22565067333','22565099939','22529553910','22529554368','22529561168','22529700383','22529700385','22529708759','22565067449','22529554132','22529554228','22529561178','22529561224','22529700605','22565067113','22565081175','22565099851','22565108787','22529560936','22529560966','22529700671','22529708725','22529708729','22565067087','22565067209','22565081123','22565081137','22565099817','22565108695','22565108721','22529561282','22529700247','22529708709','22565067321','22565067481','22565067635','22565081005','22565081315','22565100039','22565100319','22565108705','22529554384','22529700543','22529708665','22565067193','22565067207','22565067577','22565081223','22565099961','22565100079','22565108595','22529553974','22529553988','22529700749','22529708727','22529708871','22565081203','22565081241','22565099857','22529554176','22529554198','22529554318','22529560886','22529561284','22529700273','22529700561','22529700761','22529708891','22565067285','22565081157','22565108577','22565108617','22529554186','22565067111','22565081059','22565100037','22565108847','22529560938','22529560994','22529561288','22529700475','22529708751','22565099869','22565108635','22529561048','22529561220','22529700323','22529708867','22565067159','22565067371','22565067605','22565081119','22565081177','22565100119','22565108797','22529553890','22529560944','22565067289','22565108571','22565108641','22565108793','22529553938','22529554074','22529561180','22529700277','22529700375','22529700485','22529708771','22529708851','22529708919','22565081295','22565100185','22565100215','22565100383','22565108727','22529553956','22529561244','22529708923','22565067133','22565067549','22565067645','22565108615','22565108711','22565108809','22529700709','22529708961','22565067257','22565067319','22565067437','22565081287','22565081303','22565081373','22565100189','22565100263','22565100321','22565100393','22529554010','22529561210','22529700751','22529708653','22565080957','22565081161','22565081327','22565099979','22565100117','22565100191','22565108753','22529561024','22529561118','22529700279','22565067469','22565080987','22565081031','22565081187','22565081261','22565100015','22529554058','22529554336','22529561246','22529708639','22529708757','22529708799','22529708857','22565067427','22565099965','22565108429','22565108467','22565108475','22529560932','22529560988','22529708635','22565067643','22565100051','22565100163','22565108433','22565108587','22565108651','22565108765','22565108769','22565108837','22529553884','22529554144','22529554234','22529561082','22529561116','22529561226','22529700455','22529708941','22565067511','22565067595','22565081107','22565081227','22565099839','22565100001','22565100177','22565108509','22565108593','22529560866','22529561236','22529700265','22529700531','22529700641','22529708633','22529708981','22565080937','22565100007','22565100101','22565100357','22529554236','22529560924','22529561038','22529700347','22529700393','22529708793','22565067591','22565067629','22565080995','22565081121','22565081383','22565099901','22565099905','22565100155','22565108675','22529554162','22529560974','22529700269','22529700599','22529708687','22529708977','22565067167','22565067433','22565067443','22565067521','22565081003','22565099879','22565100077','22565100375','22565108833','22529553878','22529561034','22529700649','22565067315','22565067331','22565067589','22565080955','22565099847','22565100009','22565100247','22565100255','22565108739','22529554360','22529560976','22529561056','22565067119','22565067441','22565067547','22565067573','22565080993','22565081159','22565100221','22565108473','22529554012','22529708623','22529708675','22565067143','22565067651','22565080961','22565081319','22565099815','22565099969','22565108527','22565108585','22529554052','22529554152','22529700245','22529700345','22529700481','22529708869','22565067541','22565081011','22565081231','22565081367','22565099819','22565108555','22565108611','22529553868','22529554128','22529560954','22529561032','22529561108','22529700261','22529700431','22529708625','22565067201','22565067483','22565080981','22565099855','22565099895','22565100233','22565108565','22529560986','22565067583','22565081133','22565081375','22565108435','22565108657','22529560894','22529561190','22529561258','22529700511','22565067543','22565099959','22565100249','22565108823','22529554226','22529561214','22529561268','22529708847','22529709029','22565067171','22565100073','22565100195','22565100367','22565108553','22565108745','22529561294','22529700495','22529708813','22529708855','22529708873','22565067485','22565081007','22565081029','22565108413','22565108767','22529554002','22529554164','22529561072','22529561156','22529561228','22529700597','22529700639','22529700657','22529708909','22565081083','22565099949','22565100047','22565108407','22529554160','22529554374','22565067123','22565067213','22565081057','22565108465','22529561256','22529700289','22529700663','22529708737','22529708801','22529708865','22529708973','22565100333','22565108479','22565108579','22529553874','22529553972','22529560906','22529561002','22529561064','22529561278','22529708927','22565067281','22565081001','22565081209','22565081293','22565108453','22565108741','22529560898','22529560970','22529560972','22529700389','22529708721','22529708787','22565081193','22565099835','22565099873','22565099929','22565108461','22529553912','22529554310','22529554390','22529561050','22529561192','22529561208','22529700497','22565067191','22565100151','22565100171','22565108817','22529560900','22529708781','22565067603','22565100027','22565100033','22565100081','22565100127','22529561150','22529700453','22529700489','22529700533','22529708895','22565081019','22565081329','22565100235','22565100267','22565100293','22565100347','22565108449','22565108455','22565108469','22529554034','22529560878','22529560920','22529700601','22529700659','22529708969','22529708995','22565100257','22565108789','22529554040','22529560882','22529561054','22565067265','22565080951','22565081207','22565099999','22565100251','22529554068','22529554276','22529560902','22529560926','22529700253','22529700477','22529708755','22565067189','22565067579','22565081129','22565099865','22565100145','22565100371','22529554248','22529700411','22529700469','22565081321','22565100123','22565108779','22529554038','22529554060','22529700337','22565067203','22565067491','22565080997','22565081211','22529708815','22565081199','22565099985','22565108411','22565108459','22529554096','22529561216','22529561296','22529700327','22529700505','22529700629','22529708955','22565080963','22565081195','22565081237','22565081291','22565081307','22565099899','22565099935','22565100045','22529554016','22529554086','22529554378','22529561260','22529700415','22529700737','22529708971','22565067297','22565081081','22565099849','22565100173','22565108559','22529700371','22529700441','22529708911','22565081021','22565108427','22565108495','22529553928','22529554036','22529554220','22529700591','22529700741','22565067403','22565067615','22565099955','22565108567','22565108671','22565108701','22529554018','22529554032','22529554252','22529561036','22529700395','22529700451','22565067075','22565067229','22565067565','22565081075','22565081299','22565100059','22565108737','22565108783','22529553882','22529553986','22529554014','22529554088','22529554110','22529561132','22529700567','22529708679','22529709005','22565080945','22565081217','22565081239','22565100011','22529554190','22529560876','22529560956','22529561248','22529561272','22529700707','22529708925','22565067323','22565100035','22565100111','22565100217','22529553984','22529700307','22565067173','22565067183','22565067267','22565099831','22565100003','22565100091','22529560990','22529561194','22565067259','22565067545','22565081027','22565108659','22565108733','22529561092','22565067151','22565067375','22565081263','22565100323','22565108457','22529554146','22529560910','22529561030','22529561138','22529561308','22529700291','22529700299','22529700619','22529708875','22565067429','22565067457','22565067465','22565067527','22565099845','22565100099','22529561098','22529700559','22529709039','22565067377','22565067467','22565067611','22565081165','22565100327','22565108533','22529700765','22529708615','22529709043','22565067197','22565067273','22565067585','22565100031','22565108771','22565108795','22529554282','22529560946','22529708785','22565081269','22565108507','22529553958','22529554118','22529554194','22529561094','22529561142','22529700535','22529700731','22529708661','22565100053','22565100277','22565108663','22529554158','22529554258','22529560950','22529708695','22565067205','22565080973','22565100225','22529553946','22529554288','22529561028','22529561218','22529700473','22565099877','22565100363','22565108569','22529554304','22529560968','22529700579','22529700581','22565067147','22565067243','22565080991','22565081073','22565081379','22565100025','22529554362','22529561144','22529700463','22529700739','22529708677','22565067421','22565081089','22565099947','22565100063','22565100349','22565108735','22529554214','22529700341','22529708637','22529708991','22565067631','22565081229','22565081285','22529553966','22529561062','22529561110','22529561136','22529561310','22529700563','22565067121','22565067237','22565067353','22565067489','22565067575','22565080971','22565081035','22565081049','22565081259','22565108415','22529554042','22529561122','22529561286','22529700551','22529553876','22529561242','22529700321','22529700409','22529708599','22529708761','22529708893','22565067529','22565067639','22565099883','22565100165','22565108725','22529554280','22529708645','22529708821','22565081343','22565100043','22565100179','22565108603','22565108717','22529554380','22529561124','22529561302','22529700283','22529700499','22529700615','22529708937','22529708975','22565067551','22565067571','22565108519','22565108537','22565108855','22529554372','22529561146','22529700695','22529700711','22529700715','22565081349','22565099933','22565099995','22565108759','22529554022','22529554064','22529554184','22529554264','22529700361','22565067081','22565081359','22529553940','22529554196','22529554344','22529560896','22529700459','22529700753','22529708673','22529708749','22529708791','22529709045','22565067513','22565080959','22565081065','22565081249','22565100205','22565108451','22565108687','22529700727','22529708959','22565067413','22565081111','22565099937','22565100231','22565100243','22565108447','22565108557','22565108707','22529554166','22529554364','22529560982','22529700623','22529700635','22565081061','22565100317','22529560922','22529700633','22565067093','22529553872','22529554134','22529554330','22529554332','22529554356','22529561090','22529561300','22529708719','22529708827','22529708953','22565067175','22565067277','22565067419','22565081153','22565099915','22565100085','22565108491','22565108561','22529553994','22529561010','22529561164','22529700317','22529700699','22529700729','22529708823','22565067517','22565100125','22565100169','22565100279','22565108583','22565108799','22529554028','22529554322','22529554398','22529561134','22529561222','22529700423','22529700675','22529708739','22529708763','22529708783','22565067221','22565067515','22565081191','22565081255','22565100157','22565100295','22565108563','22529553904','22529553908','22529700305','22529708657','22565067163','22565067291','22565067563','22565081173','22565081185','22565099971','22565100133','22529553982','22529560904','22529560934','22529700377','22529700425','22529708899','22529708931','22565067105','22565067245','22565067557','22565100049','22565100167','22565108417','22565108441','22565108445','22529553950','22529554054','22529561078','22529708663','22565067185','22565099881','22565100299','22565108613','22565108763')
"""
df = sqlContext.sql(query)
df.show(1,False)

In [209]:
query = """
select basl.BASL_OUT_ID, basl.LDGRP, basl.CPUDT
FROM BASL_AZF_DATE_CPT AS basl
RIGHT JOIN cl01m_ecarts AS cl01m ON basl.BASL_OUT_ID = cl01m.BASL_OUT_ID
"""
df = sqlContext.sql(query)

In [213]:
df.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/analyse_cl01m/cl01m4.csv",header=True,sep=';')

In [211]:
df.count()

2020

In [177]:
df_ecarts.select("MONAT").distinct().show()

+-----+
|MONAT|
+-----+
|    9|
+-----+



# CL02M

In [224]:
CL02M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/LDM_PRD_FRA_CL02M_201007_093811.CSV", header=True, sep=";")
print("nombre lignes source : " + str(CL05M_source.count()))
basl_max_source = CL02M_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = CL02M_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
CL02M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0607_PRD_20201007_20201007105233_CL02M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(CL02M_res.count()))
basl_max_extract = CL02M_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = CL02M_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 944889
basl out id max source : 22601192532
basl out id min source : 22309632900
nombre lignes extract : 941839
basl out id max extract : 22601192532
basl out id min extract : 22309632900


In [225]:
944889 - 941839

3050

In [226]:
df_ecarts = CL02M_source.select("BASL_OUT_ID","BUKRS","GJAHR","MONAT", "RACCT").subtract(CL02M_res.select("BASL_OUT_ID","BUKRS","GJAHR","MONAT", "RACCT"))

In [227]:
df_ecarts.count()

3050

In [229]:
df_ecarts.registerTempTable("cl02m_ecart")

In [228]:
df_ecarts.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/analyse_cl02m/cl02.csv",header=True,sep=';')

In [230]:
query = """
select basl.BASL_OUT_ID, basl.LDGRP, basl.CPUDT
FROM BASL_AZF_DATE_CPT AS basl
RIGHT JOIN cl02m_ecart AS cl02m ON basl.BASL_OUT_ID = cl02m.BASL_OUT_ID
"""
df = sqlContext.sql(query)
df.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/analyse_cl02m/cl02_ecarts.csv",header=True,sep=';')

In [232]:
CL02M_source_corrige = CL02M_source.filter("BASL_OUT_ID NOT IN ('22529554760','22529554870','22529555120','22529561540','22529562068','22529562076','22529701047','22529701405','22529701517','22529709119','22529709723','22565067735','22565081477','22565081823','22565081851','22565100609','22565100869','22565100993','22529561430','22529561780','22529701197','22529709741','22529709751','22565067729','22565068291','22565068309','22565068533','22565100503','22565101215','22565109223','22565109331','22565109677','22529554572','22529554590','22529554854','22529555108','22529701073','22529701275','22529701491','22529709533','22565067859','22565068209','22565068457','22565068489','22565081493','22565082011','22565082083','22565100631','22565100669','22565100829','22565101085','22565101091','22565101193','22565101225','22529554600','22529554614','22529554866','22529554898','22529555082','22529701303','22529709721','22565068531','22565081479','22565081635','22565082183','22565101181','22565109111','22565109365','22565109371','22565109663','22529554456','22529554628','22529554764','22529555178','22529561622','22529701035','22529701357','22529709831','22565081761','22565100803','22565101105','22565108989','22529554702','22529554912','22529555132','22529561860','22529562098','22529700829','22529701179','22529701591','22565081699','22565100917','22565100937','22565109557','22529554888','22529562150','22529701593','22529709515','22529709839','22565068173','22565068365','22565081833','22565101205','22565101361','22529554488','22529555080','22529555306','22529700821','22529701523','22529709299','22529709727','22529709745','22565068051','22565081707','22565081755','22565081775','22565081791','22565082049','22565082131','22565082197','22565082201','22565101155','22565101275','22565109207','22565109413','22565109479','22529554644','22529555252','22529700887','22529701321','22529709293','22529709351','22529709399','22529709473','22529709567','22565068195','22565068389','22565081499','22565081757','22565100657','22565101315','22565109221','22529561828','22529561972','22529701061','22529701249','22529701565','22529701613','22529709391','22529709535','22529709791','22565067855','22565068077','22565068185','22565081811','22565101223','22565109147','22565109239','22565109377','22529554602','22529561558','22529700957','22529700961','22529701169','22529709575','22529709847','22565067871','22565067989','22565068521','22565081517','22565081655','22565081789','22565082023','22565082031','22565101141','22565109529','22529554468','22529554632','22529554792','22529561386','22529561838','22529701345','22529701533','22529709237','22529709387','22529709579','22529709837','22565068473','22565081637','22565081859','22565100651','22565109277','22565109327','22565109469','22565109653','22529554646','22529554774','22529701139','22529701263','22529709323','22529709457','22565081505','22565081831','22565082179','22565100465','22565100795','22565101039','22565101113','22565101125','22565101333','22565109087','22529562090','22529701019','22529701367','22529709477','22529709803','22565067923','22565068503','22565068511','22565101271','22565109271','22565109389','22529561880','22529700883','22529700965','22529701269','22565067879','22565068055','22565068079','22565081611','22565081629','22565082133','22565100789','22565101229','22565109407','22565109475','22529562012','22529700819','22529700949','22529701053','22529709417','22529709825','22565067701','22565067723','22565067939','22565068355','22565081469','22565100453','22565108963','22565109059','22565109341','22529554494','22529561514','22529561824','22529701429','22529701531','22529709129','22529709683','22529709813','22529709815','22565081445','22565082229','22565109263','22565109439','22565109669','22529554778','22529554896','22529554974','22529561394','22529561504','22529700995','22529701141','22529701459','22565081889','22565100619','22565100719','22565100847','22565100987','22565109587','22565109635','22565109647','22529554432','22529554994','22529555012','22529561550','22529562078','22529701127','22529709259','22529709717','22529709865','22565068295','22565068395','22565068515','22565081619','22565081749','22565100485','22565101247','22565109129','22565109607','22529555126','22529700993','22529701215','22529709437','22529709821','22565068159','22565068211','22565081881','22565082009','22565101183','22565109117','22565109345','22529554442','22529554864','22529555146','22529561546','22529561796','22529561832','22529561978','22529701295','22529701447','22529709465','22565067877','22565067913','22565068177','22565081571','22565082227','22565100663','22565100691','22565101357','22565109203','22565109535','22565109621','22529561624','22529561834','22529701131','22529701157','22529701177','22529701229','22529701455','22529701549','22529709125','22529709503','22529709765','22565100489','22565100589','22565100625','22565100671','22565101197','22565101323','22565101351','22565109209','22565109241','22565109273','22565109517','22529554470','22529554574','22529554686','22529554880','22529561758','22529561836','22529701159','22529701453','22529701679','22529709795','22565068057','22565068307','22565081457','22565081939','22565082105','22565100731','22565101121','22565101127','22529555172','22529701583','22529709743','22565067981','22565068097','22565068385','22565082161','22565082223','22565100693','22565100799','22565101297','22565108919','22565109119','22529554616','22529554684','22529554918','22529561794','22529709553','22565067731','22565067755','22565068227','22565068289','22565082135','22565082181','22565100451','22565109157','22565109199','22529554624','22529555112','22529555242','22529561576','22529561914','22529562052','22529701235','22529701305','22529709559','22565068419','22565082019','22565100633','22565100955','22565101045','22565109079','22529554758','22529554990','22529555258','22529561376','22529561976','22529701041','22529709103','22529709157','22529709247','22565067899','22565067901','22565068197','22565068285','22565082111','22565082127','22565100887','22565100907','22565109131','22565109573','22529554618','22529555228','22529555262','22529561798','22529562020','22529700999','22529701055','22529701075','22529701171','22529701371','22529701495','22529709153','22529709403','22529709479','22529709599','22529709689','22565068189','22565081639','22565082171','22565082215','22565109321','22565109337','22565109581','22529554834','22529561688','22529561792','22529701513','22529701567','22529701595','22529709257','22529709495','22565067775','22565067835','22565068497','22565081585','22565081943','22565100501','22565109613','22529561668','22529701335','22529701407','22529709233','22529709289','22565081645','22565082087','22565109299','22565109343','22565109435','22529554604','22529554704','22529555294','22529709109','22529709159','22529709505','22529709695','22529709805','22565068407','22565068481','22565068553','22565081463','22565081801','22565082157','22565100697','22565100809','22565100849','22565100965','22529555284','22529561790','22529561854','22529561930','22529562010','22529701155','22529701369','22529709497','22529709521','22529709657','22565067919','22565081815','22565081871','22565109213','22529554566','22529561366','22529561374','22529561690','22529561876','22529701219','22529701617','22529709303','22565068409','22565081621','22565082147','22565100843','22565109543','22529554756','22529562056','22529700977','22529701499','22529709229','22529709507','22529709819','22565081461','22565081907','22565100653','22565100701','22565100723','22565100839','22565100845','22565100865','22565101001','22565101195','22565108959','22565109579','22565109589','22565109641','22529555266','22529561600','22529561954','22529562096','22529709603','22529709667','22529709719','22565068305','22565068399','22565068617','22565081587','22565081691','22565082151','22565100667','22565100817','22565101341','22565108955','22529554650','22529554848','22529554900','22529554958','22529555230','22529555240','22529561670','22529561750','22529701069','22529701221','22529709551','22529709597','22529709647','22565067911','22565068059','22565081603','22565081689','22565100897','22565109159','22565109675','22529555110','22529562112','22529701079','22529701223','22529701277','22529701655','22529709849','22565067769','22565068287','22565068449','22565068591','22565068597','22565081661','22565082117','22565101025','22565109329','22565109335','22529554676','22529555194','22529561578','22529561604','22529561892','22529561932','22529701063','22529701673','22565068075','22565068121','22565068179','22565081497','22565101325','22565109523','22529554996','22529555250','22529561694','22529561764','22529561816','22529709869','22565081767','22565100435','22565100787','22565100871','22565101015','22565101273','22529554474','22529561532','22529561594','22529561958','22529709263','22529709389','22529709811','22565068275','22565068397','22565081873','22565082069','22565100647','22565109247','22565109441','22565109611','22565109657','22529554462','22529554588','22529554960','22529555232','22529555234','22529561616','22529561800','22529701011','22529709275','22565067955','22565068271','22565081813','22565100499','22565101185','22565108939','22565108985','22565109245','22565109265','22565109317','22565109399','22529555296','22529561562','22529561618','22529700839','22529701433','22565081751','22565082043','22565100455','22565100943','22565101071','22565101345','22565109051','22565109149','22565109655','22529554846','22529561802','22529701161','22565068155','22565068313','22565068439','22565068539','22565081597','22565081615','22565081925','22565082071','22565100505','22565101135','22565109235','22529554496','22529554830','22529555220','22529555310','22529561732','22529701033','22565081911','22565101199','22565101289','22565109679','22529554908','22529555254','22529562024','22529701167','22529701297','22529701363','22529709221','22529709315','22565068043','22565081731','22565101231','22565108981','22565109121','22565109311','22529554826','22529561542','22529561874','22529700963','22529709623','22529709625','22565068239','22565100851','22529554638','22529554672','22529555210','22529561424','22529561666','22529561734','22529701143','22529701199','22529701659','22529709407','22529709679','22529709715','22529709809','22565067689','22565067751','22565068069','22565068231','22565081617','22565081745','22565100617','22565100841','22565100979','22565109105','22529554664','22529554762','22529554926','22529561422','22529561782','22529701227','22529701431','22565068049','22565068487','22565068609','22565081649','22565081743','22565081875','22565081965','22565082021','22565109249','22565109705','22529554592','22529561710','22529701183','22529701609','22529709793','22565067885','22565068293','22565068403','22565068479','22565081979','22565100615','22565109289','22565109293','22565109303','22529554832','22529555062','22529561742','22529561894','22529701239','22529709691','22529709709','22529709877','22565068345','22565101267','22565109415','22529554772','22529554878','22529700827','22529701627','22529709527','22565081583','22565108977','22529554464','22529554596','22529554658','22529554816','22529555124','22529561682','22529561846','22529561868','22529700831','22529701417','22529701651','22529701669','22529709337','22529709681','22565067929','22565068029','22565068425','22565081849','22565081941','22565100709','22565101151','22565101311','22565109307','22565109513','22565109597','22529554586','22529554648','22529554924','22529555004','22529555092','22529561486','22529701291','22529701399','22529701487','22529709273','22529709501','22529709737','22529709779','22565068099','22565100771','22565100983','22565109693','22529555050','22529561788','22529700815','22529701343','22529701603','22565081507','22565082089','22565100493','22565101243','22565109101','22565109125','22565109515','22529554466','22529554962','22529561588','22529561680','22529562042','22529562088','22529701071','22529701375','22529701525','22529701647','22529701663','22529709469','22565068485','22565100877','22565101143','22565109063','22565109471','22529555290','22529561492','22529700835','22529709135','22565081643','22565101189','22565108951','22565109591','22529554584','22529554668','22529554786','22529554920','22529555036','22529555102','22529561656','22529700953','22529701351','22529709131','22529709383','22529709447','22529709453','22565067737','22565067747','22565067987','22565068241','22565068261','22565068311','22565082005','22565100469','22565101049','22565101279','22565101339','22565109071','22565109467','22529554818','22529561746','22565068321','22565068429','22565068573','22565081825','22565100889','22565101237','22529554858','22529561918','22529701435','22529709309','22565068035','22565081669','22565081835','22565082231','22565100975','22565101019','22565101099','22565109253','22565109685','22529554570','22529555270','22529561760','22529561812','22529561840','22529562008','22529709485','22529709749','22565067951','22565068139','22565081591','22565081609','22565081961','22565082079','22565100597','22565101319','22565109075','22565109219','22529561768','22529701181','22529701571','22529709141','22565067709','22565067715','22565067883','22565067907','22565068433','22565081575','22565081877','22565081947','22565108947','22565109357','22529554938','22529701293','22529701377','22529709101','22529709279','22529709411','22565082137','22565100661','22565109095','22565109447','22529554902','22529554964','22529555046','22529561510','22529561706','22529562030','22529700825','22529700983','22529701039','22529709151','22529709253','22529709353','22565067891','22565068215','22565068273','22565068437','22565081857','22565081891','22565081999','22565082037','22565100997','22565109151','22529554800','22529562082','22529701083','22529701147','22529709165','22565068431','22565068435','22565068517','22565081447','22565081607','22565082113','22565100593','22565101219','22529554444','22529555084','22529555264','22529561410','22529561508','22529561592','22529709115','22529709261','22529709269','22529709601','22529709843','22565068315','22565068529','22565081673','22565100613','22565100711','22565101067','22565101173','22565101301','22565109437','22529561702','22529701211','22529709161','22529709429','22529709531','22529709555','22529709611','22565081521','22565082107','22565082119','22565100475','22565100935','22529554630','22529554852','22529554976','22529555114','22529555304','22529561396','22529701645','22529709243','22529709451','22565081879','22565082199','22565100665','22565100959','22529554660','22529555010','22529561560','22529561808','22529562006','22529562062','22529562092','22529562100','22529709105','22565068045','22565068107','22565068351','22565081483','22565082175','22565100641','22565109477','22529554840','22529700863','22529701401','22529709561','22529709799','22565068087','22565068443','22565068567','22565081485','22565101033','22529554856','22529554892','22529555058','22529555312','22529561598','22529701337','22529701605','22529709835','22529709863','22565067843','22565067961','22565068047','22565068417','22565100969','22565101107','22565109333','22565109403','22565109667','22565109673','22529554940','22529555060','22529555218','22529561420','22529561552','22529701273','22529701559','22529701681','22529709325','22529709673','22565068053','22565068157','22565068163','22565068205','22565068413','22565068459','22565082169','22565100659','22565101057','22565101331','22565109383','22529554568','22529555286','22529561548','22529701195','22529701203','22565067967','22565068063','22565068223','22565068337','22565081491','22565081771','22565081829','22565100585','22565100861','22565100957','22565108949','22529554886','22529561506','22529561990','22529562132','22529701025','22529701423','22529709277','22529709331','22529709355','22529709513','22529709615','22565081451','22565100995','22565101305','22565109073','22565109571','22529554776','22529554928','22529701365','22529701475','22529709227','22565081671','22565082219','22565101063','22565109283','22529554460','22529554968','22529561398','22529561674','22529701005','22529701413','22529701615','22529701643','22529709133','22529709143','22565067695','22565068325','22565068563','22565082125','22565100915','22565101139','22565109427','22565109681','22529554998','22529701283','22529701339','22529709729','22565068461','22565081923','22565082025','22565100511','22565100629','22565100779','22565109169','22565109393','22565109671','22529554806','22529555150','22529561400','22529561778','22529701685','22529709455','22529709677','22529709785','22529709873','22565068181','22565068353','22565068401','22565081931','22565081951','22565100673','22565109351','22565109405','22529554796','22529554934','22529554942','22529554954','22529555154','22529561678','22529561852','22529562066','22529701001','22529701299','22529701653','22529709231','22565068033','22565068383','22565082191','22565100999','22565109039','22565109359','22529555134','22529561418','22529561896','22529701601','22565068269','22565068491','22565068543','22565068615','22565081865','22565082063','22565101255','22565101337','22529554610','22529555032','22529555184','22529555298','22529561534','22529700991','22529701581','22529709241','22529709665','22529709883','22565068129','22565082015','22565100811','22565100927','22565101169','22565109423','22565109551','22565109627','22529700881','22529701391','22529709117','22565082041','22565082045','22565100471','22565101261','22565109487','22529554484','22529555008','22529555244','22529555292','22529561392','22529561998','22565067873','22565067903','22565068113','22565068151','22565081703','22565081969','22565082123','22565100863','22565101343','22565109267','22565109401','22565109565','22529554576','22529554688','22529555022','22529561882','22529562144','22529700845','22529701151','22529709449','22529709735','22565068135','22565068237','22565100729','22565100873','22565101187','22565109275','22565109443','22529561866','22529562106','22529701463','22529701483','22529709339','22529709767','22565067839','22565068093','22565068371','22565068455','22565081759','22565081781','22565081841','22565082221','22565082239','22565100923','22565101283','22565109411','22529555096','22529561744','22529562064','22529701635','22529709415','22529709541','22565067693','22565067959','22565068187','22565081793','22565081885','22565081997','22565082163','22529554504','22529555170','22529561596','22529709123','22529709529','22565068143','22565068213','22565082091','22565100677','22565100815','22565100921','22565101129','22565101309','22565109165','22565109281','22529555246','22529562028','22529701301','22529701539','22529709609','22529709759','22565081741','22565100605','22565100797','22565109561','22529554770','22529561826','22529701547','22529701555','22529709235','22529709655','22565068393','22565081579','22565081667','22565081735','22565100611','22565100695','22565109227','22565109261','22565109361','22529561984','22529701373','22529709519','22529709571','22529709659','22565068595','22565068603','22565081509','22565081653','22565100445','22565100853','22565101201','22565109041','22565109391','22529554598','22529561870','22529700823','22529700979','22529701505','22565067749','22565067849','22565068243','22565068593','22565081839','22565100727','22565109699','22529555054','22529555130','22529561520','22529561762','22529561970','22529700849','22529701309','22529701325','22529709099','22529709239','22565067703','22565068583','22565081949','22565101065','22565108929','22565109625','22565109643','22529554822','22529554948','22529555214','22529561498','22529701173','22529709487','22529709523','22529709627','22565067837','22565067909','22565081503','22565081809','22565108965','22565109485','22565109525','22529555048','22529561380','22529561570','22529701457','22529701625','22529709297','22529709461','22529709705','22565067707','22565082193','22565100607','22565101347','22529554492','22529554746','22529561606','22529562070','22529701265','22529709333','22565067931','22565068253','22565081695','22565100637','22565100687','22565100855','22565101329','22529554966','22529554984','22529561740','22529562022','22529701665','22529709511','22529709537','22529709703','22529709753','22565068299','22565068493','22529554780','22529561684','22529561786','22529561810','22529701049','22529701193','22529701243','22529701359','22529709557','22565067741','22565068125','22565068133','22565068509','22565068519','22565068559','22565081501','22565100517','22565100621','22565101299','22565109425','22565109547','22529554698','22529555076','22529555136','22529561626','22529701261','22529701311','22529701631','22565068081','22565068217','22565068441','22565081863','22565100581','22565100775','22565100905','22565101003','22529554606','22529554970','22529554980','22529555288','22529555308','22529561916','22529562146','22529700985','22529701153','22529701569','22529701585','22529701641','22529709807','22529709817','22565067847','22565067973','22565068169','22565068333','22565082047','22565101013','22565101097','22565109419','22565109593','22565109603','22529555068','22529561700','22529562104','22529562108','22529701065','22565068327','22565068375','22565068469','22565068499','22565068565','22565081861','22565100791','22565100833','22529555070','22529555100','22529555122','22529555278','22529561502','22529561754','22529561938','22529561966','22529700891','22529709569','22529709653','22529709663','22565067771','22565068111','22565068373','22565081937','22565100821','22565100911','22565101131','22565109217','22565109601','22529554884','22529561516','22529700975','22529701313','22529701675','22529709459','22565068359','22565068471','22565081843','22565082209','22565082233','22565100495','22565100785','22565101133','22565109533','22529554490','22529555300','22529561756','22529701045','22529701163','22529701563','22565067743','22565068221','22565068343','22565081677','22565081929','22529561726','22529700859','22565067851','22565068279','22565068329','22565068549','22565081455','22565081957','22565100443','22565100497','22565100513','22565101087','22565109099','22565109315','22529554458','22529554754','22529554936','22529561714','22529561806','22529562102','22529701477','22529709401','22529709661','22565067921','22565068301','22565081659','22565100473','22565101303','22565108923','22529554512','22529554766','22529561436','22529700833','22529700843','22529701597','22529701623','22529709163','22565081769','22565081887','22565101059','22565101159','22565108933','22565109069','22565109211','22565109233','22565109243','22529555106','22529561414','22529561830','22529700973','22529701213','22529701577','22529709265','22529709287','22529709439','22529709445','22529709587','22529709645','22565067687','22565067965','22565068153','22565068335','22565068513','22565081733','22565081921','22565100715','22565108969','22565109691','22529554478','22529554824','22529555302','22529561614','22529561878','22529701007','22529701029','22529701231','22565067773','22565100807','22565100931','22565101217','22565109173','22565109707','22565109709','22529554930','22529701205','22529701649','22529709589','22529709797','22565068427','22565068463','22565082099','22565100481','22565100645','22565100681','22565100949','22565100961','22565101123','22565101265','22529555016','22529555030','22529555034','22529561992','22529701037','22529701689','22529709547','22529709613','22529709693','22565067861','22565081599','22565081973','22565082203','22565100459','22565100971','22565101037','22565101313','22565109363','22529561364','22529561556','22529701479','22529709255','22529709359','22529709509','22529709773','22565067727','22565067897','22565068061','22565068131','22565068507','22565081595','22565081737','22565081837','22565082185','22565109047','22565109449','22565109481','22529554828','22529555026','22529555040','22529555072','22529561922','22529700861','22529701209','22529701395','22529701409','22529709295','22529709385','22565067767','22565068357','22565081739','22565081935','22565082055','22565109133','22565109503','22529700969','22529701043','22529701259','22529701331','22565109697','22529554434','22529554842','22529561572','22529561696','22529701187','22529709539','22565068249','22565068505','22565081693','22565081779','22565082109','22565101157','22565101335','22565109433','22529554438','22529555140','22529561956','22529700875','22529700971','22529701693','22529709549','22565101119','22565109259','22565109537','22529554642','22529554662','22529555164','22529561416','22529561628','22529700989','22529701051','22529701237','22529709251','22529709343','22529709425','22565067881','22565082097','22565100973','22565100985','22565101009','22565101017','22565109255','22529555268','22529561426','22529700947','22529701279','22529701535','22529709405','22565067925','22565067937','22565068247','22565101251','22565101253','22565109701','22529554594','22529555024','22529555142','22529562048','22529562122','22529709701','22529709789','22565068137','22565068477','22565081685','22565081981','22565082051','22565100519','22565100649','22565100725','22565101029','22565101075','22565101103','22565108913','22565109155','22529555098','22529555236','22529561406','22529701017','22529701241','22529701267','22529709851','22565067979','22565068039','22565081687','22565082033','22565082139','22565082141','22565100875','22565101179','22565109353','22565109623','22529554472','22529554752','22529555168','22529562050','22529562120','22529701443','22565067697','22565067927','22565068109','22565101043','22565108915','22565108967','22529554700','22529554946','22529555006','22529561574','22529562110','22529562124','22529700853','22529701023','22529701129','22529701381','22529701437','22529701639','22529709335','22529709861','22565067889','22565068201','22565068281','22565081897','22565082001','22565082039','22565100507','22565108945','22565109137','22565109355','22529561620','22529561662','22529700813','22529701077','22529701189','22529701319','22565067975','22565081953','22565082085','22565100773','22565100837','22565101203','22565109171','22529555224','22529561920','22529562054','22529701449','22529701667','22529709395','22529709545','22529709617','22529709649','22529709739','22565068577','22565081593','22565081805','22565081977','22565082013','22565100721','22565100881','22565101177','22565109409','22565109583','22565109659','22529554498','22529554914','22529561934','22529709145','22529709155','22529709267','22529709643','22529709771','22529709879','22565067717','22565081651','22565082003','22565100643','22565108983','22565109225','22529554626','22529555038','22529555280','22529700981','22529701135','22529701245','22529701519','22529709301','22565082053','22565100989','22565109291','22529554482','22529554788','22529555198','22529561602','22529561658','22529561848','22529562116','22529701561','22529709565','22565067853','22565068551','22565081795','22565100793','22565100951','22565109085','22565109385','22529554620','22529554882','22529555028','22529561926','22529561942','22529701251','22529701515','22529709563','22529709577','22565067721','22565081589','22565082059','22565082165','22529554674','22529701493','22529701621','22529709833','22565068037','22565081631','22565081657','22565081913','22565108961','22565108987','22565109629','22529554622','22529554670','22529554986','22529561952','22529562040','22529562046','22529562126','22529562152','22565068161','22565068245','22565101259','22565108921','22565109061','22565109215','22565109569','22529554820','22529555152','22529561412','22529561528','22529561940','22529561950','22529562034','22529700855','22529701121','22529709431','22529709853','22565067719','22565100487','22565100595','22565101021','22565101061','22565101257','22565101285','22565101291','22565108957','22565108975','22565109417','22529554580','22529554784','22529561722','22529561888','22529701307','22529701397','22529709349','22529709829','22565067865','22565068095','22565068453','22565081895','22565100777','22565109301','22565109323','22565109497','22565109505','22529554608','22529554656','22529701217','22529709841','22565068495','22565068561','22565081915','22565100461','22565100707','22565100977','22565101175','22565109057','22565109367','22529554452','22529555090','22529555206','22529561752','22529561980','22529701507','22529701629','22529709113','22529709341','22565068585','22565100655','22565100885','22529555162','22529561434','22529561708','22529561804','22529561988','22529700867','22529709867','22565068257','22565081855','22565081867','22565082187','22565101093','22565101163','22565109649','22529554932','22529561936','22529562138','22565067863','22565068101','22565068255','22565068363','22565081821','22565082095','22565082213','22565100933','22565109501','22529555088','22529555182','22529555216','22529555222','22529561884','22529709481','22565067895','22565068331','22565081705','22565100433','22565101047','22529554814','22529701057','22529701281','22529701327','22529709427','22529709475','22529709733','22565068535','22565081577','22565100439','22565100713','22565100857','22565108973','22565109201','22565109651','22529554454','22529554860','22529554906','22529561536','22529561544','22529561886','22529700811','22529701289','22529701387','22565068041','22565082217','22529554450','22529554640','22529554790','22529555104','22529561784','22529561862','22529562032','22529701683','22529709593','22565067753','22565068317','22565081573','22565082065','22565100909','22565109325','22565109545','22529555248','22529561608','22529700959','22529701527','22529701545','22529701599','22529709421','22529709471','22565082129','22565100689','22565100967','22565101235','22565109089','22565109605','22529561390','22529562036','22529562058','22529701481','22529701497','22529701537','22529701611','22529709725','22529709783','22565068119','22565068575','22565081613','22565082017','22565100913','22565100947','22565101161','22565101269','22565101359','22565109045','22565109285','22565109549','22565109595','22529555056','22529561724','22529562084','22529701557','22529701573','22529709761','22565068277','22565068283','22565068571','22565081803','22565082195','22565100823','22565101287','22565109521','22565109539','22529554982','22529555064','22529555166','22529561518','22529561564','22529561822','22529701233','22565067691','22565067887','22565068165','22565068303','22565068467','22565068581','22565081679','22565100685','22565109237','22565109297','22529555176','22529561370','22529561512','22529561554','22529562074','22529701461','22565067971','22565068369','22565068451','22565081665','22565082101','22565100491','22565109567','22565109633','22529555086','22529555204','22529561730','22529561996','22529701137','22529701489','22529701661','22529709283','22565067869','22565068147','22565068259','22565068379','22565081909','22565082077','22565082189','22565109091','22565109109','22529561500','22529561524','22529561766','22529562114','22529701403','22529701691','22529709137','22529709687','22565068183','22565068605','22565081489','22565082073','22565100835','22529554636','22529561738','22529701013','22529701081','22529709311','22529709357','22529709707','22529709881','22565068599','22565081663','22565100683','22565109585','22565109665','22529554838','22529554952','22529555000','22529561856','22529701175','22529701207','22529701427','22565067945','22565068117','22565068367','22565068483','22565081753','22565100431','22565100627','22565100883','22565101353','22529554802','22529554810','22529555200','22529561652','22529562000','22529709317','22565068191','22565068263','22565068523','22565068525','22565081473','22565081975','22565082173','22565109319','22529554440','22529554696','22529554808','22529554862','22529555116','22529555212','22529561612','22529700871','22529709697','22565067845','22565068145','22565068229','22565068527','22565081847','22565108941','22565109431','22529554680','22529561890','22529700955','22529709223','22529709291','22565067705','22565067759','22565081903','22565082007','22565100515','22565100891','22565100981','22565101241','22565109205','22565109349','22529561490','22529561672','22529561850','22529700945','22529701509','22529709769','22529709787','22529709875','22565068193','22565081513','22565082149','22565100991','22565101095','22565101249','22565109305','22565109577','22565109631','22565109703','22529561378','22529561610','22529709345','22529709499','22529709591','22529709651','22529709763','22565081641','22565109145','22565109465','22565109499','22529554446','22529554804','22529561372','22529562018','22529700987','22529701015','22529701257','22529701619','22529709347','22529709397','22529709685','22529709713','22565068115','22565081917','22565101053','22565101281','22565101327','22565109379','22565109615','22529554480','22529561428','22529561530','22529561716','22529701271','22529701473','22529709595','22529709747','22565067857','22565068175','22565068423','22565100639','22565100899','22565101207','22565109489','22529554678','22529561692','22529561728','22529561858','22529562118','22565100717','22565108943','22565109055','22565109251','22529554876','22529561720','22529701067','22529701441','22529701551','22529709409','22565068083','22565081919','22565082153','22565100591','22565100699','22565101115','22565108927','22565109473','22565109599','22565109619','22529555188','22529561568','22529561928','22529562014','22529701247','22529701587','22529709581','22565067985','22565068085','22565100867','22565100941','22565109141','22565109421','22529554972','22529561566','22529561686','22529561736','22529700857','22529709669','22529709671','22565068171','22565068199','22565100587','22565101069','22565109397','22565109511','22529554750','22529554950','22529561776','22529561948','22529562060','22529700967','22529701501','22529701607','22529709147','22529709217','22529709493','22565068415','22565081459','22565101023','22565101233','22565109097','22565109575','22529554910','22529555144','22529555256','22529561820','22529561912','22529709619','22565068091','22565068251','22565068501','22565081453','22565081869','22565081967','22565100599','22565100705','22565109531','22529554890','22529555208','22529561404','22529561770','22529562026','22529562130','22529701341','22529701521','22529709823','22565081581','22565081697','22565081799','22565081945','22565081959','22565082207','22565101117','22565101213','22565109231','22529554768','22529561664','22529561676','22529700997','22529701383','22529701503','22529701637','22529701657','22529709107','22529709249','22565067957','22565082103','22565101153','22565101211','22565109519','22529554654','22529554956','22529555052','22529561974','22529562072','22529700841','22529701165','22529701225','22529709305','22529709757','22565068089','22565101171','22565101227','22565109645','22529554448','22529555066','22529561654','22529561986','22529701353','22529709313','22529709583','22529709775','22565067739','22565081807','22565082093','22565082237','22565100825','22529554694','22529561494','22529561864','22529701123','22529709219','22529709307','22529709607','22565068267','22565081465','22565081481','22565081625','22565100945','22565101089','22565109527','22529554582','22529554666','22529561712','22529562128','22529562140','22529701145','22529701389','22529701425','22529701511','22529701543','22529701553','22529709245','22565067757','22565068445','22565068589','22565082145','22565100963','22565101011','22565109115','22529554510','22529554682','22529554742','22529555202','22529561586','22529700837','22529709139','22529709517','22529709859','22565067763','22565067977','22565068381','22565068569','22565081747','22565081773','22565081845','22565082035','22565100463','22565100579','22565100679','22565109043','22565109163','22565109175','22529555160','22529555272','22529562142','22529701349','22529709845','22565067745','22565081515','22565100483','22565100895','22565101109','22565109553','22565109609','22529554812','22529554874','22529554922','22529554944','22529555002','22529555018','22529561388','22529561582','22529701185','22529701287','22529709271','22529709441','22529709605','22565068065','22565068387','22565068475','22565068547','22565082061','22565082143','22565082225','22565100903','22565109143','22565109555','22529554836','22529561382','22529561818','22529561944','22529561968','22529701027','22565067841','22565067943','22565081905','22565101077','22565101355','22565109395','22529554612','22529554988','22529561432','22529562136','22529700817','22529701329','22529709489','22565068235','22565100831','22565100925','22565101027','22565109083','22529554476','22529554916','22529561814','22565068233','22565081681','22565081765','22565081883','22565100859','22565100953','22565101081','22565108935','22565109161','22565109279','22565109381','22529561526','22529561704','22529701021','22529701059','22565068447','22565082121','22565082155','22565100583','22565101149','22565101317','22565109257','22529554868','22529555118','22529701347','22529709781','22565068361','22565068579','22565081647','22565100805','22565101041','22565101221','22565109103','22565109491','22529554634','22529561774','22529701201','22529709097','22529709435','22565067713','22565068587','22565082205','22565100733','22565100827','22565101055','22565109053','22565109375','22565109483','22565109541','22565109617','22529554748','22529555074','22529555180','22529555274','22529561522','22529701333','22529709121','22565081623','22565081763','22565081777','22565081853','22565081971','22565100509','22565101245','22565108931','22565109313','22565109373','22565109507','22565109509','22529554692','22529554872','22529555094','22529555260','22529562002','22529701529','22529709281','22565068031','22565068207','22565068557','22565081467','22565100437','22565100819','22529554992','22529555238','22529561698','22529700885','22529709285','22529709327','22529709777','22565067875','22565067947','22565067983','22565068323','22565081963','22565100467','22565101007','22565101101','22565101111','22565109429','22529554578','22529554690','22529561924','22529701419','22529709419','22565067699','22565067905','22565068607','22565082241','22565109139','22565109153','22565109687','22529701255','22529701415','22529709413','22565067893','22565067935','22565068167','22565068377','22565081797','22565100447','22565100623','22565100801','22565100901','22565109065','22529555014','22529555148','22529561994','22529700879','22529701149','22529701465','22529701471','22529701541','22529701677','22529709149','22529709855','22565067867','22565067963','22565068219','22565081495','22565082027','22565082167','22565100449','22565109309','22565109493','22565109495','22529554844','22529555314','22529561872','22529562038','22529700873','22529701133','22565081899','22565082075','22565101051','22529555186','22529555282','22529561496','22529701575','22529709423','22529709543','22565068067','22565068555','22565081605','22565082177','22565082211','22565100603','22565101321','22529554506','22529555078','22529555174','22529709443','22529709621','22529709857','22565067725','22565068465','22529555226','22529555276','22529561718','22529561748','22529561946','22529701467','22529709095','22529709321','22565068541','22565068613','22565100939','22565101079','22565101263','22565109387','22565109661','22565109689','22529554508','22529554850','22529561402','22529561844','22529561964','22529700869','22529701031','22529701445','22529709467','22565067761','22565068601','22565081471','22565081827','22565101035','22565101147','22565108917','22565108971','22565109107','22529554486','22529554652','22529554894','22529709491','22529709675','22565067733','22565068027','22565068071','22565068349','22565068537','22565081701','22565100783','22565100813','22565101191','22565109229','22565109339','22529554744','22529554782','22529554794','22529561660','22529561982','22529562044','22529700847','22529700951','22529709525','22565068611','22565081449','22565081475','22529555128','22529561580','22529562016','22529701285','22529701315','22529701439','22529701687','22529709127','22529709755','22565081487','22565081601','22565081683','22565081785','22565082081','22565100879','22565109683','22529554798','22529700851','22529701451','22529709393','22565067949','22565067969','22565068103','22565081783','22565082115','22565101209','22565109077','22529555044','22529562134','22529701191','22529701379','22529701485','22529709483','22529709801','22529709827','22565067765','22565068297','22565068421','22565108979','22565109093','22565109123','22565109135','22565109445','22529555020','22529562004','22529700889','22565068073','22565068123','22565082067','22565100477','22565100893','22565101145','22565101239','22565109081','22565109127','22565109269','22565109295','22565109637','22565109695','22529554978','22529561368','22529561590','22529561842','22529562086','22529562148','22529701361','22529701385','22529709329','22529709433','22529709731','22565068225','22565068391','22565081511','22565081927','22565081933','22565101031','22565109049','22565109563','22565109639','22529554500','22529554502','22529554904','22529701253','22565068203','22565068347','22565068545','22565081627','22565100635','22565101293','22565109113','22565109287','22529555156','22529561488','22529561584','22529561772','22529561960','22529562080','22529701589','22529709319','22565067711','22565068319','22565081633','22565081893','22565100441','22565101073','22565101307','22565109067','22529555138','22529700865','22529701003','22529701393','22529701411','22529709111','22529709167','22529709711','22565067917','22565068341','22565068411','22565081675','22565082029','22565100479','22565100781','22565101005','22565101167','22565108953','22565109347','22565109369','22529561384','22529701323','22529701633','22529709225','22529709573','22565067915','22565068141','22565081787','22565082057','22565082235','22529555158','22529555196','22529562094','22529701009','22529701469','22529701579','22565068149','22565068405','22565081519','22565100601','22565101295','22565108925','22529554436','22529555190','22529561962','22529701317','22529701355','22529701671','22529709585','22529709699','22529709871','22565081901','22565081955','22565082159','22565100703','22565100919','22565100929','22565101165','22565101277','22565101349','22565108937','22565109167','22529555192','22529561408','22529561538','22529700877','22529701125','22529709463','22565067933','22565067941','22565067953','22565068105','22565068127','22565068265','22565081817','22565081819','22565100457','22565100675','22565101137','22565109559')")

In [233]:
CL02M_source_corrige.count()

941839

In [236]:
CL02M_source_corrige.drop("LADM_DMBTR","TWBTRG","LADM_TWBTRG").subtract(CL02M_res.drop("LADM_DMBTR","TWBTRG","LADM_TWBTRG")).count()

0

In [237]:
CL02M_source_corrige.registerTempTable("cl02m_src")

In [238]:
CL02M_res.registerTempTable("cl02m_res")

In [239]:
query = """
select CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
FROM cl02m_src
"""
df = sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|333267226.770010050000000                               |
+--------------------------------------------------------+



In [240]:
query = """
select CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
FROM cl02m_res
"""
df = sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|333267226.769999100000000                               |
+--------------------------------------------------------+



In [242]:
query = """
select CAST(SUM(LADM_DMBTR) AS DECIMAL(38,15))
FROM cl02m_src
"""
df = sqlContext.sql(query)
df.show(1,False)

+-------------------------------------------------------+
|CAST(sum(CAST(LADM_DMBTR AS DOUBLE)) AS DECIMAL(38,15))|
+-------------------------------------------------------+
|9749632.437939582000000                                |
+-------------------------------------------------------+



In [243]:
query = """
select CAST(SUM(LADM_DMBTR) AS DECIMAL(38,15))
FROM cl02m_res
"""
df = sqlContext.sql(query)
df.show(1,False)

+-------------------------------------------------------+
|CAST(sum(CAST(LADM_DMBTR AS DOUBLE)) AS DECIMAL(38,15))|
+-------------------------------------------------------+
|9749651.789999813000000                                |
+-------------------------------------------------------+



In [157]:
CL05M_source.registerTempTable("cl05m_src")
CL05M_res.registerTempTable("cl05m_res")

In [244]:
query = """ 
SELECT CAST(SUM(TWBTRG) AS DECIMAL(38,15))
from cl05m_src
"""
df = sqlContext.sql(query)
df.show(1,False)


+---------------------------------------------------+
|CAST(sum(CAST(TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+---------------------------------------------------+
|456056376.309994500000000                          |
+---------------------------------------------------+



In [245]:
query = """ 
SELECT CAST(SUM(TWBTRG) AS DECIMAL(38,15))
from cl05m_res
"""
df = sqlContext.sql(query)
df.show(1,False)


+---------------------------------------------------+
|CAST(sum(CAST(TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+---------------------------------------------------+
|456056376.309996500000000                          |
+---------------------------------------------------+



# cl03m

In [266]:
CL03M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/CL03M/LDM_PRD_FRA_CL03M_201007_093812.CSV", header=True, sep=";")
print("nombre lignes source : " + str(CL03M_source.count()))
basl_max_source = CL03M_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = CL03M_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
CL03M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0607_PRD_20201007_20201007105221_CL03M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(CL03M_res.count()))
basl_max_extract = CL03M_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = CL03M_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 5608
basl out id max source : 22601156913
basl out id min source : 22316556993
nombre lignes extract : 5250
basl out id max extract : 22601156913
basl out id min extract : 22316556993


In [267]:
5608 - 5250

358

In [278]:
df_ecarts = CL03M_source.select("BASL_OUT_ID","BUKRS","GJAHR","MONAT", "RACCT").subtract(CL03M_res.select("BASL_OUT_ID","BUKRS","GJAHR","MONAT", "RACCT"))

In [279]:
df_ecarts.registerTempTable("cl03m_ecart")

In [274]:
df_ecarts.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/analyse_cl03m/cl03.csv",header=True,sep=';')

In [280]:
query = """
select basl.BASL_OUT_ID, basl.LDGRP, basl.CPUDT
FROM BASL_AZF_DATE_CPT AS basl
RIGHT JOIN cl03m_ecart AS cl03m ON basl.BASL_OUT_ID = cl03m.BASL_OUT_ID
"""
df = sqlContext.sql(query)
df.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/analyse_cl03m/cl03_ecarts.csv",header=True,sep=';')

In [281]:
CL03M_source_corrige = CL03M_source.filter("BASL_OUT_ID NOT IN ('22529700809','22565108883','22529554422','22529700927','22565109181','22565108907','22565081431','22529700791','22565109009','22565100561','22565108893','22529561440','22529709191','22565067667','22565068001','22565067797','22565067807','22529561468','22529561470','22565067821','22529709189','22529554552','22565108997','22565100557','22529554412','22565067799','22565100539','22565109005','22529700799','22565067785','22565100553','22565100415','22565067783','22565081425','22565081409','22565109029','22565100523','22565100531','22529709073','22529700921','22565100567','22565068003','22565067795','22529554562','22529554716','22529554540','22565081423','22565081545','22529561484','22529554560','22565100571','22529701085','22529700941','22529709213','22565067995','22565100541','22565067803','22565081555','22529709361','22565100563','22529709173','22529700943','22529554428','22529709087','22529709071','22565081539','22565067999','22529709089','22565081533','22529709083','22529709075','22565081709','22529561452','22565067817','22565100537','22565108879','22565108901','22565100419','22529561344','22529554564','22529701091','22565067825','22565100555','22529554720','22529700897','22565100743','22565067997','22565109003','22529709207','22565109027','22565081557','22529561338','22529709177','22565109037','22565081569','22565067677','22565081417','22565081427','22529700933','22565100575','22529709199','22565100533','22565108891','22565067683','22565108881','22529700939','22529709215','22565100745','22529701089','22529554526','22565081433','22565100423','22529701097','22565100749','22529709195','22565109001','22529701101','22529554416','22529554714','22529554706','22565100577','22565108903','22529709179','22565081561','22565100741','22529701099','22529701093','22565108911','22529561448','22529709205','22529561464','22565081529','22565081553','22529700909','22565067671','22565081559','22565100739','22565108993','22565067679','22565067791','22565108899','22565067781','22529554548','22565100565','22529709211','22565067805','22529700803','22529554546','22565108897','22529709203','22565100529','22529561450','22565109025','22529561348','22529700807','22529700925','22565108895','22529561454','22565109013','22529561458','22565067993','22529700895','22565068005','22529709187','22565067991','22529709081','22529554426','22529709169','22565081429','22565109011','22565108889','22529561444','22565067787','22529554534','22529709175','22529561480','22529554556','22529554554','22529700797','22529700793','22529701087','22529700903','22529561360','22565081415','22529561340','22529700893','22565081551','22565100547','22529561342','22529561456','22565067673','22529554520','22565081411','22529554722','22529561352','22565081421','22529554524','22565067681','22529709193','22529554514','22565067675','22529700911','22565081527','22529700913','22529561630','22565109035','22529554710','22565067789','22529554430','22529709093','22565109017','22565108999','22529561460','22565081547','22565100427','22529709091','22529709085','22529700929','22529561478','22529554718','22529700937','22529700935','22565081563','22565067811','22529709069','22529554418','22529554550','22565081523','22529554558','22529561442','22565100535','22529700899','22529561356','22565067801','22529561632','22529561466','22529709185','22565067669','22565067819','22529700795','22529561438','22529709201','22565067815','22565067685','22565081567','22565067823','22565067809','22529561482','22565067827','22565100747','22529554542','22529709067','22565067833','22565109007','22529554518','22565100569','22565108991','22565100737','22565100545','22565081437','22565108877','22565081711','22565067779','22565100549','22529554538','22529709077','22529700901','22565081435','22529709183','22529554532','22529561354','22565067829','22565067813','22529561358','22529561336','22529700801','22529700931','22565109019','22529554414','22565100411','22529700915','22529709365','22529554544','22565108905','22565109033','22529554516','22529561362','22565108885','22565081419','22529709181','22529709197','22565100417','22565068007','22565067793','22565081439','22565109031','22529554708','22565100421','22529561472','22529554424','22565081541','22565100559','22529709171','22565067777','22565100429','22529561474','22565067831','22565081535','22529554530','22529561476','22565109023','22529554712','22565100543','22565109015','22565100735','22565100527','22565109179','22565108887','22565109177','22529700905','22529701095','22565081525','22565100525','22565100573','22529700919','22565081443','22565081549','22529709209','22565081565','22529700805','22565081713','22565100751','22529561350','22565108995','22529709079','22529561346','22565081537','22565081543','22529700923','22529554420','22529700917','22529700907','22565100551','22565100413','22565100425','22565108909','22529554536','22565081441','22565081413','22529709363','22529561462','22529554522','22565100521','22529561446','22565109021','22529554528','22529561634','22565081531')")

In [283]:
CL03M_source_corrige.registerTempTable("cl03m_corrige")

In [285]:
CL03M_res.registerTempTable("cl03m_res")

In [288]:
CL03M_source_corrige.drop("LADM_TWBTRG","LADM_DMBTR","TWBTRG").subtract(CL03M_res.drop("LADM_TWBTRG","LADM_DMBTR","TWBTRG")).count()

0

In [284]:
query = """ 
SELECT CAST(SUM(TWBTRG) AS DECIMAL(38,15))
from cl03m_corrige
"""
df = sqlContext.sql(query)
df.show(1,False)

+---------------------------------------------------+
|CAST(sum(CAST(TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+---------------------------------------------------+
|283127881.469999130000000                          |
+---------------------------------------------------+



In [286]:
query = """ 
SELECT CAST(SUM(TWBTRG) AS DECIMAL(38,15))
from cl03m_res
"""
df = sqlContext.sql(query)
df.show(1,False)

+---------------------------------------------------+
|CAST(sum(CAST(TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+---------------------------------------------------+
|283127881.469999600000000                          |
+---------------------------------------------------+



In [None]:
query = """ 
SELECT CAST(SUM(TWBTRG) AS DECIMAL(38,15))
from cl03m_corrige
"""
df = sqlContext.sql(query)
df.show(1,False)

In [None]:
IE01D_res.printSchema()

In [None]:
#CL04M

In [290]:
CL04M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/CL04M/LDM_PRD_FRA_CL04M_201007_093814.CSV", header=True, sep=";")
print("nombre lignes source : " + str(CL04M_source.count()))
basl_max_source = CL04M_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = CL04M_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
CL04M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0607_PRD_20201007_20201007105706_CL04M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(CL04M_res.count()))
basl_max_extract = CL04M_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = CL04M_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))

nombre lignes source : 89116
basl out id max source : 22586890394
basl out id min source : 22519072559
nombre lignes extract : 89116
basl out id max extract : 22586890394
basl out id min extract : 22519072559


In [293]:
CL04M_res.drop("LADM_TWBTRG","LADM_DMBTR","TWBTRG").subtract(CL04M_source.drop("LADM_TWBTRG","LADM_DMBTR","TWBTRG")).count()

0

In [294]:
CL04M_source.registerTempTable("cl04_src")
CL04M_res.registerTempTable("cl04_res")

In [297]:
query = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from cl04_src
"""
df = sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-110103347.570000480000000                              |
+--------------------------------------------------------+



In [298]:
query = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from cl04_res
"""
df = sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-110103347.569952960000000                              |
+--------------------------------------------------------+



In [248]:
CM01M_source.select("LDGRP").distinct().show()

+-----+
|LDGRP|
+-----+
| null|
+-----+



In [None]:
query = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from cl03m_res
"""
df = sqlContext.sql(query)
df.show(1,False)

In [82]:
#B43 
B43_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/B43/LDM_PRD_FRA_B43QT_201006_013602.CSV", header=True, sep=";")
print("nombre lignes source : " + str(B43_source.count()))
basl_max_source = B43_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = B43_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
B43_res = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/B43/GY06_PRD_BI_PRD_20201005_20201005015226_B43QT_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(B43_res.count()))
basl_max_extract = B43_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = B43_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 8
basl out id max source : 21995885628
basl out id min source : 21056037350
nombre lignes extract : 8
basl out id max extract : 21995885628
basl out id min extract : 21056037350


In [84]:
B43_source.drop("CHANGE_DATE").subtract(B43_res.drop("CHANGE_DATE")).count()

0

In [87]:
#835
df835_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/835/MEP/AWZEDSY1_GY06_PRD_BI_PRD_20201002_20201002224223_835YD_0004/LDM_PRD_FRA_835YD_201002_223131.CSV", header=True, sep=";")
print("nombre lignes source : " + str(df835_source.count()))
# basl_max_source = df835_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = df835_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
df835_res = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/835/MEP/AWZEDSY1_GY06_PRD_BI_PRD_20201002_20201002224223_835YD_0004/GY06_PRD_BI_PRD_20201002_20201002224223_835YD_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(df835_res.count()))
# basl_max_extract = df835_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_extract = df835_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max extract : " + str(basl_max_extract))
# print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 3780
nombre lignes extract : 3779


In [90]:
df835_source.drop("TWBTRG").subtract(df835_res.drop("TWBTRG")).show()

+--------+----+------+----+--------+-----+----+----+-----+-----+-----+-----+
|FS_BUKRS|FMK1|FS_DCH| MAY|FS_WAERS|BUBSZ|MDCF| ABZ|BSCHL|YYPRG|YYDCH|YYCSG|
+--------+----+------+----+--------+-----+----+----+-----+-----+-----+-----+
|  601001|229A|   AGT|2020|   00200| null|null|I419|   50|99999|   01|  999|
+--------+----+------+----+--------+-----+----+----+-----+-----+-----+-----+



In [95]:
#AD01M
AD01m_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/LDM_PRD_FRA_AD01M_201005_073406.CSV", header=True, sep=";")
print("nombre lignes source : " + str(AD01m_source.count()))
# basl_max_source = AD01m_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = AD01m_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
AD01m_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0703_PRD_20201005_20201005071154_AD01M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(AD01m_res.count()))
# basl_max_extract = AD01m_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_extract = AD01m_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max extract : " + str(basl_max_extract))
# print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 4962675
nombre lignes extract : 4869499


In [106]:
AD01m_source.select("FS_LDGRP").distinct().show()

AnalysisException: "cannot resolve '`FS_LDGRP`' given input columns: [RISK, DATE4, HBSNR, FMK2, SHKZG, DATE3, SNR, BUPRT, BUDAT, BUKRS, MDCF, PRODSCHL, GSA, GSAKAT, FS_HWAERS, GJAHR, BLDAT, SUBRISK, CPUDT, AJ, RACCT, ZZONR, BEC_ERKENNUNG3, BELDAT, LADM_TWBTRG, MONAT, FMK1];;\n'Project ['FS_LDGRP]\n+- Relation[BUKRS#18222,GJAHR#18223,MONAT#18224,RACCT#18225,AJ#18226,BEC_ERKENNUNG3#18227,BELDAT#18228,BLDAT#18229,BUDAT#18230,BUPRT#18231,CPUDT#18232,DATE3#18233,DATE4#18234,FMK1#18235,FMK2#18236,FS_HWAERS#18237,GSA#18238,GSAKAT#18239,HBSNR#18240,MDCF#18241,PRODSCHL#18242,RISK#18243,SHKZG#18244,SNR#18245,... 3 more fields] csv\n"

In [101]:
AD01m_res.select("RACCT").distinct().show()

+----------+
|     RACCT|
+----------+
|5111001060|
|5111001070|
|5111001008|
|5111001009|
|5111001049|
+----------+



In [104]:
#AD02M
AD02M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/AD02M.csv/", header=True, sep=";")
print("nombre lignes source : " + str(AD02M_source.count()))
# basl_max_source = AD01m_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = AD01m_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
AD02M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0703_PRD_20201005_20201005071236_AD02M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(AD02M_res.count()))
# basl_max_extract = AD01m_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_extract = AD01m_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max extract : " + str(basl_max_extract))
# print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 27837340
nombre lignes extract : 12280219


In [107]:
AD02M_source.select("FS_LDGRP1").distinct().show()

+---------+
|FS_LDGRP1|
+---------+
|     null|
+---------+



In [300]:
#AD02M
AD02M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/AD02M.csv/", header=True, sep=";")
print("nombre lignes source : " + str(AD02M_source.count()))
# basl_max_source = AD01m_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = AD01m_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
AD02M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0703_PRD_20201005_20201005071236_AD02M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(AD02M_res.count()))
# basl_max_extract = AD01m_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_extract = AD01m_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max extract : " + str(basl_max_extract))
# print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 27837340
nombre lignes extract : 12280219


In [303]:
AD02M_source.registerTempTable("ad02m_src")
AD02M_res.registerTempTable("ad02m_res")

In [307]:
AD02M_source.select("FS_LDGRP1").distinct().show()

+---------+
|FS_LDGRP1|
+---------+
|     null|
+---------+



In [308]:
AD02M_res.select("FS_LDGRP1").distinct().show()

+---------+
|FS_LDGRP1|
+---------+
|     null|
+---------+



In [309]:
27837340 - 12280219

15557121

In [311]:
AD02M_source.drop("LADM_TWBTRG").subtract(AD02M_res.drop("LADM_TWBTRG")).count()

0

In [313]:
AD02M_res.subtract(AD02M_source).count()

4222747

# AD01M

In [314]:
#AD02M
AD01M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/AD01M/LDM_PRD_FRA_AD01M_201005_073406.CSV", header=True, sep=";")
print("nombre lignes source : " + str(AD01M_source.count()))
# basl_max_source = AD01m_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = AD01m_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
AD01M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0703_PRD_20201005_20201005071154_AD01M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(AD01M_res.count()))
# basl_max_extract = AD01m_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_extract = AD01m_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max extract : " + str(basl_max_extract))
# print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 4962675
nombre lignes extract : 4869499


In [317]:
AD01M_res.select("LDGRP").distinct().show()

+-----+
|LDGRP|
+-----+
| null|
|   K7|
+-----+



In [322]:
df_ad01M_corrige_res = AD01M_res.filter("(LDGRP NOT IN('K7') OR LDGRP IS NULL)")

In [323]:
AD01M_source.registerTempTable("ad01m_src")
df_ad01M_corrige_res.registerTempTable("ad01m_res")

In [324]:
query = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad01m_src
"""
df = sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-854858593.380008600000000                              |
+--------------------------------------------------------+



In [325]:
query = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad01m_res
"""
df = sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-854858593.380001700000000                              |
+--------------------------------------------------------+



In [304]:
query = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad02m_src
"""
df = sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-2071679515.199690600000000                             |
+--------------------------------------------------------+



In [305]:
query = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad02m_res
"""
df = sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-823716423.758586400000000                              |
+--------------------------------------------------------+



In [248]:
CM01M_source.select("LDGRP").distinct().show()

+-----+
|LDGRP|
+-----+
| null|
+-----+



# AD08M

In [327]:
#AD02M
AD08M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/AD08M/LDM_PRD_FRA_AD08M_201005_073400.CSV", header=True, sep=";")
print("nombre lignes source : " + str(AD08M_source.count()))
# basl_max_source = AD01m_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = AD01m_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
AD08M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0703_PRD_20201005_20201005072722_AD08M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(AD08M_res.count()))
# basl_max_extract = AD01m_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_extract = AD01m_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max extract : " + str(basl_max_extract))
# print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 308068
nombre lignes extract : 36537


In [329]:
df_ad08M_corrige_res = AD08M_res.filter("(LDGRP NOT IN('K7') OR LDGRP IS NULL)")

In [330]:
df_ad08M_corrige_res.count()

36376

In [331]:
AD08M_source.registerTempTable("ad08m_src")
df_ad08M_corrige_res.registerTempTable("ad08m_res")

In [332]:
query = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad08m_src
"""
df = sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|930910768.530002700000000                               |
+--------------------------------------------------------+



In [333]:
query = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad08m_res
"""
df = sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|930910768.529997800000000                               |
+--------------------------------------------------------+



# AD10M

In [335]:

AD10M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/AD10M/LDM_PRD_FRA_AD10M_201005_073359.CSV", header=True, sep=";")
print("nombre lignes source : " + str(AD10M_source.count()))
# basl_max_source = AD01m_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = AD01m_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
AD10M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0703_PRD_20201005_20201005073239_AD10M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(AD10M_res.count()))
# basl_max_extract = AD01m_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_extract = AD01m_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max extract : " + str(basl_max_extract))
# print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 333519
nombre lignes extract : 6395


In [337]:
df_ad10M_corrige_res = AD10M_res.filter("(LDGRP NOT IN('K7') OR LDGRP IS NULL)")
AD10M_source.registerTempTable("ad10m_src")
df_ad10M_corrige_res.registerTempTable("ad10m_res")

query1 = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad10m_src
"""
df1 = sqlContext.sql(query1)


query2 = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad10m_res
"""
df2 = sqlContext.sql(query2)

df1.show(1,False)
df2.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-81749753.339999440000000                               |
+--------------------------------------------------------+

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-81749753.339999930000000                               |
+--------------------------------------------------------+



# AD11M

In [341]:
AD10M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/AD11M/LDM_PRD_FRA_AD11M_201005_073401.CSV", header=True, sep=";")
print("nombre lignes source : " + str(AD10M_source.count()))

AD10M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0703_PRD_20201005_20201005073341_AD11M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(AD10M_res.count()))

df_ad10M_corrige_res = AD10M_res.filter("(LDGRP NOT IN('K7') OR LDGRP IS NULL)")
AD10M_source.registerTempTable("ad10m_src")
df_ad10M_corrige_res.registerTempTable("ad10m_res")

query1 = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad10m_src
"""
df1 = sqlContext.sql(query1)


query2 = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad10m_res
"""
df2 = sqlContext.sql(query2)

df1.show(1,False)
df2.show(1,False)

nombre lignes source : 6983
nombre lignes extract : 2308
+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|25626165.540000170000000                                |
+--------------------------------------------------------+

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|25626165.540000030000000                                |
+--------------------------------------------------------+



In [342]:
AD10M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/AD12M/LDM_PRD_FRA_AD12M_201005_073411.CSV", header=True, sep=";")
print("nombre lignes source : " + str(AD10M_source.count()))

AD10M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0703_PRD_20201005_20201005073749_AD12M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(AD10M_res.count()))

df_ad10M_corrige_res = AD10M_res.filter("(LDGRP NOT IN('K7') OR LDGRP IS NULL)")
AD10M_source.registerTempTable("ad10m_src")
df_ad10M_corrige_res.registerTempTable("ad10m_res")

query1 = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad10m_src
"""
df1 = sqlContext.sql(query1)


query2 = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad10m_res
"""
df2 = sqlContext.sql(query2)

df1.show(1,False)
df2.show(1,False)

nombre lignes source : 3266055
nombre lignes extract : 33397
+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|111888523.990000100000000                               |
+--------------------------------------------------------+

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|111888523.990000870000000                               |
+--------------------------------------------------------+



# AD17

In [345]:
AD10M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/AD17M.csv", header=True, sep=";")
print("nombre lignes source : " + str(AD10M_source.count()))

AD10M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0703_PRD_20201007_20201007191622_AD17M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(AD10M_res.count()))

df_ad10M_corrige_res = AD10M_res.filter("(LDGRP NOT IN('K7') OR LDGRP IS NULL)")
AD10M_source.registerTempTable("ad10m_src")
df_ad10M_corrige_res.registerTempTable("ad10m_res")

query1 = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad10m_src
WHERE RACCT = '2412001224'
"""
df1 = sqlContext.sql(query1)


query2 = """ 
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ad10m_res
"""
df2 = sqlContext.sql(query2)

df1.show(1,False)
df2.show(1,False)

nombre lignes source : 23221876
nombre lignes extract : 15592219
+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|9320.189999999940000                                    |
+--------------------------------------------------------+

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-392794868.830175640000000                              |
+--------------------------------------------------------+



In [344]:
-392792610.450071400000000 -    (-392794868.830175700000000)                       

2258.38010430336

In [339]:
df_ad10M_corrige_res.select("BUKRS").distinct().show()

+-----+
|BUKRS|
+-----+
| 9PFR|
| 9LFR|
+-----+



In [None]:
## CM01D
CM01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/LDM_PRD_FRA_CM01D_200928__1_of_1.CSV", header=True, sep=";")
print("nombre lignes source : " + str(CM01D_source.count()))
# basl_max_source = CM01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = CM01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
CM01D_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/GY05_PRD_GY0611_PRD_20200928_20200928033846_CM01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(CM01D_res.count()))
# basl_max_extract = CM01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_extract = CM01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max extract : " + str(basl_max_extract))
# print("basl out id min extract : " + str(basl_min_extract))


In [None]:
## CM01D
CM01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/CM01D/LDM_PRD_FRA_CM01D_200930__1_of_1.CSV", header=True, sep=";")
print("nombre lignes source : " + str(CM01D_source.count()))
# basl_max_source = CM01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = CM01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
CM01D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0611_PRD_20200930_20200930035357_CM01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(CM01D_res.count()))
# basl_max_extract = CM01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_extract = CM01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max extract : " + str(basl_max_extract))
# print("basl out id min extract : " + str(basl_min_extract))


In [None]:
CM01D_source.registerTempTable("cm01d_src")
CM01D_res.registerTempTable("cm01d_res")

In [None]:
query = """
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
FROM cm01d_src
"""
dfCM01D= sqlContext.sql(query)
dfCM01D.show(1,False)

In [None]:
query = """
SELECT CAST(SUM(EDS_TWBTRG) AS DECIMAL(38,15))
FROM cm01d_res

"""
dfCM01D= sqlContext.sql(query)
dfCM01D.show(1,False)

In [None]:
dfCM01D.show(1,False)

In [None]:
CM01D_source.select("FMK1").distinct().show(34)

In [None]:
CM01D_res.select("FMK1").distinct().show(34)

In [None]:
query = """
SELECT RACCT, ZZONR, MDCF,SUM(LADM_TWBTRG)
FROM cm01d_src
GROUP BY RACCT, ZZONR, MDCF
"""
df_tcd = sqlContext.sql(query)
df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/CM01D/tcd.csv,",header=True,sep=';')

In [None]:
query = """
SELECT RACCT, ZZONR, MDCF,SUM(EDS_TWBTRG)
FROM cm01d_res
GROUP BY RACCT, ZZONR, MDCF
"""
df_tcd = sqlContext.sql(query)
df_tcd.repartition(1).write.csv("/data/dropbox/larcher/INFOMAPS/CM01D/tcd_res.csv,",header=True,sep=';')

In [None]:
CM01D_source.select("BUKRS","GJAHR","MONAT","RACCT","BEC_ERKENNUNG1","BELNR","BEWAR","BLART","BUBSZ").subtract(CM01D_res.select("BUKRS","GJAHR","MONAT","RACCT","BEC_ERKENNUNG1","BELNR","BEWAR","BLART","BUBSZ")).count()

In [None]:
## CM01D
CM01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/CM01D/LDM_PRD_FRA_CM01D_201001__1_of_1.CSV", header=True, sep=";")
print("nombre lignes source : " + str(CM01D_source.count()))
# basl_max_source = CM01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = CM01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
CM01D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0611_PRD_20201001_20201001043754_CM01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(CM01D_res.count()))
# basl_max_extract = CM01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_extract = CM01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max extract : " + str(basl_max_extract))
# print("basl out id min extract : " + str(basl_min_extract))


In [None]:
CM01D_source.select("LDGRP").distinct().show()

In [None]:
CM01D_res.select("LDGRP").distinct().show()

In [247]:
## CM01M
CM01M_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/CM01M/LDM_PRD_FRA_CM01M_201001__1_of_1.CSV", header=True, sep=";")
print("nombre lignes source : " + str(CM01M_source.count()))
# basl_max_source = CM01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = CM01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
CM01M_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0611_PRD_20201001_20201001044946_CM01M_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(CM01M_res.count()))
# basl_max_extract = CM01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_extract = CM01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max extract : " + str(basl_max_extract))
# print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 4158701
nombre lignes extract : 3655673


In [255]:
CM01M_source.registerTempTable("CM01M_source")
CM01M_res.registerTempTable("CM01M_res")

In [258]:
query = """
SELECT CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
FROM CM01M_source

"""
df= sqlContext.sql(query)
df.show(1,False)

+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|-147413880.779997530000000                              |
+--------------------------------------------------------+



In [257]:
query = """
SELECT CAST(SUM(EDS_TWBTRG) AS DECIMAL(38,15))
FROM CM01M_res

"""
dfCM01D= sqlContext.sql(query)
dfCM01D.show(1,False)

+-------------------------------------------------------+
|CAST(sum(CAST(EDS_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+-------------------------------------------------------+
|-147413880.779942570000000                             |
+-------------------------------------------------------+



In [248]:
CM01M_source.select("LDGRP").distinct().show()

+-----+
|LDGRP|
+-----+
| null|
+-----+



In [None]:
query = """
SELECT CAST(SUM(EDS_TWBTRG) AS DECIMAL(38,15))
FROM cm01d_res

"""
dfCM01D= sqlContext.sql(query)
dfCM01D.show(1,False)

In [None]:
CM01M_source. filter("LDGRP= ")

In [None]:
## IG01D
IG01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/LDM_PRD_FRA_IG01D_200928_031502.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IG01D_source.count()))
basl_max_source = IG01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IG01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IG01D_res = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/GY05_PRD_GY0605_PRD_20200928_20200928133239_IG01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IG01D_res.count()))
basl_max_extract = IG01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IG01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


In [None]:
IG01D_source.select("ZZONR").distinct().show()

In [112]:
## IG01D
IG01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IG01D/LDM_PRD_FRA_IG01D_200929_031502.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IG01D_source.count()))
basl_max_source = IG01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IG01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IG01D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0605_PRD_20200929_20200929051642_IG01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IG01D_res.count()))
basl_max_extract = IG01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IG01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 204434
basl out id max source : 22555519256
basl out id min source : 22310956579
nombre lignes extract : 203602
basl out id max extract : 22555519256
basl out id min extract : 22310956579


In [None]:
IG01D_source.printSchema()

In [111]:
## IG01D
IG01D_source = spark.read.csv("/data/dropbox/larcher/INFOMAPS/MEP/source/IG01D/LDM_PRD_FRA_IG01D_200930_031502.CSV", header=True, sep=";")
print("nombre lignes source : " + str(IG01D_source.count()))
basl_max_source = IG01D_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = IG01D_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
IG01D_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY05_PRD_GY0605_PRD_20200930_20200930050414_IG01D_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(IG01D_res.count()))
basl_max_extract = IG01D_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = IG01D_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


nombre lignes source : 214963
basl out id max source : 22559711435
basl out id min source : 22310956579
nombre lignes extract : 214126
basl out id max extract : 22559711435
basl out id min extract : 22310956579


In [113]:
IG01D_source.select("ZZONR").distinct().show()

+---------+
|    ZZONR|
+---------+
|   CLESTD|
|   TPCTNV|
|   SIPFAI|
|   ABSVEI|
|   REAEST|
|  AECSAPE|
|     AECS|
|ABSCLAIMS|
|   TPCTAE|
|   TPCGNV|
|   GCPSYS|
|   REASSU|
|  AECSCOA|
|   RENTES|
|   COACES|
|  AECSREA|
+---------+



In [None]:
IG01D_res.select("ZZONR").distinct().show()

In [114]:
IG01D_source.filter("ZZONR NOT IN ('REAEST','REASSU') and LDGRP NOT IN('I1','M1','K4')").count()

0

In [122]:
df_source_corrige = IG01D_source.filter("ZZONR NOT IN ('REAEST','REASSU')")

In [120]:
IG01D_source.filter("LDGRP NOT IN('I1','M1','K4')").count()

181

In [119]:
df_source_corrige.filter("LDGRP NOT IN('I1','M1','K4')").count()

0

In [121]:
IG01D_res.select("LDGRP").distinct().show()

+-----+
|LDGRP|
+-----+
| null|
+-----+



In [128]:
IG01D_source.registerTempTable("ig01d_src")

In [124]:
IG01D_res.registerTempTable("ig01d_res")

In [131]:
query = """
select CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ig01d_src
WHERE ZZONR NOT IN('REAEST','REASSU')
"""
df= sqlContext.sql(query)
df.show(1,False)


+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|179641567.170004550000000                               |
+--------------------------------------------------------+



In [133]:
query = """
select DISTINCT LDGRP 
from ig01d_src
WHERE ZZONR NOT IN('REAEST','REASSU')
"""
df= sqlContext.sql(query)
df.show(20)


+-----+
|LDGRP|
+-----+
| null|
+-----+



In [127]:
query = """
select CAST(SUM(LADM_TWBTRG) AS DECIMAL(38,15))
from ig01d_res
"""
df= sqlContext.sql(query)
df.show(1,False)


+--------------------------------------------------------+
|CAST(sum(CAST(LADM_TWBTRG AS DOUBLE)) AS DECIMAL(38,15))|
+--------------------------------------------------------+
|179641567.170007170000000                               |
+--------------------------------------------------------+



In [None]:
IG01D_source.select("BLART").distinct().show()

In [None]:
IG01D_res.select("BLART").distinct().show()

In [116]:
IG01D_source.select("LDGRP").distinct().show()

+-----+
|LDGRP|
+-----+
| null|
|   I1|
|   M1|
|   L1|
+-----+



In [None]:
IG01D_source.filter("ZZONR NOT IN ('REAEST','REASSU')").drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG").subtract(IG01D_res.drop("ACCRESPID","ACCRESPIDF","LADM_TWBTRG")).show()

In [None]:
dfBASL = spark.read.parquet("/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020")
#dfBASL = spark.read.parquet("/data/dev_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020")
dfBASL.registerTempTable("BASL_AZF_DATE_CPT")

In [None]:
query1 = """SELECT AUDIT_SRC_FILE
            FROM BASL_AZF_DATE_CPT
            WHERE BASL_OUT_ID = '22539931445'
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(10,False)

In [None]:
query1 = """SELECT BASL_OUT_ID
            FROM skb_24_res
            WHERE BASL_OUT_ID = '22539931445'
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(10,False)

In [None]:
query1 = """SELECT BASL_OUT_ID
            FROM skb_25_res
            WHERE BASL_OUT_ID = '22539931445'
            """

df_tcd = sqlContext.sql(query1)
df_tcd.show(10,False)

In [None]:
UNION ALL(
SELECT BASL_AZF_DATE_CPT.FS_BUKRS,PFS,BUDAT,BELDAT,BEC_ERKENNUNG1,BEC_ERKENNUNG2,BEC_ERKENNUNG3,BEC_ERKENNUNG5,BEC_ERKENNUNG7,FS_BSCHL,
                        FS_VBUND1,FS_VBUND2,FMK1,FMK2,PRODSCHL,VMK,KHD,FS_DCH,MAY,CSG,BKTXT,SGTXT,TWBTRG,FS_WAERS,FS_BLNR1,
                        VSNR,GSA,GSAKAT,TARIF,KUNDENNR,INKASSOART,BUBSZ,BGZN,SNR,RISKLAND,ZUGJAHR,ZUGMONAT,FS_BLNR2,FS_HWAERS,
                        VT_AZP_ORIGINAL,RVA,RIP,DATE4,XREF3,GSBER,PAYID,GSP,REFPGA,TICKID,MDCF,ABZ,WERBERNR,GJAGJE,GJVJ,WITHT,
                        WT_QSSHH,ZFBDT,BUPRT,MWSKZ,DATE1,DATE2,DATE3,RISK,SUBRISK,QTE_UC,ASSURE,ZZONR,KOSTL,WAERS,BASL_AZF_DATE_CPT.BUKRS,BLDAT,
                        BSCHL,BEWAR,LDGRP,VBUND,YYLOB,PRG,YYPRG,YYDCH,YYCSG,DMBTR,BELNR,CPUDT,BASL_AZF_DATE_CPT.RACCT,A_ALTACCOUNT,SHKZG,BLART,
                        LIFNR,ZZBSZ,BASL_OUT_ID,BASL_AZF_DATE_CPT.CHANGE_DATE,HBSNR,VTNRAB,VTNRRE
FROM parquet.`%s` AS BASL_AZF_DATE_CPT
LEFT OUTER JOIN parquet.`%sDATA_DATE_PARTITION=%s` OWNER_RACCT ON (BASL_AZF_DATE_CPT.RACCT = OWNER_RACCT.RACCT)
LEFT OUTER JOIN parquet.`%sDATA_DATE_PARTITION=%s` AS OWNER_BUKRS ON (BASL_AZF_DATE_CPT.BUKRS = AS OWNER_BUKRS.BUKRS)
LEFT OUTER JOIN parquet.`%sDATA_DATE_PARTITION=%s` AS OWNER_ACCOUNTS ON ((OWNER_RACCT.ID_RACCT = AS OWNER_ACCOUNTS.ID_RACCT) AND (AS OWNER_BUKRS.ID_BUKRS = AS OWNER_ACCOUNTS.ID_BUKRS))
WHERE BUKRS IN ('9LFR','9RFR','NCAL')
    AND ABZ IN ('V700','V701','V702','V708','V710','V711','V712','V713','V714','V71D','V720','V722','V7P0','V7P2','V7Q0','V7Q2','V71E','V721','V730','V732')
    AND FS_DCH IN ('RSAG','MAL','AEL')
    AND PFS NOT LIKE 'VE%'
    AND BUDAT > '%s'
    AND RACCT IN ('2411101013','6111101250','6211002310'))

In [None]:
res_xf01d ="/data/dropbox/larcher/INFOMAPS/XF01D/XF01D.csv"
query1 = """
SELECT BASL_AZF_DATE_CPT.FS_BUKRS,PFS,BUDAT,BELDAT,BEC_ERKENNUNG1,BEC_ERKENNUNG2,BEC_ERKENNUNG3,BEC_ERKENNUNG5,BEC_ERKENNUNG7,FS_BSCHL,
                        FS_VBUND1,FS_VBUND2,FMK1,FMK2,PRODSCHL,VMK,KHD,FS_DCH,MAY,CSG,BKTXT,SGTXT,TWBTRG,FS_WAERS,FS_BLNR1,
                        VSNR,GSA,GSAKAT,TARIF,KUNDENNR,INKASSOART,BUBSZ,BGZN,SNR,RISKLAND,ZUGJAHR,ZUGMONAT,FS_BLNR2,FS_HWAERS,
                        VT_AZP_ORIGINAL,RVA,RIP,DATE4,XREF3,GSBER,PAYID,GSP,REFPGA,TICKID,MDCF,ABZ,WERBERNR,GJAGJE,GJVJ,WITHT,
                        WT_QSSHH,ZFBDT,BUPRT,MWSKZ,DATE1,DATE2,DATE3,RISK,SUBRISK,QTE_UC,ASSURE,ZZONR,KOSTL,WAERS,BASL_AZF_DATE_CPT.BUKRS,BLDAT,
                        BSCHL,BEWAR,LDGRP,VBUND,YYLOB,PRG,YYPRG,YYDCH,YYCSG,DMBTR,BELNR,CPUDT,BASL_AZF_DATE_CPT.RACCT,A_ALTACCOUNT,SHKZG,BLART,
                        LIFNR,ZZBSZ,BASL_OUT_ID,BASL_AZF_DATE_CPT.CHANGE_DATE,HBSNR,VTNRAB,VTNRRE
FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet` AS BASL_AZF_DATE_CPT

LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-09-22` AS OWNER_RACCT ON (BASL_AZF_DATE_CPT.RACCT = OWNER_RACCT.RACCT)
LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-09-22`  AS OWNER_BUKRS ON (BASL_AZF_DATE_CPT.BUKRS = OWNER_BUKRS.BUKRS)
LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-09-22` AS OWNER_ACCOUNTS ON ((OWNER_RACCT.ID_RACCT = OWNER_ACCOUNTS.ID_RACCT) AND (OWNER_BUKRS.ID_BUKRS = OWNER_ACCOUNTS.ID_BUKRS))

WHERE BASL_AZF_DATE_CPT.BUKRS IN ('9LFR','9RFR','NCAL')
    AND ABZ IN ('V700','V701','V702','V708','V710','V711','V712','V713','V714','V71D','V720','V722','V7P0','V7P2','V7Q0','V7Q2','V71E','V721','V730','V732')
    AND FS_DCH IN ('RSAG','MAL','AEL')
    AND PFS NOT LIKE 'VE%'
    AND BUDAT > '%s'
    AND BASL_AZF_DATE_CPT.RACCT LIKE '511100%'
UNION ALL(
SELECT BASL_AZF_DATE_CPT.FS_BUKRS,PFS,BUDAT,BELDAT,BEC_ERKENNUNG1,BEC_ERKENNUNG2,BEC_ERKENNUNG3,BEC_ERKENNUNG5,BEC_ERKENNUNG7,FS_BSCHL,
                        FS_VBUND1,FS_VBUND2,FMK1,FMK2,PRODSCHL,VMK,KHD,FS_DCH,MAY,CSG,BKTXT,SGTXT,TWBTRG,FS_WAERS,FS_BLNR1,
                        VSNR,GSA,GSAKAT,TARIF,KUNDENNR,INKASSOART,BUBSZ,BGZN,SNR,RISKLAND,ZUGJAHR,ZUGMONAT,FS_BLNR2,FS_HWAERS,
                        VT_AZP_ORIGINAL,RVA,RIP,DATE4,XREF3,GSBER,PAYID,GSP,REFPGA,TICKID,MDCF,ABZ,WERBERNR,GJAGJE,GJVJ,WITHT,
                        WT_QSSHH,ZFBDT,BUPRT,MWSKZ,DATE1,DATE2,DATE3,RISK,SUBRISK,QTE_UC,ASSURE,ZZONR,KOSTL,WAERS,BASL_AZF_DATE_CPT.BUKRS,BLDAT,
                        BSCHL,BEWAR,LDGRP,VBUND,YYLOB,PRG,YYPRG,YYDCH,YYCSG,DMBTR,BELNR,CPUDT,BASL_AZF_DATE_CPT.RACCT,A_ALTACCOUNT,SHKZG,BLART,
                        LIFNR,ZZBSZ,BASL_OUT_ID,BASL_AZF_DATE_CPT.CHANGE_DATE,HBSNR,VTNRAB,VTNRRE
FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet` AS BASL_AZF_DATE_CPT

LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-RACCT.parquet/DATA_DATE_PARTITION=2020-09-22` AS OWNER_RACCT ON (BASL_AZF_DATE_CPT.RACCT = OWNER_RACCT.RACCT)
LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-BUKRS.parquet/DATA_DATE_PARTITION=2020-09-22`  AS OWNER_BUKRS ON (BASL_AZF_DATE_CPT.BUKRS = OWNER_BUKRS.BUKRS)
LEFT OUTER JOIN parquet.`/data/prod_env/data/parsed_data/GY03/VC/GY03_LADM_OWNER-ACCOUNTS_HAS_COMPANIES.parquet/DATA_DATE_PARTITION=2020-09-22` AS OWNER_ACCOUNTS ON ((OWNER_RACCT.ID_RACCT = OWNER_ACCOUNTS.ID_RACCT) AND (OWNER_BUKRS.ID_BUKRS = OWNER_ACCOUNTS.ID_BUKRS))

WHERE BASL_AZF_DATE_CPT.BUKRS IN ('9LFR','9RFR','NCAL')
    AND ABZ IN ('V700','V701','V702','V708','V710','V711','V712','V713','V714','V71D','V720','V722','V7P0','V7P2','V7Q0','V7Q2','V71E','V721','V730','V732')
    AND FS_DCH IN ('RSAG','MAL','AEL')
    AND PFS NOT LIKE 'VE%'
    AND BUDAT > '%s'
    AND BASL_AZF_DATE_CPT.RACCT IN ('2411101013','6111101250','6211002310'))
"""

df_xf01d = sqlContext.sql(query1)
df_xf01d = runInfoMap2(spark, query1, dict_dateFormat, dict_timestampFormat, '.', res_xf01d, "csv", 1, True, ";")
#df_tcd.show(10,False)

In [None]:
SELECT BASL_AZF_DATE_CPT.FS_BUKRS,PFS,BUDAT,BELDAT,BEC_ERKENNUNG1,BEC_ERKENNUNG2,BEC_ERKENNUNG3,BEC_ERKENNUNG5,BEC_ERKENNUNG7,FS_BSCHL,
                        FS_VBUND1,FS_VBUND2,FMK1,FMK2,PRODSCHL,VMK,KHD,FS_DCH,MAY,CSG,BKTXT,SGTXT,TWBTRG,FS_WAERS,FS_BLNR1,
                        VSNR,GSA,GSAKAT,TARIF,KUNDENNR,INKASSOART,BUBSZ,BGZN,SNR,RISKLAND,ZUGJAHR,ZUGMONAT,FS_BLNR2,FS_HWAERS,
                        VT_AZP_ORIGINAL,RVA,RIP,DATE4,XREF3,GSBER,PAYID,GSP,REFPGA,TICKID,MDCF,ABZ,WERBERNR,GJAGJE,GJVJ,WITHT,
                        WT_QSSHH,ZFBDT,BUPRT,MWSKZ,DATE1,DATE2,DATE3,RISK,SUBRISK,QTE_UC,ASSURE,ZZONR,KOSTL,WAERS,BASL_AZF_DATE_CPT.BUKRS,BLDAT,
                        BSCHL,BEWAR,LDGRP,VBUND,YYLOB,PRG,YYPRG,YYDCH,YYCSG,DMBTR,BELNR,CPUDT,BASL_AZF_DATE_CPT.RACCT,A_ALTACCOUNT,SHKZG,BLART,
                        LIFNR,ZZBSZ,BASL_OUT_ID,BASL_AZF_DATE_CPT.CHANGE_DATE,HBSNR,VTNRAB,VTNRRE
FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020` AS BASL_AZF_DATE_CPT
LEFT OUTER JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020` AS OWNER_RACCT ON (BASL_AZF_DATE_CPT.RACCT = OWNER_RACCT.RACCT)
LEFT OUTER JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020` AS OWNER_BUKRS ON (BASL_AZF_DATE_CPT.BUKRS = OWNER_BUKRS.BUKRS)
LEFT OUTER JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020` AS OWNER_ACCOUNTS ON ((OWNER_RACCT.ID_RACCT = OWNER_ACCOUNTS.ID_RACCT) AND (AS OWNER_BUKRS.ID_BUKRS = OWNER_ACCOUNTS.ID_BUKRS))

WHERE BUKRS IN ('9LFR','9RFR','NCAL')
    AND ABZ IN ('V700','V701','V702','V708','V710','V711','V712','V713','V714','V71D','V720','V722','V7P0','V7P2','V7Q0','V7Q2','V71E','V721','V730','V732')
    AND FS_DCH IN ('RSAG','MAL','AEL')
    AND PFS NOT LIKE 'VE%'
    AND BUDAT > '2020-09-08'
    AND RACCT LIKE '511100%'
UNION ALL(
SELECT BASL_AZF_DATE_CPT.FS_BUKRS,PFS,BUDAT,BELDAT,BEC_ERKENNUNG1,BEC_ERKENNUNG2,BEC_ERKENNUNG3,BEC_ERKENNUNG5,BEC_ERKENNUNG7,FS_BSCHL,
                        FS_VBUND1,FS_VBUND2,FMK1,FMK2,PRODSCHL,VMK,KHD,FS_DCH,MAY,CSG,BKTXT,SGTXT,TWBTRG,FS_WAERS,FS_BLNR1,
                        VSNR,GSA,GSAKAT,TARIF,KUNDENNR,INKASSOART,BUBSZ,BGZN,SNR,RISKLAND,ZUGJAHR,ZUGMONAT,FS_BLNR2,FS_HWAERS,
                        VT_AZP_ORIGINAL,RVA,RIP,DATE4,XREF3,GSBER,PAYID,GSP,REFPGA,TICKID,MDCF,ABZ,WERBERNR,GJAGJE,GJVJ,WITHT,
                        WT_QSSHH,ZFBDT,BUPRT,MWSKZ,DATE1,DATE2,DATE3,RISK,SUBRISK,QTE_UC,ASSURE,ZZONR,KOSTL,WAERS,BASL_AZF_DATE_CPT.BUKRS,BLDAT,
                        BSCHL,BEWAR,LDGRP,VBUND,YYLOB,PRG,YYPRG,YYDCH,YYCSG,DMBTR,BELNR,CPUDT,BASL_AZF_DATE_CPT.RACCT,A_ALTACCOUNT,SHKZG,BLART,
                        LIFNR,ZZBSZ,BASL_OUT_ID,BASL_AZF_DATE_CPT.CHANGE_DATE,HBSNR,VTNRAB,VTNRRE
FROM parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020` AS BASL_AZF_DATE_CPT
LEFT OUTER JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020` AS OWNER_RACCT ON (BASL_AZF_DATE_CPT.RACCT = OWNER_RACCT.RACCT)
LEFT OUTER JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020` AS OWNER_BUKRS ON (BASL_AZF_DATE_CPT.BUKRS = OWNER_BUKRS.BUKRS)
LEFT OUTER JOIN parquet.`/data/prod_env/data/edited_data/data_prep/GY/BASL_720_dt_cpt.parquet/GJAHR=2020` AS OWNER_ACCOUNTS ON ((OWNER_RACCT.ID_RACCT = OWNER_ACCOUNTS.ID_RACCT) AND (OWNER_BUKRS.ID_BUKRS = OWNER_ACCOUNTS.ID_BUKRS))
WHERE BUKRS IN ('9LFR','9RFR','NCAL')
    AND ABZ IN ('V700','V701','V702','V708','V710','V711','V712','V713','V714','V71D','V720','V722','V7P0','V7P2','V7Q0','V7Q2','V71E','V721','V730','V732')
    AND FS_DCH IN ('RSAG','MAL','AEL')
    AND PFS NOT LIKE 'VE%'
    AND BUDAT > '2020-09-08'
    AND RACCT IN ('2411101013','6111101250','6211002310'))

In [None]:
ZZONR = REA
AE = PV ZA K4 LDGRP M3

In [None]:
# EW3WE

In [None]:
## EW3WE
EW3WE_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/EW3WE/LDM_PRD_FRA_EW3WE_200926_110002.CSV", header=True, sep=";")
print("nombre lignes source : " + str(EW3WE_source.count()))
basl_max_source = EW3WE_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_source = EW3WE_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max source : " + str(basl_max_source))
print("basl out id min source : " + str(basl_min_source))
EW3WE_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20200926_20200926020154_EW3WE_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(EW3WE_res.count()))
basl_max_extract = EW3WE_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = EW3WE_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


In [None]:
EW3WE_source.select("BASL_OUT_ID").subtract(EW3WE_res.select("BASL_OUT_ID")).show()

In [None]:
EW3WE_source.select("RACCT").distinct().filter("BASL_OUT_ID IN ('22537299858','22532625289','22537299856','22532625293','22532625290','22532625179','22537299859','22537299857','22537395031','22532625287','22532625294','22532625295','22532625288','22532625178')").show()

In [3]:
## EW3WE
# EW3WE_source = spark.read.csv("/data/dropbox/larcher/FLUX_RETOURS/EW3WE/LDM_PRD_FRA_EW3WE_200926_110002.CSV", header=True, sep=";")
# print("nombre lignes source : " + str(EW3WE_source.count()))
# basl_max_source = EW3WE_source.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
# basl_min_source = EW3WE_source.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
# print("basl out id max source : " + str(basl_max_source))
# print("basl out id min source : " + str(basl_min_source))
EW3WE_res = spark.read.csv("/data/prod_env/data/out/archive/GY_imfrs/GY06_PRD_BI_PRD_20201003_20201003020253_EW3WE_0004.CSV", header=True, sep=";")
print("nombre lignes extract : " + str(EW3WE_res.count()))
basl_max_extract = EW3WE_res.agg({"BASL_OUT_ID": "max"}).collect()[0][0]
basl_min_extract = EW3WE_res.agg({"BASL_OUT_ID": "min"}).collect()[0][0]
print("basl out id max extract : " + str(basl_max_extract))
print("basl out id min extract : " + str(basl_min_extract))


nombre lignes extract : 226
basl out id max extract : 22574338552
basl out id min extract : 22566434601


In [346]:
spark.stop()