In [13]:
from time import time
from pyspark import SparkContext
for j in range(1,10):
    sc = SparkContext(master=f'local[{j}]')
    t0 = time()
    for i in range(10):
        sc.parallelize([1,2] * 1000000).reduce(lambda x,y:x+y)
    print(f'{j} executors, time= {time() - t0}')
    sc.stop()

ValueError: Cannot run multiple SparkContexts at once; existing SparkContext(app=individual_mast, master=local[*]) created by getOrCreate at /tmp/ipykernel_366436/2261142059.py:1 

In [14]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import UserDefinedFunction
from pyspark.sql.types import StringType,IntegerType, StructType, StructField
from pyspark.sql.functions import collect_list, split, struct, regexp_replace, col, round,concat,lit,avg

In [15]:
spark = SparkSession.builder.master("local[*]").appName('individual_mast').getOrCreate()

individual_contribution_schema = StructType([
    StructField("CMTE_ID", StringType(), False),
    StructField("AMNDT_IND", StringType(), True),
    StructField("RPT_TP", StringType(), True),
    StructField("TRANSACTION_PGI", StringType(), True),
    StructField("IMAGE_NUM", StringType(), True),
    StructField("TRANSACTION_TP", StringType(), True),
    StructField("ENTITY_TP", StringType(), True),
    StructField("NAME", StringType(), True),
    StructField("CITY", StringType(), True),
    StructField("STATE", StringType(), True),
    StructField("ZIP_CODE", StringType(), True),
    StructField("EMPLOYER", StringType(), True),
    StructField("OCCUPATION", StringType(), True),
    StructField("TRANSACTION_DT", IntegerType(), True),
    StructField("TRANSACTION_AMT", IntegerType(), True),
    StructField("OTHER_ID", StringType(), True),
    StructField("TRAN_ID", StringType(), True),
    StructField("FILE_NUM", IntegerType(), True),
    StructField("MEMO_CD", StringType(), True),
    StructField("MEMO_TEXT", StringType(), True),
    StructField("SUB_ID", StringType(), True)
    
])

individual_contribution = spark.read.format("csv").options(delimiter='|',inferschema='true',header='true').schema(individual_contribution_schema).load('data/indiv_cont/itcon*.txt')
cm_to_cn = spark.read.format("csv").options(delimiter='|',inferschema='true',header='true').load('data/itpas2.txt')
cm = spark.read.format("csv").options(delimiter='|',inferschema='true',header='true').load('data/cm.txt')
cn = spark.read.format("csv").options(delimiter='|',inferschema='true',header='true').load('data/cn2.txt')

                                                                                

In [16]:
individual_contribution = individual_contribution.drop ("AMNDT_IND","RPT_TP","TRANSACTION_PGI","IMAGE_NUM","TRANSACTION_TP","EMPLOYER","TRANSACTION_DT","TRAN_ID","FILE_NUM","MEMO_CD","MEMO_TEXT","SUB_ID")
cm_to_cn = cm_to_cn.drop("AMNDT_IND","RPT_TP","TRANSACTION_PGI","IMAGE_NUM","TRANSACTION_TP","ENTITY_TP","EMPLOYER","TRANSACTION_DT","TRAN_ID","FILE_NUM","MEMO_CD","MEMO_TEXT","SUB_ID")           
cm = cm.drop("CMTE_NM","TRES_NM","CMTE_ST1","CMTE_ST2","CMTE_CITY","CMTE_ST","CMTE_ZIP","CMTE_DSGN","CMTE_TP","CMTE_PTY_AFFILIATION","CMTE_FILING_FREQ","ORG_TP","CONNECTED_ORG_NM")
cn = cn.drop("CAND_PTY_AFFILIATION","CAND_ELECTION_YR","CAND_OFFICE_ST","CAND_OFFICE","CAND_OFFICE_DISTRICT","CAND_ICI","CAND_STATUS","CAND_PCC","CAND_ST1","CAND_ST2","CAND_CITY","CAND_ST","CAND_ZIP")

In [17]:
cm_to_cn.createOrReplaceTempView("cm_to_cn_tb")
individual_contribution.createOrReplaceTempView("individual_cont_tb")

individual_cont_tb = spark.sql(""" SELECT * FROM individual_cont_tb WHERE (ENTITY_TP == "IND" OR ENTITY_TP == "CAN") """)

In [18]:
individual_cont_tb.show(5)
individual_cont_tb.createOrReplaceTempView("individual_cont_tb")

22/11/21 01:13:05 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00000935, IND, EDELMAN, SHARON, NEW YORK, NY, 100242713, UNEMPLOYED, 25, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00000935
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont.txt
+---------+---------+----------------+-------------+-----+---------+----------+---------------+--------+
|  CMTE_ID|ENTITY_TP|            NAME|         CITY|STATE| ZIP_CODE|OCCUPATION|TRANSACTION_AMT|OTHER_ID|
+---------+---------+----------------+-------------+-----+---------+----------+---------------+--------+
|C00000935|      IND| EDELMAN, SHARON|     NEW YORK|   NY|100242713|UNEMPLOYED|             15|    null|
|C00000935|      IND| EDELMAN, SHARON|     NEW YORK|   NY|100242713|UNEMPLOYED|             15|    null|
|C00000935|      IND|  DIEPPA, ISMAEL|  ALBUQUERQUE|   NM|871257039|   RETIRED|            150|    null|

In [19]:
from pyspark.sql.functions import substring, length, col, expr
individual_cont_tb = individual_cont_tb.withColumn('ZIP_CODE', individual_cont_tb['ZIP_CODE'].substr(1, 5))
individual_cont_tb.show()

22/11/21 01:13:11 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00000935, IND, EDELMAN, SHARON, NEW YORK, NY, 100242713, UNEMPLOYED, 25, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00000935
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont.txt
+---------+---------+------------------+-------------+-----+--------+----------+---------------+--------+
|  CMTE_ID|ENTITY_TP|              NAME|         CITY|STATE|ZIP_CODE|OCCUPATION|TRANSACTION_AMT|OTHER_ID|
+---------+---------+------------------+-------------+-----+--------+----------+---------------+--------+
|C00000935|      IND|   EDELMAN, SHARON|     NEW YORK|   NY|   10024|UNEMPLOYED|             15|    null|
|C00000935|      IND|   EDELMAN, SHARON|     NEW YORK|   NY|   10024|UNEMPLOYED|             15|    null|
|C00000935|      IND|    DIEPPA, ISMAEL|  ALBUQUERQUE|   NM|   87125|   RETIRED|            150|   

In [20]:
from geopy.geocoders       import GoogleV3
from pyspark.sql.functions import col, udf, when
from pyspark.sql.types     import FloatType, ArrayType

geolocator = GoogleV3(api_key="AIzaSyA-bG9IEALnixDF5RBKfNPgwzEKSjO6smE")
zipc = individual_cont_tb.withColumn("long_lat", lit(None))
zipc.createOrReplaceTempView("zipc")

In [21]:
zipcode = zipc.select('ZIP_CODE').distinct()

In [22]:
coords=zipcode.rdd.map(lambda x: x[0]).collect()

22/11/21 01:14:17 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 100242713
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont.txt




22/11/21 01:15:21 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 91302
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210221_20210408.txt




22/11/21 01:15:23 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 761032524
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210409_20210520.txt




22/11/21 01:15:25 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 20170
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210521_20210625.txt




22/11/21 01:15:27 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 729575608
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210626_20210722.txt




22/11/21 01:15:29 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 301885023
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210723_20210815.txt




22/11/21 01:15:31 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 015322023
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210816_20210906.txt




22/11/21 01:15:33 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 974017833
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210907_20210928.txt




22/11/21 01:15:35 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 300417868
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210929_20211017.txt




22/11/21 01:15:37 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 336473563
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211018_20211104.txt




22/11/21 01:15:39 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 770423107
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211105_20211124.txt




22/11/21 01:15:41 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 770423107
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211105_20211125.txt




22/11/21 01:15:43 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 57049
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211125_20211213.txt




22/11/21 01:15:45 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 57049
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211125_20211214.txt




22/11/21 01:15:47 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 71282
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211126_20211214.txt




22/11/21 01:15:49 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 775107971
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211214_20220103.txt




22/11/21 01:15:51 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 57103
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211215_20220106.txt




22/11/21 01:15:53 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 94611
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220103_20220216.txt




22/11/21 01:15:55 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 73160
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220103_20220217.txt
22/11/21 01:15:57 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 763893104
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220104_20220319.txt




22/11/21 01:15:59 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 553912365
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220107_20220219.txt




22/11/21 01:16:01 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 19034
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220217_20220319.txt




22/11/21 01:16:03 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 44060
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220218_20220320.txt




22/11/21 01:16:05 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 85016
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220220_20220323.txt




22/11/21 01:16:07 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 85016
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220220_20220326.txt




22/11/21 01:16:09 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 067961407
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220320_20220418.txt




22/11/21 01:16:11 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 87190
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220321_20220419.txt




22/11/21 01:16:13 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 488549759
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220420.txt




22/11/21 01:16:15 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 100241722
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220421.txt




22/11/21 01:16:17 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 488549759
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220503.txt




22/11/21 01:16:19 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 488549759
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220511.txt




22/11/21 01:16:21 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 488549759
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220512.txt




22/11/21 01:16:23 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 488549759
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220513.txt




22/11/21 01:16:25 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 488549759
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20311222.txt




22/11/21 01:16:27 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 995075113
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220419_20220512.txt




22/11/21 01:16:29 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 221022512
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220420_20220513.txt




22/11/21 01:16:31 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 221012002
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220421_20220514.txt




22/11/21 01:16:33 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 221012002
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220421_20220515.txt




22/11/21 01:16:35 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 850294029
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220422_20220516.txt




22/11/21 01:16:37 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 221012305
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220504_20311222.txt




22/11/21 01:16:39 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 90049
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220512_20311222.txt
22/11/21 01:16:39 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 221015751
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220513_20220604.txt




22/11/21 01:16:41 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 357418903
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220513_20311222.txt
22/11/21 01:16:43 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 220031513
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220514_20220605.txt




22/11/21 01:16:45 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 222072045
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220515_20220606.txt




22/11/21 01:16:47 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 352094454
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220516_20220609.txt




22/11/21 01:16:49 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 339572914
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220517_20220612.txt




22/11/21 01:16:51 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 926062622
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220605_20220626.txt




22/11/21 01:16:53 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 910304935
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220606_20220627.txt




22/11/21 01:16:55 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 18640
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220607_20220628.txt




22/11/21 01:16:57 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 60546
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220609_20220630.txt




22/11/21 01:16:59 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 852818667
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220613_20311222.txt
22/11/21 01:16:59 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 336062852
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220627_20220727.txt




22/11/21 01:17:01 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 926204810
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220628_20220728.txt




22/11/21 01:17:03 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 287689187
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220628_20220802.txt




22/11/21 01:17:05 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 95762
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220629_20220805.txt




22/11/21 01:17:07 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 902662076
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220629_20220817.txt




22/11/21 01:17:09 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 902662076
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220629_20311222.txt




22/11/21 01:17:11 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 488549759
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220727_20220824.txt




22/11/21 01:17:13 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 331346432
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220729_20220829.txt




22/11/21 01:17:15 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 606146085
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220803_20311222.txt




22/11/21 01:17:17 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 56085
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220806_20311222.txt




22/11/21 01:17:19 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 797658918
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220825_20311222.txt




22/11/21 01:17:20 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 35978
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220830_20311222.txt




22/11/21 01:17:26 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 402062618
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220514_20311222.txt




22/11/21 01:18:08 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 067961407
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220320_20311222.txt
22/11/21 01:18:08 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 943012820
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220327_20311222.txt




22/11/21 01:18:09 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 68022
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220630_20311222.txt
22/11/21 01:18:10 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: IND, 55436
 Schema: ENTITY_TP, ZIP_CODE
Expected: ENTITY_TP but found: IND
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220818_20311222.txt


                                                                                

In [None]:
y=0

list = []
for row in coords:
  
    try:
        location = geolocator.geocode(coords[y])
        result = str((location.latitude, location.longitude))
        list.append(result)
        coordinates = spark.createDataFrame([(value,) for value in list], ['id'])
  
    except:
        result = ""
    y+=1
    if y == 41116:
        break

In [11]:
cm.createOrReplaceTempView("cm_tb")

In [13]:
cn.createOrReplaceTempView("cn_tb")
test4 = spark.sql("""
SELECT 
    i.NAME AS INDIVIDUAL, i.CITY AS CITY, i.STATE AS STATE, i.ZIP_CODE AS ZIP_CODE, i.OCCUPATION, cm_tb.CAND_ID as ID,  
    SUM(i.TRANSACTION_AMT) AS TOTAL_MONEY_DONATED    
FROM individual_cont_tb AS i
FULL JOIN cm_tb ON (i.CMTE_ID == cm_tb.CMTE_ID) 
WHERE cm_tb.CAND_ID IS NOT NULL AND i.OTHER_ID IS NULL AND i.NAME IS NOT NULL AND (i.ENTITY_TP = 'CAN' OR i.ENTITY_TP = 'IND')
GROUP BY i.CMTE_ID, i.NAME, i.OTHER_ID, cm_tb.CAND_ID, i.CITY, i.STATE, i.ZIP_CODE, i.OCCUPATION
ORDER BY ID desc, TOTAL_MONEY_DONATED DESC 
""")
test4.createOrReplaceTempView("donorTablePre")

[Stage 10:==>                                                    (12 + 2) / 249]

In [14]:
tempTable1 = spark.sql("""

select ID, INDIVIDUAL, CITY, STATE, ZIP_CODE, OCCUPATION, TOTAL_MONEY_DONATED
from donorTablePre
order by ID desc
""")
tempTable1.createOrReplaceTempView("TEMP_TABLE1")

[Stage 10:====>                                                  (21 + 2) / 249]

In [15]:
test4.createOrReplaceTempView("temp_test4")

[Stage 10:=====>                                                 (27 + 2) / 249]

In [16]:
newTest4 = spark.sql("""
select ID, INDIVIDUAL, CITY, STATE, ZIP_CODE, OCCUPATION, TOTAL_MONEY_DONATED,
        row_number() over (partition by ID ORDER BY TOTAL_MONEY_DONATED desc) as indiv_rank
    from TEMP_TABLE1 
    order by ID asc, indiv_rank asc
""")
newTest4.createOrReplaceTempView("donorTablePre_1")



In [17]:
temptable2 = spark.sql("""
select distinct ID
from donorTablePre_1
""")



In [18]:
table5 = spark.sql("""
select *
FROM donorTablePre_1
where indiv_rank <= 5

""")
table5.createOrReplaceTempView("donorTablePre_5")



NameError: name 'donorTablePre_1' is not defined

In [19]:
#Nested-loops
#Reference from david_analytic_state.ipynb

unique = spark.sql("""
SELECT DISTINCT ID
FROM donorTablePre_5
""")

noName = spark.sql("""
SELECT ID AS ID, INDIVIDUAL, CITY, STATE, ZIP_CODE, OCCUPATION, TOTAL_MONEY_DONATED
FROM donorTablePre_5
""")

finalTable = unique.join(
    noName
        .groupBy("ID")
        .agg(collect_list(struct(noName.INDIVIDUAL, noName.CITY, noName.STATE, noName.ZIP_CODE, noName.OCCUPATION, noName.TOTAL_MONEY_DONATED)).alias("donateTable"))
    , "ID"
    , "outer"
)

finalTable.printSchema()

root
 |-- ID: string (nullable = true)
 |-- donateTable: array (nullable = true)
 |    |-- element: struct (containsNull = false)
 |    |    |-- INDIVIDUAL: string (nullable = true)
 |    |    |-- CITY: string (nullable = true)
 |    |    |-- STATE: string (nullable = true)
 |    |    |-- ZIP_CODE: string (nullable = true)
 |    |    |-- OCCUPATION: string (nullable = true)
 |    |    |-- TOTAL_MONEY_DONATED: long (nullable = true)





22/11/20 11:18:28 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765347, IND, AVIEZER, TSAFRIR, CALABASAS, CA, 91302, REAL ESTATE DEVELOPMENT, 250, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765347
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210221_20210408.txt




22/11/20 11:18:31 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00769489, IND, YOUNG, PIPER, FORT WORTH, TX, 761032524, NOT EMPLOYED, 50, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00769489
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210409_20210520.txt




22/11/20 11:18:33 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00737114, IND, BINGEN, KARI, HERNDON, VA, 20170, CHIEF STRATEGY OFFICER, 308, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00737114
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210521_20210625.txt




22/11/20 11:18:35 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00477745, IND, HOPKINS, GLEN JR., VAN BUREN, AR, 729575608, RETIRED, 500, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00477745
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210626_20210722.txt




22/11/20 11:18:38 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00283135, IND, FITZGERALD, ROBERT MARK, WOODSTOCK, GA, 301885023, BROKER, 170, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00283135
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210723_20210815.txt




22/11/20 11:18:40 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00196774, IND, PETERS, WILLIAM, NORTHBOROUGH, MA, 015322023, LANDLORD & PHOTOGRAPHER, 10, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00196774
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210816_20210906.txt




22/11/20 11:18:43 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00544817, IND, KRIER, JEFF, EUGENE, OR, 974017833, STATE FARM AGENT, 25, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00544817
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210907_20210928.txt




22/11/20 11:18:45 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00544817, IND, TILKIN, MARK, CUMMING, GA, 300417868, COUNSEL, 23, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00544817
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210929_20211017.txt




22/11/20 11:18:48 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00701003, IND, PALMER, WILLIAM MR., TAMPA, FL, 336473563, RETIRED, 50, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00701003
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211018_20211104.txt




22/11/20 11:18:50 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00701003, IND, MOSING, DOANLD, HOUSTON, TX, 770423107, PRESIDENT, 250, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00701003
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211105_20211124.txt




22/11/20 11:18:53 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00701003, IND, MOSING, DOANLD, HOUSTON, TX, 770423107, PRESIDENT, 250, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00701003
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211105_20211125.txt




22/11/20 11:18:55 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00628917, IND, JENSEN, SUE, DAKOTA DUNES, SD, 57049, FARMER, 500, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00628917
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211125_20211213.txt




22/11/20 11:18:57 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00628917, IND, JENSEN, SUE, DAKOTA DUNES, SD, 57049, FARMER, 500, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00628917
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211125_20211214.txt




22/11/20 11:19:00 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00781468, IND, SHIVERS, SHANNON, TALLULAH, LA, 71282, MANAGER, 250, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00781468
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211126_20211214.txt




In [20]:
finalTable.coalesce(1).write.format('json').save('out_analytic_individual')

22/11/20 11:19:02 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00701003, IND, LEVIS, HILMA A. MRS., SANTA FE, TX, 775107971, RETIRED, 10, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00701003
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211214_20220103.txt


[Stage 10:=====>         (90 + 2) / 249][Stage 11:>                 (0 + 0) / 1]

22/11/20 11:19:05 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00628917, IND, FARMER, JOEL, SIOUX FALLS, SD, 57103, , 250, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00628917
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211215_20220106.txt


[Stage 10:=====>         (92 + 2) / 249][Stage 11:>                 (0 + 0) / 1]

22/11/20 11:19:08 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00401224, IND, CHEN, KATHERINE, OAKLAND, CA, 94611, NOT EMPLOYED, 3, C00685297
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00401224
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220103_20220216.txt


[Stage 10:=====>         (94 + 2) / 249][Stage 11:>                 (0 + 0) / 1]

22/11/20 11:19:10 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00404392, IND, MCBRIDE, JENNIFER, OKLAHOMA CITY, OK, 73160, OWNER, 250, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00404392
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220103_20220217.txt


[Stage 10:=====>         (96 + 2) / 249][Stage 11:>                 (0 + 0) / 1]

22/11/20 11:19:13 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00325324, IND, HOFF, NEIL, WINDTHORST, TX, 763893104, DAIRY FARMERS OF AMERICA, 300, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00325324
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220104_20220319.txt


[Stage 10:=====>         (98 + 2) / 249][Stage 11:>                 (0 + 0) / 1]

22/11/20 11:19:15 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00067884, IND, KIMMELSHUE, RUTH S, WAYZATA, MN, 553912365, ENTERPRISE LEADER, 416, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00067884
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220107_20220219.txt


[Stage 10:=====>        (100 + 2) / 249][Stage 11:>                 (0 + 0) / 1]

22/11/20 11:19:18 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00136739, IND, WINCHESTER, ROBERT, ALBRIGHTSVILLE, PA, 19034, OPERATOR, 49, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00136739
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220217_20220319.txt


[Stage 10:=====>        (102 + 2) / 249][Stage 11:>                 (0 + 0) / 1]

22/11/20 11:19:20 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00368720, IND, TOKICH, MICHAEL, MENTOR, OH, 44060, SR VICE PRESIDENT CHIEF FINANCIAL OFFI, 161, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00368720
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220218_20220320.txt


[Stage 10:=====>        (104 + 2) / 249][Stage 11:>                 (0 + 0) / 1]

22/11/20 11:19:23 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00473249, IND, BELLER, MICHAEL, PHOENIX, AZ, 85016, DIRECTOR OF ANALYTICS, 600, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00473249
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220220_20220323.txt




22/11/20 11:19:26 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00473249, IND, BELLER, MICHAEL, PHOENIX, AZ, 85016, DIRECTOR OF ANALYTICS, 600, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00473249
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220220_20220326.txt




22/11/20 11:19:29 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00632562, IND, SHEFFIELD, JAMES, WEST CORNWALL, CT, 067961407, RETIRED, 35, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00632562
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220320_20220418.txt




22/11/20 11:19:31 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00797340, IND, KISER, KARL, ALBUQUERQIE, NM, 87190, RETIRED, 500, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00797340
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220321_20220419.txt




22/11/20 11:19:34 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220420.txt




22/11/20 11:19:36 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00800623, IND, LESSAR, STEVE, NEW YORK, NY, 100241722, INVESTMENT MANAGEMENT, 11600, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00800623
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220421.txt




22/11/20 11:19:39 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220503.txt




22/11/20 11:19:41 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220511.txt




22/11/20 11:19:44 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220512.txt




22/11/20 11:19:47 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220513.txt




22/11/20 11:19:50 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20311222.txt




22/11/20 11:19:52 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00811141, IND, CARON, KENNETH, ANCHORAGE, AK, 995075113, RETIRED, 100, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00811141
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220419_20220512.txt




22/11/20 11:19:55 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, MASON, LESLIE, MCLEAN, VA, 221022512, NOT EMPLOYED, 2000, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220420_20220513.txt




22/11/20 11:19:58 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, WARSHAW, MATTHEW, MCLEAN, VA, 221012002, RESEARCHER, 500, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220421_20220514.txt




22/11/20 11:20:00 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, WARSHAW, MATTHEW, MCLEAN, VA, 221012002, RESEARCHER, 500, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220421_20220515.txt




22/11/20 11:20:03 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00696526, IND, AMAN, JOHN, PHOENIX, AZ, 850294029, RETIRED, 100, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00696526
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220422_20220516.txt




22/11/20 11:20:05 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, ROMATOWSKI, PETER J, MCLEAN, VA, 221012305, ATTORNEY, 1000, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220504_20311222.txt




22/11/20 11:20:08 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00776567, IND, HARWITT, KAREN, LOS ANGELES, CA, 90049, NOT EMPLOYED, 50, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00776567
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220512_20311222.txt




22/11/20 11:20:08 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, LEWIS, WARREN LEE, MC LEAN, VA, 221015751, LAWYER, 1000, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220513_20220604.txt




22/11/20 11:20:10 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00778142, IND, STEAKLEY, RODERIC, BROWNSBORO, AL, 357418903, ATTORNEY, 1000, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00778142
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220513_20311222.txt




22/11/20 11:20:12 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, COFFMAN, MIRIAM F, ANNANDALE, VA, 220031513, RETIRED EDUCATOR, 200, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220514_20220605.txt




22/11/20 11:20:15 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, DELONEY, JULIA REBECCA, ARLINGTON, VA, 222072045, MANAGER, 25, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220515_20220606.txt




22/11/20 11:20:17 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00778142, IND, SAVAGE, ROBIN, BIRMINGHAM, AL, 352094454, CONSTRUCTION, 500, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00778142
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220516_20220609.txt




22/11/20 11:20:20 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00694323, IND, SMITH, WILLIAM, SANIBEL, FL, 339572914, RETIRED, 25, C00671891
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00694323
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220517_20220612.txt




22/11/20 11:20:23 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00556506, IND, HWU, ANN, IRVINE, CA, 926062622, REAL ESTATE AGENT, 1059, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00556506
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220605_20220626.txt




22/11/20 11:20:25 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00556506, IND, TSAI, MITCHELL, SOUTH PASADENA, CA, 910304935, ATTORNEY, 500, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00556506
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220606_20220627.txt




22/11/20 11:20:28 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00401224, IND, PEREZ, NANCY, PITTSTON, PA, 18640, NOT EMPLOYED, 1000, C00685297
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00401224
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220607_20220628.txt




22/11/20 11:20:30 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00783134, IND, WILLSEY, KANELLA, RIVERSIDE, IL, 60546, RETIRED, 500, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00783134
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220609_20220630.txt




22/11/20 11:20:33 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00696526, IND, BRAUN, JOSEPH, TEMPE, AZ, 852818667, PROFESSOR, 35, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00696526
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220613_20311222.txt
22/11/20 11:20:34 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00590489, IND, HOLMER, KATHERINE, TAMPA, FL, 336062852, PARALEGAL, 25, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00590489
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220627_20220727.txt




22/11/20 11:20:37 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00556506, IND, WU, JACOB, IRVINE, CA, 926204810, NOT-EMPLOYED, 1000, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00556506
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220628_20220728.txt




22/11/20 11:20:39 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00590489, IND, GIBAUD, JONATHA, PISGAH FOREST, NC, 287689187, , 35, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00590489
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220628_20220802.txt




22/11/20 11:20:42 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00401224, IND, PEARSON, DON, EL DORADO HILLS, CA, 95762, SOFTWARE EXECUTIVE, 50, C00042366
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00401224
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220629_20220805.txt




22/11/20 11:20:44 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00556506, IND, SORENSEN, SCOTT, MANHATTAN BEACH, CA, 902662076, REAL ESTATE, 500, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00556506
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220629_20220817.txt




22/11/20 11:20:46 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00556506, IND, SORENSEN, SCOTT, MANHATTAN BEACH, CA, 902662076, REAL ESTATE, 500, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00556506
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220629_20311222.txt




22/11/20 11:20:48 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 1, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220727_20220824.txt




22/11/20 11:20:51 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00445163, IND, LUKACS, MARYANNE, CORAL GABLES, FL, 331346432, MEDIATOR, 2900, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00445163
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220729_20220829.txt




22/11/20 11:20:53 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00445163, IND, LUSTIG, ANGELA, CHICAGO, IL, 606146085, NOT EMPLOYED, 2900, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00445163
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220803_20311222.txt




22/11/20 11:20:56 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00808329, IND, SELLNER, LORI A., SLEEPY EYE, MN, 56085, JOB PLANNER, 50, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00808329
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220806_20311222.txt




22/11/20 11:20:58 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00699660, IND, ROBERSON, FREDDY, ODESSA, TX, 797658918, RETIRED, 16, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00699660
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220825_20311222.txt




22/11/20 11:21:01 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00803254, IND, MILES, PAM, HENAGAR, AL, 35978, NORT EMPLOYED, 500, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00803254
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220830_20311222.txt




22/11/20 11:21:07 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00778142, IND, KING, MARYSE, LOUISVILLE, KY, 402062618, RETIRED, 25, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00778142
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220514_20311222.txt




22/11/20 11:22:01 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00632562, IND, BUBENIK, PATRICIA, PALO ALTO, CA, 943012820, RETIRED, 5, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00632562
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220327_20311222.txt




22/11/20 11:22:02 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00632562, IND, SHEFFIELD, JAMES, WEST CORNWALL, CT, 067961407, RETIRED, 35, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00632562
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220320_20311222.txt




22/11/20 11:22:04 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00276311, IND, KOLLI, RAMA, ELKHORN, NE, 68022, CHIEF INFO OFFCR, 425, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00276311
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220630_20311222.txt




22/11/20 11:22:05 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00808329, IND, MILBRATH, KATHLEEN, EDINA, MN, 55436, NOT EMPLOYED, 50, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00808329
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220818_20311222.txt


                                                                                

22/11/20 11:22:06 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00000935, IND, EDELMAN, SHARON, NEW YORK, NY, 100242713, UNEMPLOYED, 25, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00000935
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont.txt


[Stage 12:===>           (60 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:24 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765347, IND, AVIEZER, TSAFRIR, CALABASAS, CA, 91302, REAL ESTATE DEVELOPMENT, 250, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765347
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210221_20210408.txt


[Stage 12:===>           (62 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:27 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00769489, IND, YOUNG, PIPER, FORT WORTH, TX, 761032524, NOT EMPLOYED, 50, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00769489
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210409_20210520.txt


[Stage 12:===>           (64 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:29 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00737114, IND, BINGEN, KARI, HERNDON, VA, 20170, CHIEF STRATEGY OFFICER, 308, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00737114
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210521_20210625.txt


[Stage 12:===>           (66 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:32 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00477745, IND, HOPKINS, GLEN JR., VAN BUREN, AR, 729575608, RETIRED, 500, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00477745
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210626_20210722.txt


[Stage 12:====>          (68 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:34 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00283135, IND, FITZGERALD, ROBERT MARK, WOODSTOCK, GA, 301885023, BROKER, 170, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00283135
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210723_20210815.txt


[Stage 12:====>          (70 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:37 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00196774, IND, PETERS, WILLIAM, NORTHBOROUGH, MA, 015322023, LANDLORD & PHOTOGRAPHER, 10, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00196774
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210816_20210906.txt


[Stage 12:====>          (72 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:39 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00544817, IND, KRIER, JEFF, EUGENE, OR, 974017833, STATE FARM AGENT, 25, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00544817
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210907_20210928.txt


[Stage 12:====>          (74 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:41 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00544817, IND, TILKIN, MARK, CUMMING, GA, 300417868, COUNSEL, 23, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00544817
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20210929_20211017.txt


[Stage 12:====>          (76 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:44 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00701003, IND, PALMER, WILLIAM MR., TAMPA, FL, 336473563, RETIRED, 50, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00701003
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211018_20211104.txt


[Stage 12:====>          (78 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:46 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00701003, IND, MOSING, DOANLD, HOUSTON, TX, 770423107, PRESIDENT, 250, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00701003
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211105_20211124.txt


[Stage 12:====>          (80 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:49 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00701003, IND, MOSING, DOANLD, HOUSTON, TX, 770423107, PRESIDENT, 250, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00701003
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211105_20211125.txt


[Stage 12:====>          (82 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:51 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00628917, IND, JENSEN, SUE, DAKOTA DUNES, SD, 57049, FARMER, 500, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00628917
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211125_20211213.txt


[Stage 12:=====>         (84 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:53 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00628917, IND, JENSEN, SUE, DAKOTA DUNES, SD, 57049, FARMER, 500, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00628917
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211125_20211214.txt


[Stage 12:=====>         (86 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:56 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00781468, IND, SHIVERS, SHANNON, TALLULAH, LA, 71282, MANAGER, 250, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00781468
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211126_20211214.txt


[Stage 12:=====>         (88 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:23:58 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00701003, IND, LEVIS, HILMA A. MRS., SANTA FE, TX, 775107971, RETIRED, 10, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00701003
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211214_20220103.txt


[Stage 12:=====>         (90 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:24:01 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00628917, IND, FARMER, JOEL, SIOUX FALLS, SD, 57103, , 250, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00628917
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20211215_20220106.txt


[Stage 12:=====>         (93 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:24:03 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00401224, IND, CHEN, KATHERINE, OAKLAND, CA, 94611, NOT EMPLOYED, 3, C00685297
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00401224
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220103_20220216.txt


[Stage 12:=====>         (94 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:24:06 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00404392, IND, MCBRIDE, JENNIFER, OKLAHOMA CITY, OK, 73160, OWNER, 250, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00404392
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220103_20220217.txt


[Stage 12:=====>         (96 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:24:09 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00325324, IND, HOFF, NEIL, WINDTHORST, TX, 763893104, DAIRY FARMERS OF AMERICA, 300, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00325324
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220104_20220319.txt


[Stage 12:=====>         (98 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:24:11 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00067884, IND, KIMMELSHUE, RUTH S, WAYZATA, MN, 553912365, ENTERPRISE LEADER, 416, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00067884
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220107_20220219.txt


[Stage 12:=====>        (100 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:24:14 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00136739, IND, WINCHESTER, ROBERT, ALBRIGHTSVILLE, PA, 19034, OPERATOR, 49, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00136739
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220217_20220319.txt


[Stage 12:=====>        (102 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:24:16 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00368720, IND, TOKICH, MICHAEL, MENTOR, OH, 44060, SR VICE PRESIDENT CHIEF FINANCIAL OFFI, 161, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00368720
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220218_20220320.txt


[Stage 12:=====>        (105 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:24:19 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00473249, IND, BELLER, MICHAEL, PHOENIX, AZ, 85016, DIRECTOR OF ANALYTICS, 600, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00473249
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220220_20220323.txt


[Stage 12:=====>        (106 + 2) / 249][Stage 14:>                (0 + 0) / 11]

22/11/20 11:24:21 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00473249, IND, BELLER, MICHAEL, PHOENIX, AZ, 85016, DIRECTOR OF ANALYTICS, 600, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00473249
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220220_20220326.txt




22/11/20 11:24:24 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00632562, IND, SHEFFIELD, JAMES, WEST CORNWALL, CT, 067961407, RETIRED, 35, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00632562
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220320_20220418.txt




22/11/20 11:24:26 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00797340, IND, KISER, KARL, ALBUQUERQIE, NM, 87190, RETIRED, 500, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00797340
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220321_20220419.txt




22/11/20 11:24:29 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220420.txt




22/11/20 11:24:32 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00800623, IND, LESSAR, STEVE, NEW YORK, NY, 100241722, INVESTMENT MANAGEMENT, 11600, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00800623
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220421.txt




22/11/20 11:24:34 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220503.txt




22/11/20 11:24:37 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220511.txt




22/11/20 11:24:40 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220512.txt




22/11/20 11:24:42 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20220513.txt




22/11/20 11:24:45 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00765982, IND, CONKLIN, RODNEY, MASON, MI, 488549759, RETIRED, 12, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00765982
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220323_20311222.txt




22/11/20 11:24:47 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00811141, IND, CARON, KENNETH, ANCHORAGE, AK, 995075113, RETIRED, 100, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00811141
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220419_20220512.txt




22/11/20 11:24:49 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, MASON, LESLIE, MCLEAN, VA, 221022512, NOT EMPLOYED, 2000, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220420_20220513.txt




22/11/20 11:24:52 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, WARSHAW, MATTHEW, MCLEAN, VA, 221012002, RESEARCHER, 500, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220421_20220514.txt




22/11/20 11:24:54 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, WARSHAW, MATTHEW, MCLEAN, VA, 221012002, RESEARCHER, 500, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220421_20220515.txt




22/11/20 11:24:57 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00696526, IND, AMAN, JOHN, PHOENIX, AZ, 850294029, RETIRED, 100, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00696526
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220422_20220516.txt




22/11/20 11:24:59 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, ROMATOWSKI, PETER J, MCLEAN, VA, 221012305, ATTORNEY, 1000, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220504_20311222.txt




22/11/20 11:25:01 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00776567, IND, HARWITT, KAREN, LOS ANGELES, CA, 90049, NOT EMPLOYED, 50, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00776567
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220512_20311222.txt




22/11/20 11:25:02 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, LEWIS, WARREN LEE, MC LEAN, VA, 221015751, LAWYER, 1000, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220513_20220604.txt




22/11/20 11:25:05 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00778142, IND, STEAKLEY, RODERIC, BROWNSBORO, AL, 357418903, ATTORNEY, 1000, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00778142
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220513_20311222.txt




22/11/20 11:25:07 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, COFFMAN, MIRIAM F, ANNANDALE, VA, 220031513, RETIRED EDUCATOR, 200, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220514_20220605.txt




22/11/20 11:25:10 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00555888, IND, DELONEY, JULIA REBECCA, ARLINGTON, VA, 222072045, MANAGER, 25, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00555888
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220515_20220606.txt




22/11/20 11:25:12 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00778142, IND, SAVAGE, ROBIN, BIRMINGHAM, AL, 352094454, CONSTRUCTION, 500, C00694323
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00778142
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220516_20220609.txt




22/11/20 11:25:15 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00694323, IND, SMITH, WILLIAM, SANIBEL, FL, 339572914, RETIRED, 25, C00671891
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00694323
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220517_20220612.txt




22/11/20 11:25:17 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00556506, IND, HWU, ANN, IRVINE, CA, 926062622, REAL ESTATE AGENT, 1059, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00556506
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220605_20220626.txt




22/11/20 11:25:20 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00556506, IND, TSAI, MITCHELL, SOUTH PASADENA, CA, 910304935, ATTORNEY, 500, C00401224
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00556506
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220606_20220627.txt




22/11/20 11:25:22 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00401224, IND, PEREZ, NANCY, PITTSTON, PA, 18640, NOT EMPLOYED, 1000, C00685297
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00401224
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220607_20220628.txt




22/11/20 11:25:25 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: C00783134, IND, WILLSEY, KANELLA, RIVERSIDE, IL, 60546, RETIRED, 500, 
 Schema: CMTE_ID, ENTITY_TP, NAME, CITY, STATE, ZIP_CODE, OCCUPATION, TRANSACTION_AMT, OTHER_ID
Expected: CMTE_ID but found: C00783134
CSV file: file:/home/ubuntu/spark%20shit/data/indiv_cont/itcont_2022_20220609_20220630.txt


ERROR:root:KeyboardInterrupt while sending command.
Traceback (most recent call last):
  File "/home/ubuntu/cs179g4/lib/python3.10/site-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
  File "/home/ubuntu/cs179g4/lib/python3.10/site-packages/py4j/clientserver.py", line 511, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "/usr/lib/python3.10/socket.py", line 705, in readinto
    return self._sock.recv_into(b)
KeyboardInterrupt

KeyboardInterrupt: 