# Lemmatize Semantic Scholar papers using Spark NLP

In [1]:
from sparknlp.base import *
from sparknlp.annotator import *
import sparknlp
from pyspark.ml import Pipeline
import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pathlib import Path

## 1. Read papers and concatenate the `title` and `paperAbstract` fields

In [2]:
%%time

# Loading papers table text fields, and concatenating them for lemmatization
S2papers = spark.sql("SELECT id, title, paperAbstract FROM parquet.`/export/ml4ds/IntelComp/Datalake/SemanticScholar/20220201/papers.parquet`")

##For development purposes only
#S2papers = S2papers.sample(fraction=0.0001)

#Concatenate text fields to lemmatize
S2papers = (
    S2papers.withColumn("rawtext",F.concat_ws('. ', "title", "paperAbstract"))
    .drop("title")
    .drop("paperAbstract")
)

print('Number of papers before language filtering:', S2papers.count())

22/03/21 13:28:11 WARN conf.HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist
22/03/21 13:28:11 WARN conf.HiveConf: HiveConf of name hive.stats.retries.wait does not exist
22/03/21 13:28:13 WARN metastore.ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 2.3.0
22/03/21 13:28:13 WARN metastore.ObjectStore: setMetaStoreSchemaVersion called but recording version is disabled: version = 2.3.0, comment = Set by MetaStore jarenas@192.168.148.225
22/03/21 13:28:13 WARN metastore.ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
22/03/21 13:28:13 WARN metastore.ObjectStore: Failed to get database parquet, returning NoSuchObjectException

Number of papers before language filtering: 204457855
CPU times: user 30.1 ms, sys: 8.25 ms, total: 38.4 ms
Wall time: 2min 13s


                                                                                

## 2. Filter out papers that are not written in English

In [3]:
%%time

#Pipeline for language detection
documentAssembler = DocumentAssembler() \
    .setInputCol("rawtext") \
    .setOutputCol("document")

languageDetector = LanguageDetectorDL.pretrained() \
    .setInputCols("document") \
    .setOutputCol("language")

pipeline = Pipeline() \
    .setStages([
      documentAssembler,
      languageDetector
    ])

#Apply language detection pipeline
S2papers = pipeline.fit(S2papers).transform(S2papers)
S2papers = (
    S2papers.filter(F.col("language.result")[0]=="en")
    .drop("language")
)

print('Number of papers in English:', S2papers.count())

ld_wiki_tatoeba_cnn_21 download started this may take some time.
Approximate size to download 7.1 MB
[ | ]ld_wiki_tatoeba_cnn_21 download started this may take some time.
Approximate size to download 7.1 MB
Download done! Loading the resource.




[ / ]



[ — ]



[ \ ]



[ | ]

2022-03-21 13:30:42.325817: I external/org_tensorflow/tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-03-21 13:30:42.425946: I external/org_tensorflow/tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2399870000 Hz


[OK!]


22/03/21 13:30:53 ERROR scheduler.TaskSchedulerImpl: Lost executor 0 on node66.cluster.tsc.uc3m.es: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:30:53 WARN scheduler.TaskSetManager: Lost task 2.0 in stage 6.0 (TID 1061) (node66.cluster.tsc.uc3m.es executor 0): ExecutorLostFailure (executor 0 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:30:53 WARN scheduler.TaskSetManager: Lost task 12.0 in stage 6.0 (TID 1070) (node66.cluster.tsc.uc3m.es executor 0): ExecutorLostFailure (executor 0 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:30:53 WARN scheduler.TaskSetManager: Lost task 22.0 in st

22/03/21 13:31:48 ERROR scheduler.TaskSchedulerImpl: Lost executor 18 on node51.cluster.tsc.uc3m.es: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:31:48 WARN scheduler.TaskSetManager: Lost task 47.0 in stage 6.0 (TID 1186) (node51.cluster.tsc.uc3m.es executor 18): ExecutorLostFailure (executor 18 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:31:48 WARN scheduler.TaskSetManager: Lost task 34.1 in stage 6.0 (TID 1180) (node51.cluster.tsc.uc3m.es executor 18): ExecutorLostFailure (executor 18 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:31:48 WARN scheduler.TaskSetManager: Lost task 3.1 

22/03/21 13:31:54 ERROR scheduler.TaskSchedulerImpl: Lost executor 5 on node68.cluster.tsc.uc3m.es: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:31:54 WARN scheduler.TaskSetManager: Lost task 89.0 in stage 6.0 (TID 1214) (node68.cluster.tsc.uc3m.es executor 5): ExecutorLostFailure (executor 5 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:31:54 WARN scheduler.TaskSetManager: Lost task 108.0 in stage 6.0 (TID 1222) (node68.cluster.tsc.uc3m.es executor 5): ExecutorLostFailure (executor 5 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:31:54 WARN scheduler.TaskSetManager: Lost task 95.0 in 

22/03/21 13:33:04 ERROR scheduler.TaskSchedulerImpl: Lost executor 38 on node71.cluster.tsc.uc3m.es: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:33:04 WARN scheduler.TaskSetManager: Lost task 266.2 in stage 6.0 (TID 1324) (node71.cluster.tsc.uc3m.es executor 38): ExecutorLostFailure (executor 38 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:33:04 WARN scheduler.TaskSetManager: Lost task 54.2 in stage 6.0 (TID 1327) (node71.cluster.tsc.uc3m.es executor 38): ExecutorLostFailure (executor 38 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:33:04 WARN scheduler.TaskSetManager: Lost task 66.

Py4JJavaError: An error occurred while calling o145.count.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 61 in stage 6.0 failed 4 times, most recent failure: Lost task 61.3 in stage 6.0 (TID 1342) (node68.cluster.tsc.uc3m.es executor 39): ExecutorLostFailure (executor 39 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2253)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2202)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2201)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2201)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1078)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1078)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1078)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2440)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2382)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2371)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:868)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2202)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2223)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2242)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2267)
	at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
	at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
	at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:390)
	at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:3006)
	at org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:3005)
	at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
	at org.apache.spark.sql.Dataset.count(Dataset.scala:3005)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)


22/03/21 13:33:27 WARN scheduler.TaskSetManager: Lost task 134.0 in stage 6.0 (TID 1244) (node29.cluster.tsc.uc3m.es executor 14): TaskKilled (Stage cancelled)
22/03/21 13:33:27 WARN scheduler.TaskSetManager: Lost task 188.0 in stage 6.0 (TID 1246) (node29.cluster.tsc.uc3m.es executor 14): TaskKilled (Stage cancelled)
22/03/21 13:33:27 WARN scheduler.TaskSetManager: Lost task 166.0 in stage 6.0 (TID 1245) (node29.cluster.tsc.uc3m.es executor 14): TaskKilled (Stage cancelled)
22/03/21 13:33:27 WARN scheduler.TaskSetManager: Lost task 260.0 in stage 6.0 (TID 1249) (node29.cluster.tsc.uc3m.es executor 14): TaskKilled (Stage cancelled)
22/03/21 13:33:27 WARN scheduler.TaskSetManager: Lost task 29.2 in stage 6.0 (TID 1243) (node29.cluster.tsc.uc3m.es executor 14): TaskKilled (Stage cancelled)
22/03/21 13:33:27 WARN scheduler.TaskSetManager: Lost task 267.0 in stage 6.0 (TID 1250) (node29.cluster.tsc.uc3m.es executor 14): TaskKilled (Stage cancelled)
22/03/21 13:33:27 WARN scheduler.TaskSetM

22/03/21 13:33:36 WARN scheduler.TaskSetManager: Lost task 131.0 in stage 6.0 (TID 1138) (node84.cluster.tsc.uc3m.es executor 4): TaskKilled (Stage cancelled)
22/03/21 13:33:37 WARN scheduler.TaskSetManager: Lost task 52.0 in stage 6.0 (TID 1132) (node84.cluster.tsc.uc3m.es executor 4): TaskKilled (Stage cancelled)
22/03/21 13:33:37 WARN scheduler.TaskSetManager: Lost task 107.0 in stage 6.0 (TID 1136) (node84.cluster.tsc.uc3m.es executor 4): TaskKilled (Stage cancelled)
22/03/21 13:33:38 WARN scheduler.TaskSetManager: Lost task 142.0 in stage 6.0 (TID 1300) (node21.cluster.tsc.uc3m.es executor 35): TaskKilled (Stage cancelled)
22/03/21 13:33:38 WARN scheduler.TaskSetManager: Lost task 115.1 in stage 6.0 (TID 1299) (node21.cluster.tsc.uc3m.es executor 35): TaskKilled (Stage cancelled)
22/03/21 13:33:38 WARN scheduler.TaskSetManager: Lost task 398.0 in stage 6.0 (TID 1306) (node21.cluster.tsc.uc3m.es executor 35): TaskKilled (Stage cancelled)
22/03/21 13:33:38 WARN scheduler.TaskSetMana

22/03/21 13:33:42 WARN scheduler.TaskSetManager: Lost task 13.1 in stage 6.0 (TID 1139) (node13.cluster.tsc.uc3m.es executor 9): TaskKilled (Stage cancelled)
22/03/21 13:33:42 WARN scheduler.TaskSetManager: Lost task 113.0 in stage 6.0 (TID 1055) (node39.cluster.tsc.uc3m.es executor 23): TaskKilled (Stage cancelled)
22/03/21 13:33:42 WARN scheduler.TaskSetManager: Lost task 111.1 in stage 6.0 (TID 1277) (node83.cluster.tsc.uc3m.es executor 16): TaskKilled (Stage cancelled)
22/03/21 13:33:42 WARN scheduler.TaskSetManager: Lost task 98.1 in stage 6.0 (TID 1279) (node83.cluster.tsc.uc3m.es executor 16): TaskKilled (Stage cancelled)
22/03/21 13:33:42 WARN scheduler.TaskSetManager: Lost task 155.0 in stage 6.0 (TID 1143) (node13.cluster.tsc.uc3m.es executor 9): TaskKilled (Stage cancelled)
22/03/21 13:33:42 WARN scheduler.TaskSetManager: Lost task 16.1 in stage 6.0 (TID 1140) (node13.cluster.tsc.uc3m.es executor 9): TaskKilled (Stage cancelled)
22/03/21 13:33:42 WARN scheduler.TaskSetManage

22/03/21 13:33:49 WARN scheduler.TaskSetManager: Lost task 158.0 in stage 6.0 (TID 1205) (node93.cluster.tsc.uc3m.es executor 24): TaskKilled (Stage cancelled)
22/03/21 13:33:49 WARN scheduler.TaskSetManager: Lost task 262.0 in stage 6.0 (TID 1209) (node93.cluster.tsc.uc3m.es executor 24): TaskKilled (Stage cancelled)
22/03/21 13:33:49 WARN scheduler.TaskSetManager: Lost task 303.0 in stage 6.0 (TID 1210) (node93.cluster.tsc.uc3m.es executor 24): TaskKilled (Stage cancelled)
22/03/21 13:33:49 WARN scheduler.TaskSetManager: Lost task 138.0 in stage 6.0 (TID 1203) (node93.cluster.tsc.uc3m.es executor 24): TaskKilled (Stage cancelled)
22/03/21 13:33:49 WARN scheduler.TaskSetManager: Lost task 144.0 in stage 6.0 (TID 1289) (node85.cluster.tsc.uc3m.es executor 32): TaskKilled (Stage cancelled)
22/03/21 13:33:49 WARN scheduler.TaskSetManager: Lost task 291.1 in stage 6.0 (TID 1285) (node85.cluster.tsc.uc3m.es executor 32): TaskKilled (Stage cancelled)
22/03/21 13:33:50 WARN scheduler.TaskSet

22/03/21 13:33:55 WARN scheduler.TaskSetManager: Lost task 26.1 in stage 6.0 (TID 1093) (node82.cluster.tsc.uc3m.es executor 25): TaskKilled (Stage cancelled)
22/03/21 13:33:56 WARN scheduler.TaskSetManager: Lost task 22.1 in stage 6.0 (TID 1096) (node82.cluster.tsc.uc3m.es executor 25): TaskKilled (Stage cancelled)
22/03/21 13:33:56 WARN scheduler.TaskSetManager: Lost task 15.1 in stage 6.0 (TID 1094) (node82.cluster.tsc.uc3m.es executor 25): TaskKilled (Stage cancelled)
22/03/21 13:33:56 WARN scheduler.TaskSetManager: Lost task 2.1 in stage 6.0 (TID 1098) (node82.cluster.tsc.uc3m.es executor 25): TaskKilled (Stage cancelled)
[Stage 6:>                                                      (0 + 16) / 1000]

## 3. Define Lemmatization Pipeline

   - We work on the `document` column created in Section 2
   - Sentence detection and tokenization are applied to split the text into tokens
   - Each token is lemmatized
   - Stopwords are removed
   - Lemmas are lowercased and punctuation symbols are removed
   - The result is converted back from Spark NLP annotations to plain strings

In [4]:
%%time 

#Next, we carry out the lemmatization pipeline

sentenceDetector = SentenceDetector() \
    .setInputCols(["document"]) \
    .setOutputCol("sentence")

tokenizer = Tokenizer() \
    .setInputCols(["sentence"]) \
    .setOutputCol("token")

lemmatizer = LemmatizerModel.pretrained() \
    .setInputCols(["token"]) \
    .setOutputCol("lemma")

stopWords = StopWordsCleaner() \
    .setInputCols(["lemma"]) \
    .setOutputCol("cleanlemma")

normalizer = Normalizer() \
    .setInputCols(["cleanlemma"]) \
    .setOutputCol("normalizedlemma") \
    .setLowercase(True) \
    .setCleanupPatterns(["""[^\w\d\s]"""])

finisher = Finisher() \
     .setInputCols(['normalizedlemma'])

pipeline = Pipeline() \
    .setStages([
      sentenceDetector,
      tokenizer,
      lemmatizer,
      stopWords,
      normalizer,
      finisher
])

#We apply pipeline and recover lemmas as string
S2papers = pipeline.fit(S2papers).transform(S2papers)

udf_back2str = F.udf(lambda x:' '.join(list(x)), StringType() )
S2papers = (
    S2papers.withColumn("lemmas",udf_back2str(F.col("finished_normalizedlemma")))
    .drop("rawtext")
    .drop("finished_normalizedlemma")
)

#Show results of validation for n papers
#S2papers.show(n=10, truncate=120, vertical=True)

lemma_antbnc download started this may take some time.
Approximate size to download 907.6 KB
[ | ]lemma_antbnc download started this may take some time.
Approximate size to download 907.6 KB
[ / ]Download done! Loading the resource.




[ — ]



[ \ ]

                                                                                

[OK!]


[Stage 10:>                                                         (0 + 1) / 1]

-RECORD 0--------------------------------------------------------------------------------------------------------------------------
 id     | ad05287296deebf0a27a2ae32a7e9f8fbfff5d14                                                                                 
 lemmas | active species mechanistic pathways ironcatalyzed cc bondforming crosscoupling reactions past decade considerable pro... 
-RECORD 1--------------------------------------------------------------------------------------------------------------------------
 id     | c0d604dcbb9d0b44737bf04763271b6eecbc5ddb                                                                                 
 lemmas | design rule nanomedical engineering physical virology application virusbased material medicine physical virology seek... 
-RECORD 2--------------------------------------------------------------------------------------------------------------------------
 id     | c6cdf58235e11e615da62112b09f3266a5d5d882                          

                                                                                

## 4. Save a table with `id` and `lemmas` to HDFS

In [5]:
%%time

#Save calculated lemmas to HDFS
dir_parquet = Path("/export/ml4ds/IntelComp/Datalake/SemanticScholar/20220201")

S2papers.write.parquet(
    dir_parquet.joinpath(f"papers_NLP.parquet").as_posix(),
    mode="overwrite",
)

22/03/21 13:15:07 ERROR scheduler.TaskSchedulerImpl: Lost executor 16 on node55.cluster.tsc.uc3m.es: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:15:07 WARN scheduler.TaskSetManager: Lost task 19.0 in stage 11.0 (TID 2111) (node55.cluster.tsc.uc3m.es executor 16): ExecutorLostFailure (executor 16 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:15:07 WARN scheduler.TaskSetManager: Lost task 2.0 in stage 11.0 (TID 2102) (node55.cluster.tsc.uc3m.es executor 16): ExecutorLostFailure (executor 16 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
22/03/21 13:15:07 WARN scheduler.TaskSetManager: Lost task 9.0

CPU times: user 418 ms, sys: 100 ms, total: 519 ms
Wall time: 3min


## 5. Optional: Check that the generated table looks OK

In [7]:
# Sanity check: reload the saved parquet table, preview a few records
# and report how many rows were written
S2papers = spark.sql("SELECT * FROM parquet.`/export/ml4ds/IntelComp/Datalake/SemanticScholar/20220201/papers_NLP.parquet`")
S2papers.show(n=10, truncate=120, vertical=True)
n_lemmatized = S2papers.count()
print('Number of lemmatized papers:', n_lemmatized)

22/03/21 13:18:56 WARN metastore.ObjectStore: Failed to get database parquet, returning NoSuchObjectException


-RECORD 0--------------------------------------------------------------------------------------------------------------------------
 id     | 21960a5deecbcf8aa360e7e1880ce0f88e5bb4ca                                                                                 
 lemmas | clock recovery technique base spectral restoration clock recovery technique suitable several family modulationdemodul... 
-RECORD 1--------------------------------------------------------------------------------------------------------------------------
 id     | 077bda491fa46e833b3f893c4e860bba417a17ae                                                                                 
 lemmas | evaluation trends foreign trade development postcommunist countries europe years 20002012 following accession eu abst... 
-RECORD 2--------------------------------------------------------------------------------------------------------------------------
 id     | 255b1aec6053f33afaf5cb369f67abb285f29efb                          

14998