diff --git a/AMR_FEATURE/.classpath b/AMR_FEATURE/.classpath
new file mode 100644
index 0000000..e910e9c
--- /dev/null
+++ b/AMR_FEATURE/.classpath
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" path="src"/>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
+	<classpathentry kind="lib" path="/afs/inf.ed.ac.uk/user/s15/s1544871/workspace/AMR_FEATURE/src/json-20170516.jar"/>
+	<classpathentry kind="lib" path="/disk/scratch/s1544871/Dependency/stanford-corenlp-full-2018-02-27/stanford-corenlp-3.9.1.jar"/>
+	<classpathentry kind="lib" path="/disk/scratch/s1544871/Dependency/stanford-corenlp-full-2018-02-27/stanford-corenlp-3.9.1-models.jar"/>
+	<classpathentry kind="output" path="bin"/>
+</classpath>
diff --git a/AMR_FEATURE/.project b/AMR_FEATURE/.project
new file mode 100644
index 0000000..b04548c
--- /dev/null
+++ b/AMR_FEATURE/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>AMR_FEATURE</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+	</natures>
+</projectDescription>
diff --git a/AMR_FEATURE/.settings/org.eclipse.jdt.core.prefs b/AMR_FEATURE/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..3a21537
--- /dev/null
+++ b/AMR_FEATURE/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,11 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.8
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.8
diff --git a/AMR_FEATURE/bin/convertingAMR.class b/AMR_FEATURE/bin/convertingAMR.class
new file mode 100644
index 0000000..0e63a79
Binary files /dev/null and b/AMR_FEATURE/bin/convertingAMR.class differ
diff --git a/AMR_FEATURE/bin/json-20170516.jar b/AMR_FEATURE/bin/json-20170516.jar
new file mode 100644
index 0000000..5033f66
Binary files /dev/null and b/AMR_FEATURE/bin/json-20170516.jar differ
diff --git a/AMR_FEATURE/joints.txt b/AMR_FEATURE/joints.txt
new file mode 100644
index 0000000..78be015
--- /dev/null
+++ b/AMR_FEATURE/joints.txt
@@ -0,0 +1,922 @@
+have to
+at all
+so far
+more than
+less than
+no one
+as well
+at least
+right wing
+left wing
+as long as
+all over
+of course
+kind of
+after all
+by oneself
+by the way
+in fact
+be all
+head up
+come out
+coop up
+seize up
+bust up
+hang out
+limber up
+quieten down
+crack up
+fuck up
+get out
+clear out
+rip up
+rock on
+shout down
+bundle up
+pump up
+smooth out
+set down
+drop off
+think over
+core out
+tidy up
+make off
+fight on
+set out
+think up
+try out
+sign in
+take out
+top off
+nail down
+block up
+cash in
+fork out
+mark down
+rattle off
+bandage up
+sleep over
+patch up
+freeze over
+seal off
+free up
+clown around
+tear down
+dust off
+live up
+cut loose
+louse up
+sit down
+stand by
+take up
+steal away
+lay off
+turn in
+meet up
+check up
+taper off
+dole out
+catch up
+shape up
+tax away
+pass off
+give in
+speak up
+call upon
+stall out
+butt in
+carve out
+step up
+trigger off
+prop up
+scoop up
+summon forth
+boss around
+cool down
+give back
+cut down
+jot down
+doze off
+drum up
+bog down
+throw out
+shy away
+frost over
+rack up
+even out
+light up
+shack up
+bone up
+cut out
+sum up
+shut up
+send out
+pine away
+take over
+gobble up
+shoot back
+lay on
+swear off
+spread out
+pin down
+find out
+drag on
+thaw out
+bump off
+fatten up
+get back
+arm up
+load up
+give vent
+top up
+bounce back
+bad off
+come by
+single out
+call out
+slow down
+ask out
+slice up
+roll up
+divide up
+hold over
+touch off
+pass out
+have mod
+screw up
+iron out
+tell on
+dry out
+zero out
+rev up
+request confirmation
+scrawl out
+tie in
+pass up
+scratch out
+miss out
+root out
+frighten off
+have subevent
+go on
+follow through
+lighten up
+trade off
+carry over
+pay out
+mellow out
+fool around
+get down
+stretch out
+run down
+scrub up
+splash out
+stop by
+touch upon
+dig out
+stick around
+act out
+pass by
+watch out
+share out
+shut out
+get along
+go through
+tease out
+kill off
+slug out
+bottom out
+tie down
+neaten up
+dress down
+turn off
+bandy around
+yammer away
+gulp down
+cut back
+chatter away
+glaze over
+drop by
+slack off
+fess up
+seek out
+creep out
+hold up
+knock up
+shine through
+fence off
+zero in
+flip out
+rein in
+screen out
+cheer up
+saw up
+sign off
+flatten out
+heat up
+add on
+clip off
+doll up
+touch on
+fall off
+suit up
+palm off
+mist over
+flesh out
+burn up
+sweat out
+work up
+brazen out
+peel off
+pay up
+get even
+fill out
+whip up
+shout out
+kick in
+draw up
+thrash out
+head off
+come in
+break up
+speed up
+spout off
+type up
+polish off
+trot out
+puke up
+bank up
+rip off
+dry up
+settle down
+cry out
+go out
+face off
+ride up
+buckle up
+pair up
+come off
+auction off
+roll back
+throw in
+eat up
+suck up
+shut down
+wipe out
+nod off
+choke off
+sleep off
+stand up
+frost up
+join in
+mix up
+crisp up
+knock out
+talk out
+set off
+sit in
+bang on
+flake out
+take off
+queue up
+square off
+make over
+ramp up
+let down
+toss out
+finish up
+blow over
+sound off
+cut up
+rough in
+blot out
+stave off
+stop off
+act up
+scout out
+pay off
+beat out
+copy out
+wolf down
+have manner
+get through
+break off
+drug up
+pump out
+take hold
+polish up
+pucker up
+write off
+shell out
+come over
+color in
+tamp down
+shut off
+have mode
+strike up
+beat up
+sweep up
+come up
+blast off
+lie in
+warm over
+ratchet up
+bump up
+play out
+look out
+tip over
+fudge over
+warm up
+throw away
+crank up
+tip off
+have quant
+go back
+roll out
+trim down
+set up
+rake in
+piss off
+give over
+buoy up
+pen up
+touch up
+parcel out
+boom out
+give off
+jump up
+leave over
+tone down
+dream on
+lock in
+win over
+stop over
+turn over
+play on
+edge out
+get up
+leave off
+finish off
+slim down
+wall off
+puff up
+plug up
+write out
+let out
+stop up
+calm down
+bring about
+phase out
+belly up
+break down
+stick up
+lock up
+pull out
+set upon
+jet off
+pay down
+fart around
+zone out
+bear out
+take away
+bleed off
+write up
+lash out
+lam out
+tie up
+siphon off
+dress up
+stamp out
+black out
+snuff out
+whip out
+go off
+ease up
+tune out
+gun down
+freak out
+chop down
+strip away
+step down
+hit up
+read up
+chew up
+start out
+own up
+close down
+come upon
+cone down
+yield up
+get away
+gear up
+bring on
+figure out
+turn up
+check out
+bead up
+ship out
+crank out
+flush out
+let on
+put on
+usher in
+spin off
+knock off
+skim off
+pass on
+finish out
+instead of
+leave out
+frighten away
+buy up
+knock over
+straighten out
+wear off
+whiz away
+call on
+put out
+totter around
+salt away
+spell out
+creep up
+hold out
+sign up
+branch out
+mark up
+hail down
+pick out
+shoot off
+din out
+beef up
+get off
+break through
+smarten up
+help out
+buy out
+stake out
+take in
+do in
+come to
+sell out
+shore up
+hem in
+hang up
+boil over
+sort out
+wipe up
+curl up
+whack off
+track down
+dig up
+run out
+haul out
+plot out
+loan out
+coil up
+die off
+pipe down
+kick off
+come through
+print out
+pick away
+gloss over
+ring up
+go down
+read off
+pitch in
+choke up
+break in
+crack down
+boot up
+blurt out
+sluice down
+fill up
+spring up
+lock out
+pack up
+look over
+whittle down
+chicken out
+bandy about
+cart off
+plug in
+buy off
+pick on
+crash out
+total up
+pile on
+pan out
+prick up
+dish up
+stash away
+round up
+shoot up
+balance out
+bring along
+quiet down
+cut off
+vamp up
+run off
+pull down
+team up
+hold back
+hammer out
+stack up
+think through
+match up
+rise up
+have concession
+wipe off
+hash out
+come down
+sock away
+jump in
+hang on
+ferret out
+wake up
+brick over
+burst out
+tack down
+spike out
+use up
+carry on
+bottle up
+tighten up
+start up
+carry off
+speak out
+set about
+tag along
+hook up
+oil up
+fend off
+start over
+sit up
+sign on
+take down
+study up
+while away
+fold up
+cheer on
+bust out
+rate entity
+play down
+book up
+bind up
+stay on
+come about
+put up
+dine out
+have frequency
+store up
+give up
+vote down
+bring up
+tape up
+leave behind
+turn on
+save up
+break out
+wash up
+fork over
+hollow out
+freshen up
+screw over
+dash off
+have part
+mess up
+buy into
+burn out
+cave in
+lead up
+clear up
+cry down
+stand out
+turn away
+drown out
+run in
+cover up
+spill over
+die out
+farm out
+hand over
+poke around
+ride out
+come across
+give away
+tack on
+bow out
+squeeze out
+write in
+show up
+come on
+fix up
+sew up
+fort up
+do away
+liven up
+scrunch up
+log on
+ham up
+look down
+firm up
+tally up
+tool up
+weigh in
+flare up
+strike down
+thin out
+blast away
+reel off
+feed up
+camp out
+well off
+crop up
+be like
+open up
+link up
+lick up
+look up
+statistical test
+charge off
+drop out
+keep up
+tick off
+tune in
+write down
+bat in
+stay over
+gas up
+pick up
+cook up
+boil down
+pull through
+call off
+pop off
+hand out
+push up
+fritter away
+trail off
+chop up
+rear end
+fuck around
+rattle on
+tire out
+street address
+keep on
+pack away
+keg stand
+close off
+lose out
+wring out
+make believe
+soak up
+tee off
+shake up
+scent out
+steer clear
+have instrument
+tear up
+feel up
+live down
+bowl over
+step in
+hobnob around
+bow down
+buzz off
+tangle up
+catch on
+price out
+snap up
+live out
+touch base
+be done
+have li
+vomit up
+clean out
+laid back
+buckle down
+slip in
+swear in
+stall off
+shoot down
+be from
+serve up
+join up
+back up
+well up
+pull up
+put down
+wash down
+dish out
+age out
+fight back
+bring down
+run up
+zip up
+switch over
+spend down
+call up
+be polite
+pop up
+fall apart
+net out
+jut out
+wind up
+rent out
+cross out
+rough up
+broke ass
+dredge up
+wait out
+shuffle off
+build up
+box in
+shake off
+cool off
+get on
+hit on
+straighten up
+start off
+belch out
+lie down
+play up
+give out
+haul in
+hard put
+make up
+snap off
+follow suit
+pass away
+smooth over
+hole up
+turn out
+clog up
+sober up
+smash up
+contract out
+go over
+dope up
+bed down
+sit out
+hype up
+drop in
+put off
+ward off
+get together
+turn down
+back off
+swoop up
+out trade
+size up
+pull off
+conjure up
+stock up
+sleep away
+monkey around
+break away
+pile up
+put in
+dream up
+wrap up
+gum up
+bound up
+tuck away
+board up
+have purpose
+stick out
+fall out
+take aback
+chart out
+latch on
+belt out
+wear on
+muck up
+step aside
+lead off
+point out
+line up
+check in
+start in
+bunch up
+watch over
+fill in
+work out
+joke around
+hum along
+lock down
+wear out
+rip out
+bleed out
+come along
+play off
+show off
+have extent
+concrete over
+narrow down
+jack up
+stare down
+pipe up
+loosen up
+wear down
+bear up
+cover over
+have polarity
+mic up
+make do
+close over
+deck out
+blow out
+play to
+hammer away
+ration out
+sell off
+have name
+strike out
+shuttle off
+call in
+shrug off
+chalk up
+perk up
+knock down
+follow up
+pass over
+brush off
+drink up
+fly out
+close in
+grow up
+eat away
+have condition
+snatch away
+pick off
+stress out
+take on
+muddle up
+tuck in
+live on
+skip off
+look forward
+stir up
+bail out
+stand down
+close up
+run over
+throw up
+fuck off
+swallow up
+spill out
+fall back
+fight off
+rig up
+sweat off
+hide out
+divvy up
+flash back
+end up
+make it
+toss in
+round out
+sniff out
+grind up
+chip in
+cough up
+phase in
+let up
+water down
+hold on
+level off
+have value
+fit in
+yammer on
+key in
+hold off
+silt up
+get by
+split up
+make out
+look after
+rubber stamp
+sketch out
+pull over
+spruce up
+glass over
+add up
+mist up
+brush up
+wind down
+clutch on
+knock back
+pare down
+rule out
+fall through
+hack away
+asphalt over
+clean up
+pound out
+die down
+carry out
+fall over
+blow up
+weasel out
+break even
diff --git a/AMR_FEATURE/src/convertingAMR.java b/AMR_FEATURE/src/convertingAMR.java
new file mode 100644
index 0000000..56777f0
--- /dev/null
+++ b/AMR_FEATURE/src/convertingAMR.java
@@ -0,0 +1,493 @@
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.json.JSONArray;
+import org.json.JSONObject;
+
+import edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation;
+import edu.stanford.nlp.semgraph.SemanticGraphEdge;
+import edu.stanford.nlp.util.CoreMap;
+import edu.stanford.nlp.util.PropertiesUtils;
+
+public class convertingAMR {
+	// Phrase table: hyphen-joined phrase prefix -> words that may follow it.
+	private Map<String, Set<String>> map;
+
+	/**
+	 * Builds the phrase table from a joints file (one space-separated phrase per
+	 * line, e.g. "make up"; extracted from AMRPropBank and the training set),
+	 * which is used for greedily connecting phrases. Every prefix of a phrase,
+	 * joined with "-", is mapped to the set of words seen right after it.
+	 * An IOException is printed and leaves the table empty or partially filled.
+	 *
+	 * @param file path of the joints file, read as UTF-8
+	 */
+	public convertingAMR(String file) {
+		map = new HashMap<String, Set<String>>();
+		try (FileInputStream fis = new FileInputStream(file);
+				BufferedReader br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));) {
+			String line;
+			while ((line = br.readLine()) != null) {
+				String[] pair = line.split(" ");
+				String past = "";
+				for (int i = 0; i < pair.length - 1; i++) {
+					past += pair[i] + " ";
+					// Read and write under the SAME hyphenated key; looking up the
+					// space-joined form made phrases sharing a multi-word prefix
+					// overwrite each other's continuation sets.
+					String key = past.trim().replace(" ", "-");
+					map.computeIfAbsent(key, k -> new HashSet<String>()).add(pair[i + 1]);
+				}
+			}
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+	}
+
+	public static List<String> folderToFilesPath(String folder, String suffix) {
+		List<String> results = new ArrayList<String>();
+
+		File[] files = new File(folder).listFiles();
+		// If this pathname does not denote a directory, then listFiles() returns null.
+
+		for (File file : files) {
+			if (file.isFile() && file.getName().endsWith(suffix)) {
+				results.add(folder + file.getName());
+			}
+		}
+		return results;
+	}
+
+	/**
+	 * Annotates every sentence of an AMR file with CoreNLP and writes the result
+	 * next to the input, with the ".txt..." suffix replaced by ".txt_pre_processed".
+	 * Each entry consists of header lines, a "# ::tok " or "# ::snt " line, and
+	 * the remaining lines (e.g. the AMR graph) up to the next blank line.
+	 *
+	 * @param file path of the AMR file, read and written as UTF-8
+	 */
+	public void featureExtract(String file) {
+		// build pipeline
+		StanfordCoreNLP pipeline = new StanfordCoreNLP(
+				PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner", "tokenize.options",
+						"splitHyphenated=true",
+						"tokenize.whitespace", "true",//start with tokenized file
+						"ssplit.isOneSentence",  //ignore multi-sentence construction
+						"true", "tokenize.language", "en"));
+		String[] name = file.split("/");
+		String line = "";
+		try (FileInputStream fis = new FileInputStream(file);
+				BufferedReader br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));) {
+			System.out.println(name[name.length - 1]);
+			int changed = 0;
+			List<JSONObject> obs = new ArrayList<JSONObject>();
+			line = br.readLine();
+			while (line != null && !line.trim().isEmpty()) {
+				JSONObject obj = new JSONObject();
+				StringBuilder pre = new StringBuilder();
+				// Collect header lines; the null check stops at EOF instead of throwing.
+				while (line != null && !line.startsWith("# ::tok ") && !line.startsWith("# ::snt ")) {
+					pre.append(line + "\n");
+					line = br.readLine();
+				}
+				if (line == null)
+					break; // trailing header without a sentence: keep what was parsed so far
+				obj.put("pre", pre.toString());
+				//build a sentence without buggy texts....
+				String snt = line.replace("# ::tok ", "").replace("# ::snt ", "");
+				snt = snt.replaceAll("\\.{2,}", "").replaceAll("  ", " ");
+				snt = snt.replace("  ", " ").replace("  ", " ").replace("\n", "");
+				snt = snt.replaceAll("\"", " \" ");
+				snt = snt.replaceAll("\\(", " \\( ");
+				snt = snt.replaceAll("\\)", " \\) ");
+				snt = snt.replaceAll("@-@", "-").replaceAll(" @:@ ", ":");
+				obj.put("snt", snt);
+
+				//initial feature extraction and connecting of phrase
+				HashMap<String, LinkedList<String>> data = extractSentence(obj, pipeline,true);
+				//connects number
+				changed += post_procee_number(data);
+				//connects ner, mainly due to "-" and "'s" construction in AMR NER
+				changed += post_procee_ner(data);
+				obj.put("ner", data.get("ner"));
+				obj.put("lem", data.get("lem"));
+				obj.put("tok", data.get("tok"));
+				obj.put("pos", data.get("pos"));
+				obs.add(obj);
+
+				if (obs.size() % 500 == 0) {
+					System.out.println(obs.size() + " " + name[name.length - 1]);
+					obj.keys().forEachRemaining(k -> {
+						System.out.println(k + ": " + obj.get(k));
+					});
+				}
+				//read remaining e.g. AMR graph
+				StringBuilder post = new StringBuilder();
+				line = br.readLine();
+				while (line != null && !line.trim().isEmpty()) {
+					post.append(line + "\n");
+					line = br.readLine();
+				}
+				obj.put("post", post.toString());
+
+				// skip the blank lines separating entries
+				while (line != null && line.trim().isEmpty()) {
+					line = br.readLine();
+				}
+			}
+			System.out.println("\n" + name[name.length - 1] + " done. Total sentences: " + obs.size() + "\n");
+			System.out.println("\n" + changed + " changed." + "\n");
+			String out = obs.stream().map(obj -> writeObject(obj)).collect(Collectors.joining("\n"));
+			// Encode as UTF-8 like the reader (not the platform default); "\\." matches a literal dot.
+			Files.write(Paths.get(file.replaceAll("\\.txt(_[a-z]*)*", ".txt_pre_processed")), out.getBytes("UTF-8"));
+		} catch (IOException e) {
+			e.printStackTrace();
+		} catch (NullPointerException e) {
+			System.out.println(file + "  null pointer??");
+			System.out.println(line + "  null pointer??");
+			e.printStackTrace();
+		}
+	}
+	
+	/**
+	 * Same as {@link #featureExtract(String)}, but for files that hold sentences
+	 * only: every non-blank line is one sentence (a "# ::tok " / "# ::snt "
+	 * marker is stripped), "pre" and "post" stay empty, the sentence text is not
+	 * normalized and the tokenization is not changed. The output is written next
+	 * to the input, with the ".txt..." suffix replaced by ".txt_processed".
+	 *
+	 * @param file path of the sentence file, read and written as UTF-8
+	 */
+	public void featureExtractSentenceOnly(String file) {
+
+		// build pipeline (whitespace tokenization, one sentence per input line)
+		StanfordCoreNLP pipeline = new StanfordCoreNLP(
+				PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner", "tokenize.options",
+						"splitHyphenated=true", "tokenize.whitespace", "true",
+						"ssplit.isOneSentence", "true", "tokenize.language", "en"));
+		String[] name = file.split("/");
+
+		String line = "";
+		try (FileInputStream fis = new FileInputStream(file);
+				BufferedReader br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));) {
+
+			System.out.println(name[name.length - 1]);
+			// nothing is merged in this mode, so the count reported below stays 0
+			int changed = 0;
+			List<JSONObject> obs = new ArrayList<JSONObject>();
+			line = br.readLine();
+			while (line != null && !line.trim().isEmpty()) {
+				JSONObject obj = new JSONObject();
+				// writeObject() reads "pre" and "post", so both keys are set (empty)
+				obj.put("pre", "");
+
+				// only the marker is removed; the clean-up done in featureExtract()
+				// (dots, quotes, brackets, "@-@") is not applied here
+				String snt = line.replace("# ::tok ", "").replace("# ::snt ", "");
+				obj.put("snt", snt);
+
+				//feature extraction and connecting of phrase, no change of tokenization
+				HashMap<String, LinkedList<String>> data = extractSentence(obj, pipeline,false);
+				// leftover debug output for one specific sentence
+				if (snt.startsWith("the ones who are suffering are the ordinary people :")) {
+					System.out.println("!!!!\n"+snt+"\n!!!!");
+					System.out.println( data.get("tok"));
+				}
+
+				obj.put("ner", data.get("ner"));
+				obj.put("lem", data.get("lem"));
+				obj.put("tok", data.get("tok"));
+				obj.put("pos", data.get("pos"));
+				obs.add(obj);
+
+				// progress report: dump every 500th entry
+				if (obs.size() % 500 == 0) {
+					System.out.println(obs.size() + " " + name[name.length - 1]);
+					obj.keys().forEachRemaining(k -> {
+						System.out.println(k + ": " + obj.get(k));
+					});
+				}
+
+				obj.put("post", "");
+
+				// advance to the next sentence, skipping blank lines
+				line = br.readLine();
+				while (line != null && line.trim().isEmpty()) {
+					line = br.readLine();
+				}
+
+			}
+			System.out.println("\n" + name[name.length - 1] + " done. Total sentences: " + obs.size() + "\n");
+			System.out.println("\n" + changed + " changed." + "\n");
+			String out = obs.stream().map(obj -> writeObject(obj)).collect(Collectors.joining("\n"));
+			// Encode as UTF-8 like the reader (not the platform default);
+			// "\\." makes the pattern match a literal dot before "txt".
+			Files.write(Paths.get(file.replaceAll("\\.txt(_[a-z]*)*", ".txt_processed")), out.getBytes("UTF-8"));
+
+		} catch (IOException e) {
+			e.printStackTrace();
+		} catch (NullPointerException e) {
+			System.out.println(file + "  null pointer??");
+			System.out.println(line + "  null pointer??");
+			e.printStackTrace();
+		}
+	}
+
+	/** Joins the elements of {@code arr} with tabs; an empty array yields "" instead of throwing. */
+	public String jsonArrayToWhiteSpaceString(JSONArray arr) {
+		StringBuilder out = new StringBuilder();
+		for (Object s : arr)
+			out.append('\t').append(s.toString());
+		return out.length() == 0 ? "" : out.substring(1); // drop the leading tab
+	}
+
+	/** Renders one entry as text: "pre", then tab-separated "# ::tok/lem/pos/ner" lines, then "post". */
+	public String writeObject(JSONObject obj) {
+		StringBuilder out = new StringBuilder();
+		if (!obj.getString("pre").trim().isEmpty())
+			out.append(obj.getString("pre"));
+		String tok = jsonArrayToWhiteSpaceString(obj.getJSONArray("tok"));
+		out.append("# ::tok\t" + tok + "\n");
+		String lemma = jsonArrayToWhiteSpaceString(obj.getJSONArray("lem"));
+		out.append("# ::lem\t" + lemma + "\n");
+		String pos = jsonArrayToWhiteSpaceString(obj.getJSONArray("pos"));
+		out.append("# ::pos\t" + pos + "\n");
+		String ner = jsonArrayToWhiteSpaceString(obj.getJSONArray("ner"));
+		out.append("# ::ner\t" + ner + "\n");
+		// The fields are tab-joined, so the per-field counts must be compared on
+		// "\t"; splitting on " " did not separate the fields and checked nothing.
+		assert tok.split("\t").length == lemma.split("\t").length;
+		assert tok.split("\t").length == pos.split("\t").length;
+		assert tok.split("\t").length == ner.split("\t").length;
+
+		if (!obj.getString("post").trim().isEmpty())
+			out.append(obj.getString("post") + "\n");
+		return out.toString();
+	}
+
+	public volatile int positive = 0; // NOTE(review): these three counters are not used anywhere else in this class
+	public volatile int truth = 0;
+	public volatile int truth_positive = 0;
+	/** Annotates obj's "snt" text with the pipeline and returns parallel "tok"/"lem"/"pos"/"ner" lists; when retoken is set, tokens forming a phrase listed in map are joined with "-". */
+	public HashMap<String, LinkedList<String>> extractSentence(JSONObject obj, StanfordCoreNLP pipeline,boolean retoken) {
+		String text = obj.getString("snt");
+		obj.put("snt", text);
+		// create an empty Annotation just with the given text
+
+		Annotation sent = new Annotation(text);
+		HashMap<String, LinkedList<String>> data = new HashMap<String, LinkedList<String>>();
+		// run all Annotators on this text
+		pipeline.annotate(sent);
+		LinkedList<String> lemma = new LinkedList<String>();
+		LinkedList<String> tok = new LinkedList<String>();
+		LinkedList<String> ner = new LinkedList<String>();
+		LinkedList<String> pos = new LinkedList<String>();
+		String p_l = "";
+		String p_s = "";
+		String p_n = "";
+		String p_p = "";
+		int changed = 0;
+		// NOTE(review): changed, p_n and p_p are assigned but never read in this method
+		List<CoreMap> sentences = sent.get(SentencesAnnotation.class);
+		Set<String> tmp = new HashSet<String>();
+		for (CoreMap sentence : sentences) {
+			// Merge rule (retoken only): glue a token onto the previous one when its lemma is in tmp and the joined form is either not a key of map or is continued by the next lemma.
+			for (int i = 0; i < sentence.get(TokensAnnotation.class).size(); i++) {
+				CoreLabel token = sentence.get(TokensAnnotation.class).get(i);
+				if (retoken  &&tmp.contains(token.get(LemmaAnnotation.class))
+						&& (!map.containsKey(lemma.getLast() + "-" + token.get(LemmaAnnotation.class)) //not x-y-z
+								|| (i + 1 < sentence.get(TokensAnnotation.class).size() - 1 && 
+										map.get(lemma.getLast() + "-" + token.get(LemmaAnnotation.class))
+										.contains(sentence.get(TokensAnnotation.class).get(i + 1)
+												.get(LemmaAnnotation.class))
+								// NOTE(review): "i + 1 < size - 1" rules out a continuation that is the last token - confirm this is intended
+								))) {
+					p_s = tok.removeLast();
+					p_l = lemma.removeLast();
+					p_p = pos.removeLast();
+					p_n = ner.removeLast();
+					changed = 1;
+					tok.add(p_s + "-" + token.get(TextAnnotation.class));
+					lemma.add(p_l + "-" + token.get(LemmaAnnotation.class).toLowerCase());
+					pos.add("COMP"); // a merged phrase gets the pos tag "COMP" and the ner tag "O"
+					ner.add("O");
+				} else {
+					// plain token: the lemma is stored lower-cased; lemmas containing "www." or "http" get the ner tag "URL"
+					tok.add(token.get(TextAnnotation.class));
+					lemma.add(token.get(LemmaAnnotation.class).toLowerCase());
+					pos.add(token.get(PartOfSpeechAnnotation.class));
+					if (lemma.get(lemma.size() - 1).contains("www.") || lemma.get(lemma.size() - 1).contains("http"))
+						ner.add("URL");
+					else
+						ner.add(token.get(NamedEntityTagAnnotation.class));
+
+				}
+				tmp = map.getOrDefault(lemma.getLast(), new HashSet<String>()); // words that may follow the lemma just emitted
+			}
+
+		}
+		assert ner.size() == lemma.size() && lemma.size() == tok.size() && tok.size() == pos.size();
+		data.put("lem", lemma);
+		data.put("tok", tok);
+		data.put("pos", pos);
+		data.put("ner", ner);
+		return data;
+
+	}
+
+	// Magnitude words checked by number_read(); "-" is listed here but is always rejected there.
+	private String[] tobehashed = { "hundred", "hundreds", "thousand", "thousands", "million", "millions",
+			"billion", "billions", "trillion", "trillions", "-" };
+	private HashSet<String> num_txts = new HashSet<>(Arrays.asList(tobehashed));
+	public boolean number_read(String old, String t) {
+		return !(!num_txts.contains(t) || old.equals("-") || t.equals("-")); // t is a listed word, neither is "-"
+	}
+
+	/**
+	 * Glues a CD-tagged magnitude word (see number_read) onto a directly preceding CD token:
+	 * tok and lem are joined with ",", pos stays "CD" and the first token's ner is kept.
+	 * The four lists in obj are replaced; returns the number of merges.
+	 */
+	public int post_procee_number(HashMap<String, LinkedList<String>> obj) {
+		LinkedList<String> srcNer = obj.get("ner");
+		LinkedList<String> srcLem = obj.get("lem");
+		LinkedList<String> srcTok = obj.get("tok");
+		LinkedList<String> srcPos = obj.get("pos");
+		LinkedList<String> mergedLem = new LinkedList<String>();
+		LinkedList<String> mergedTok = new LinkedList<String>();
+		LinkedList<String> mergedNer = new LinkedList<String>();
+		LinkedList<String> mergedPos = new LinkedList<String>();
+		int merges = 0;
+		for (int i = 0; i < srcLem.size(); i++) {
+			boolean glue = !mergedPos.isEmpty() && srcPos.get(i).equals("CD") && mergedPos.getLast().equals("CD")
+					&& number_read(mergedLem.getLast(), srcLem.get(i));
+			if (!glue) {
+				mergedLem.add(srcLem.get(i));
+				mergedTok.add(srcTok.get(i));
+				mergedNer.add(srcNer.get(i));
+				mergedPos.add(srcPos.get(i));
+				continue;
+			}
+			if (mergedLem.getLast().equals("-")) {
+				System.out.println("!!!" + mergedLem.getLast() + " " + srcLem.get(i));
+				System.out.println("!!!" + srcTok);
+				System.out.println("!!!" + srcPos);
+			}
+			merges++;
+			String prevTok = mergedTok.removeLast();
+			String prevLem = mergedLem.removeLast();
+			mergedPos.removeLast();
+			String prevNer = mergedNer.removeLast();
+			mergedTok.add(prevTok + "," + srcTok.get(i));
+			mergedLem.add(prevLem + "," + srcLem.get(i));
+			mergedPos.add("CD");
+			mergedNer.add(prevNer);
+		}
+		obj.put("lem", mergedLem);
+		obj.put("tok", mergedTok);
+		obj.put("pos", mergedPos);
+		obj.put("ner", mergedNer);
+		return merges;
+	}
+
+	/**
+	 * Concatenates (no separator) a token onto the previous one when both carry the same non-"O" ner tag
+	 * and the current lemma is "'s" or "-", or it directly follows a merged "-".
+	 * The merged token keeps the previous pos and ner; the lists in obj are replaced; returns the merge count.
+	 */
+	public int post_procee_ner(HashMap<String, LinkedList<String>> obj) {
+		LinkedList<String> srcNer = obj.get("ner");
+		LinkedList<String> srcLem = obj.get("lem");
+		LinkedList<String> srcTok = obj.get("tok");
+		LinkedList<String> srcPos = obj.get("pos");
+		LinkedList<String> mergedLem = new LinkedList<String>();
+		LinkedList<String> mergedTok = new LinkedList<String>();
+		LinkedList<String> mergedNer = new LinkedList<String>();
+		LinkedList<String> mergedPos = new LinkedList<String>();
+		int merges = 0;
+		boolean afterDash = false;
+		for (int i = 0; i < srcLem.size(); i++) {
+			boolean joinable = !srcNer.get(i).equals("O")
+					&& (srcLem.get(i).equals("'s") || srcLem.get(i).equals("-") || afterDash)
+					&& !mergedNer.isEmpty() && mergedNer.getLast().equals(srcNer.get(i));
+			if (joinable) {
+				String prevTok = mergedTok.removeLast();
+				String prevLem = mergedLem.removeLast();
+				String prevPos = mergedPos.removeLast();
+				String prevNer = mergedNer.removeLast();
+				mergedTok.add(prevTok + srcTok.get(i));
+				mergedLem.add(prevLem + srcLem.get(i));
+				mergedPos.add(prevPos);
+				mergedNer.add(prevNer);
+				afterDash = srcLem.get(i).equals("-");
+				merges++;
+			} else {
+				mergedLem.add(srcLem.get(i));
+				mergedTok.add(srcTok.get(i));
+				mergedNer.add(srcNer.get(i));
+				mergedPos.add(srcPos.get(i));
+				afterDash = false;
+			}
+		}
+		obj.put("lem", mergedLem);
+		obj.put("tok", mergedTok);
+		obj.put("pos", mergedPos);
+		obj.put("ner", mergedNer);
+		return merges;
+	}
+
+	/** Runs featureExtract on every file of {@code folder} whose name ends with {@code suffix}, via a parallel stream. */
+	public void featureExtractFolder(String folder, String suffix) {
+		folderToFilesPath(folder, suffix).parallelStream().forEach(this::featureExtract);
+	}
+
+	/** Runs featureExtractSentenceOnly on every file of {@code folder} whose name ends with {@code suffix}, via a parallel stream. */
+	public void featureExtractFolderSentenceOnly(String folder, String suffix) {
+		List<String> paths = folderToFilesPath(folder, suffix);
+		paths.parallelStream().forEach(this::featureExtractSentenceOnly);
+	}
+
+	/**
+	 * Pre-processes the dev, training and test splits of the AMR r2 alignments.
+	 * Optional args: [0] data root (default: the user.home property), [1] joints file (default: "joints.txt").
+	 */
+	public static void main(String[] args) {
+		String home = args.length > 0 ? args[0] : System.getProperty("user.home");
+		String split = home + "/Data/amr_annotation_r2/data/alignments/split/";
+		convertingAMR convertor = new convertingAMR(args.length > 1 ? args[1] : "joints.txt");
+		System.out.println("Processing r2");
+		System.out.println("Processing Dev");
+		convertor.featureExtractFolder(split + "dev/", "combined.txt_");
+		System.out.println("Processing Training");
+		convertor.featureExtractFolder(split + "training/", "combined.txt_");
+		System.out.println("Processing Test");
+		convertor.featureExtractFolder(split + "test/", "combined.txt_");
+	}
+
+}
diff --git a/AMR_FEATURE/src/json-20170516.jar b/AMR_FEATURE/src/json-20170516.jar
new file mode 100644
index 0000000..5033f66
Binary files /dev/null and b/AMR_FEATURE/src/json-20170516.jar differ
diff --git a/README.md b/README.md
index 87ef34a..74e1a33 100644
--- a/README.md
+++ b/README.md
@@ -25,8 +25,10 @@ If you use our code, please cite our paper as follows:
 
 ##Preprocessing:
 Combine all *.txt files into a single one, and use stanford corenlp to extract ner, pos and lemma.
-Processed file saved in the same folder.
+Processed file saved in the same folder. 
 `python src/preprocessing.py `
+or process the output of the [AMR-to-English aligner](https://www.isi.edu/natural-language/mt/amr_eng_align.pdf) using the Java program in AMR_FEATURE (I used Eclipse to run it).
+
 Build the copying dictionary and recategorization system (can skip as they are in data/).
 `python src/rule_system_build.py `
 Build data into tensor.
@@ -57,7 +59,7 @@ Keeping the files under data/ folder unchanged, download [model](https://drive.g
 Should allow one to run parsing.
 
 ##Notes
-This code starts with sentence original AMR files, while the paper version is trained on tokenized version provided by [AMR-to-English aligner](https://www.isi.edu/natural-language/mt/amr_eng_align.pdf)
+The Python script src/preprocessing.py starts from the sentences in the original AMR files, while the paper version is trained on the tokenized version provided by the [AMR-to-English aligner](https://www.isi.edu/natural-language/mt/amr_eng_align.pdf).
 So the results could be slightly different.
 
 ## Contact
diff --git a/data/aux_dict b/data/aux_dict
index 618aa65..eb885c3 100644
Binary files a/data/aux_dict and b/data/aux_dict differ
diff --git a/data/category_dict b/data/category_dict
index a4e0192..a20987e 100644
Binary files a/data/category_dict and b/data/category_dict differ
diff --git a/data/lemma_dict b/data/lemma_dict
index c4fbf06..79dad8d 100644
Binary files a/data/lemma_dict and b/data/lemma_dict differ
diff --git a/data/ner_dict b/data/ner_dict
index 3b90e69..c4f2423 100644
Binary files a/data/ner_dict and b/data/ner_dict differ
diff --git a/data/pos_dict b/data/pos_dict
index 11881f5..8332c07 100644
Binary files a/data/pos_dict and b/data/pos_dict differ
diff --git a/data/rel_dict b/data/rel_dict
index 83d01a0..b3f9098 100644
Binary files a/data/rel_dict and b/data/rel_dict differ
diff --git a/data/sensed_dict b/data/sensed_dict
index a53266d..b474c18 100644
Binary files a/data/sensed_dict and b/data/sensed_dict differ
diff --git a/data/word_dict b/data/word_dict
index 27091cc..e41b97e 100644
Binary files a/data/word_dict and b/data/word_dict differ
diff --git a/utility/__pycache__/Naive_Scores.cpython-36.pyc b/utility/__pycache__/Naive_Scores.cpython-36.pyc
index b795110..6ca58fa 100644
Binary files a/utility/__pycache__/Naive_Scores.cpython-36.pyc and b/utility/__pycache__/Naive_Scores.cpython-36.pyc differ
diff --git a/utility/__pycache__/constants.cpython-36.pyc b/utility/__pycache__/constants.cpython-36.pyc
index ce1518a..b781b9e 100644
Binary files a/utility/__pycache__/constants.cpython-36.pyc and b/utility/__pycache__/constants.cpython-36.pyc differ
diff --git a/utility/constants.py b/utility/constants.py
index 69be8c5..9472973 100644
--- a/utility/constants.py
+++ b/utility/constants.py
@@ -3,7 +3,7 @@
 
 # Change the path according to your system
 
-save_to = 'model/'    #the folder amr model will be saved to  (model name is parameterized by some hyper parameter)
+save_to = '/disk/scratch/s1544871/model/'    #the folder amr model will be saved to  (model name is parameterized by some hyper parameter)
 train_from = 'model/gpus_0valid_best.pt'  #default model loading
 embed_path = "/disk/scratch/s1544871/glove.840B.300d.txt"    #file containing glove embedding
 core_nlp_url = 'http://localhost:9000'     #local host url of standford corenlp server