Browse files

Pretty sure there isn't a single mention of anything particular to us here. Should be ready for release on GitHub. :)
  • Loading branch information...
1 parent bb1f1f1 commit 1300c24c665a770e1adbbcc58443ba10fc769802 @dmnapolitano dmnapolitano committed Mar 14, 2013
View
20 build.xml
@@ -6,23 +6,20 @@
<property name="genjava" location="gen-java" />
<property name="genpy" location="gen-py" />
<property name="build" location="build" />
-
+ <property environment="env"/>
+
<path id="libs.classpath">
- <fileset dir="/home/nlp-text/dynamic/NLPTools/thrift">
- <include name="*.jar" />
- </fileset>
- <fileset dir="/home/nlp-text/dynamic/NLPTools/stanford-core">
- <include name="stanford-corenlp-1.3.4.jar" />
- </fileset>
+ <pathelement path="${env.CLASSPATH}" />
</path>
<path id="build.classpath">
<path refid="libs.classpath" />
<pathelement path="${genjava}" />
</path>
+ <!-- Set $THRIFTPATH to the root of your thrift installation. -->
<target name="combine_thrift_jars">
- <zip destfile="/home/nlp-text/dynamic/NLPTools/thrift/all_thrift_jars.jar">
- <zipgroupfileset dir="/home/nlp-text/dynamic/NLPTools/thrift/lib" includes="*.jar"/>
+ <zip destfile="${env.THRIFTPATH}/all_thrift_jars.jar">
+ <zipgroupfileset dir="${env.THRIFTPATH}/lib" includes="*.jar"/>
</zip>
</target>
@@ -35,10 +32,11 @@
<target name="generate">
<!-- Generate the thrift gen-java and gen-py source -->
- <exec executable="/home/nlp-text/dynamic/NLPTools/thrift/bin/thrift" failonerror="true">
+ <!-- Assumes thrift is on your path. -->
+ <exec executable="thrift" failonerror="true">
<arg line="--gen java corenlp.thrift"/>
</exec>
- <exec executable="/home/nlp-text/dynamic/NLPTools/thrift/bin/thrift" failonerror="true">
+ <exec executable="thrift" failonerror="true">
<arg line="--gen py:utf8strings,slots,new_style corenlp.thrift"/>
</exec>
</target>
View
BIN gen-py/corenlp/StanfordCoreNLP.pyc
Binary file not shown.
View
BIN gen-py/corenlp/__init__.pyc
Binary file not shown.
View
BIN gen-py/corenlp/ttypes.pyc
Binary file not shown.
View
4 scripts/parse-java.sh
@@ -1,12 +1,12 @@
#!/bin/bash
-CMD="java -cp $CLASSPATH StanfordParserClient"
+CMD="java -cp $CLASSPATH:stanford-corenlp-wrapper.jar StanfordCoreNLPClient"
if [ $# -eq 3 ]; then
SERVER=$1
PORT=$2
INFILE=$3
$CMD $SERVER $PORT $INFILE
else
- echo "Usage: parse.sh <server> <port> [<inputfile>]"
+ echo "Usage: parse-java.sh <server> <port> [<inputfile>]"
fi
View
12 scripts/parse-python.sh
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-CMD="python parser_client.py"
-
-if [ $# -eq 3 ]; then
- SERVER=$1
- PORT=$2
- INFILE=$3
- $CMD $SERVER $PORT $INFILE
-else
- echo "Usage: parse.sh <server> <port> [<inputfile>]"
-fi
View
129 scripts/parser_client.py
@@ -1,52 +1,77 @@
-#!/usr/bin/env python
-
-import sys
-
-from stanfordparser import StanfordParser
-from stanfordparser.ttypes import *
-
-from thrift import Thrift
-from thrift.transport import TSocket
-from thrift.transport import TTransport
-from thrift.protocol import TBinaryProtocol
-
-
-# get command line arguments
-args = sys.argv[1:]
-if len(args) != 3:
- sys.stderr.write('Usage: StanfordParserClient.py <server> <port> <inputfile>\n')
- sys.exit(2)
-else:
- server = args[0]
- port = int(args[1])
- infile = args[2]
-
-
-try:
-
- # Make socket
- transport = TSocket.TSocket(server, port)
-
- # Buffering is critical. Raw sockets are very slow
- transport = TTransport.TBufferedTransport(transport)
-
- # Wrap in a protocol
- protocol = TBinaryProtocol.TBinaryProtocol(transport)
-
- # Create a client to use the protocol encoder
- client = StanfordParser.Client(protocol)
-
- # Connect!
- transport.open()
-
- # parse each line of the file
- with open(infile, 'r') as f:
- for line in f:
- parse = client.parse_sentence(line)
- sys.stdout.write(parse + '\n')
-
- # Close!
- transport.close()
-
-except Thrift.TException, tx:
- print '%s' % (tx.message)
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# The purpose of this client is to show how to send over a few sentences in every way possible.
+# It is also for me to unit test things. >:)
+
+from corenlp import StanfordCoreNLP
+from corenlp.ttypes import *
+from thrift import Thrift
+from thrift.transport import TSocket, TTransport
+from thrift.protocol import TBinaryProtocol
+
+import sys
+
+
+# get command line arguments
+args = sys.argv[1:]
+if len(args) != 2:
+ sys.stderr.write('Usage: parser_client.py <server> <port>\n')
+ sys.exit(2)
+else:
+ server = args[0]
+ port = int(args[1])
+
+
+# Good for testing long sentences
+#sentences = ["Well aware that the opinions and belief of men depend not on their own will , but follow involuntarily the evidence proposed to their minds ; that Almighty God hath created the mind free , and manifested his supreme will that free it shall remain by making it altogether insusceptible of restraint ; that all attempts to influence it by temporal punishments , or burthens , or by civil incapacitations , tend only to beget habits of hypocrisy and meanness , and are a departure from the plan of the holy author of our religion , who being lord both of body and mind , yet chose not to propagate it by coercions on either , as was in his almighty power to do , but to exalt it by its influence on reason alone ; that the impious presumption of legislature and ruler , civil as well as ecclesiastical , who , being themselves but fallible and uninspired men , have assumed dominion over the faith of others , setting up their own opinions and modes of thinking as the only true and infallible , and as such endeavoring to impose them on others , hath established and maintained false religions over the greatest part of the world and through all time : that to compel a man to furnish contributions of money for the propagation of opinions which he disbelieves and abhors is sinful and tyrannical ; that even the forcing him to support this or that teacher of his own religious persuasion is depriving him of the comfortable liberty of giving his contributions to the particular pastor whose morals he would make his pattern and whose powers he feels most persuasive to righteousness , and is withdrawing from the ministry those temporary rewards which , proceeding from an approbation of their personal conduct , are an additional incitement to earnest and unremitting labors for the instruction of mankind ; that our civil rights have no dependence on our religious opinions , any more than our opinions in physics or geometry ; and therefore the proscribing any citizen as unworthy 
the public confidence by laying upon him an incapacity of being called to offices of trust or emolument , unless he profess or renounce this or that religious opinion , is depriving him injudiciously of those privileges and advantages to which , in common with his fellow citizens , he has a natural right ; that it tends also to corrupt the principles of that very religion it is meant to encourage , by bribing with a monopoly of worldly honors and emoluments those who will externally profess and conform to it ; that though indeed these are criminals who do not withstand such temptation , yet neither are those innocent who lay the bait in their way ; that the opinions of men are not the object of civil government , nor under its jurisdiction ; that to suffer the civil magistrate to intrude his powers into the field of opinion and to restrain the profession or propagation of principles on supposition of their ill tendency is a dangerous fallacy , which at once destroys all religious liberty , because he being of course judge of that tendency will make his opinions the rule of judgment and approve or condemn the sentiments of others only as they shall square with or suffer from his own ; that it is time enough for the rightful purposes of civil government for its officers to interfere when principles break out into overt acts against peace and good order ; and finally , that the truth is great and will prevail if left to herself ; that she is the proper and sufficient antagonist to error , and has nothing to fear from the conflict unless by human interposition disarmed of her natural weapons , free argument and debate ; errors ceasing to be dangerous when it is permitted freely to contradict them ."]
+
+# Taken from the English Wikipedia entry for "Fox". :)
+arbitrary_text = u"Members of about 37 species are referred to as foxes, of which only 12 species actually belong to the Vulpes genus of \"true foxes\". By far the most common and widespread species of fox is the red fox (Vulpes vulpes), although various species are found on almost every continent. The presence of fox-like carnivores all over the globe, together with their widespread reputation for cunning, has contributed to their appearance in popular culture and folklore in many societies around the world (see also Foxes in culture). The hunting of foxes with packs of hounds, long an established pursuit in Europe, especially the British Isles, was exported by European settlers to various parts of the New World."
+
+tokenized_sentences = [u"Members of about 37 species are referred to as foxes , of which only 12 species actually belong to the Vulpes genus of `` true foxes '' .".split(" "),
+ u"By far the most common and widespread species of fox is the red fox -LRB- Vulpes vulpes -RRB- , although various species are found on almost every continent .".split(" "),
+ u"The presence of fox-like carnivores all over the globe , together with their widespread reputation for cunning , has contributed to their appearance in popular culture and folklore in many societies around the world -LRB- see also Foxes in culture -RRB- .".split(" "),
+ u"The hunting of foxes with packs of hounds , long an established pursuit in Europe , especially the British Isles , was exported by European settlers to various parts of the New World .".split(" ")]
+
+
+# Make socket
+transport = TSocket.TSocket(server, port)
+
+# Buffering is critical. Raw sockets are very slow
+transport = TTransport.TBufferedTransport(transport)
+
+# Wrap in a protocol
+protocol = TBinaryProtocol.TBinaryProtocol(transport)
+
+# Create a client to use the protocol encoder
+client = StanfordCoreNLP.Client(protocol)
+
+# Connect!
+transport.open()
+
+# This list is for options for how we'd like the output formatted. See README.md for the full list of possible options.
+# Note that the DEFAULT is what you would get if you specified "oneline" on the command line, or "None" here.
+#outputOptions = ["-outputFormat", "typedDependencies,penn", "-outputFormatOptions", "basicDependencies"]
+outputOptions = None
+#outputOptions = ["-outputFormat", "oneline"] # Same as specifying "None", as above.
+
+
+try:
+ parse_trees = client.parse_text(arbitrary_text, outputOptions)
+ for result in parse_trees:
+ sys.stdout.write(result.tree.strip() + " [" + str(result.score) + "]\n")
+except Exception as e:
+ print e
+
+print
+
+for sentence in tokenized_sentences:
+ try:
+ tree = client.parse_tokens(sentence, outputOptions)
+ sys.stdout.write(tree.tree.strip() + " [" + str(tree.score) + "]\n")
+ except Exception as e:
+ print e
+
+# All done
+transport.close()
View
77 scripts/parser_client_with_expunct.py
@@ -1,77 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-# The purpose of this client is to show how to send over a few sentences as you tokenize them with expunct.
-# It is also for me to unit test things. >:)
-# For an alternative way to call the parser (with Python), please see parser_client.py.
-
-from corenlp import StanfordCoreNLP
-from corenlp.ttypes import *
-from thrift import Thrift
-from thrift.transport import TSocket, TTransport
-from thrift.protocol import TBinaryProtocol
-
-from bs4 import UnicodeDammit
-import re
-import sys
-
-import expunct
-
-# get command line arguments
-args = sys.argv[1:]
-if len(args) != 2:
- sys.stderr.write('Usage: parser_client_with_expunct.py <server> <port>\n')
- sys.exit(2)
-else:
- server = args[0]
- port = int(args[1])
-
-
-# Good for testing long sentences
-#sentences = ["Well aware that the opinions and belief of men depend not on their own will , but follow involuntarily the evidence proposed to their minds ; that Almighty God hath created the mind free , and manifested his supreme will that free it shall remain by making it altogether insusceptible of restraint ; that all attempts to influence it by temporal punishments , or burthens , or by civil incapacitations , tend only to beget habits of hypocrisy and meanness , and are a departure from the plan of the holy author of our religion , who being lord both of body and mind , yet chose not to propagate it by coercions on either , as was in his almighty power to do , but to exalt it by its influence on reason alone ; that the impious presumption of legislature and ruler , civil as well as ecclesiastical , who , being themselves but fallible and uninspired men , have assumed dominion over the faith of others , setting up their own opinions and modes of thinking as the only true and infallible , and as such endeavoring to impose them on others , hath established and maintained false religions over the greatest part of the world and through all time : that to compel a man to furnish contributions of money for the propagation of opinions which he disbelieves and abhors is sinful and tyrannical ; that even the forcing him to support this or that teacher of his own religious persuasion is depriving him of the comfortable liberty of giving his contributions to the particular pastor whose morals he would make his pattern and whose powers he feels most persuasive to righteousness , and is withdrawing from the ministry those temporary rewards which , proceeding from an approbation of their personal conduct , are an additional incitement to earnest and unremitting labors for the instruction of mankind ; that our civil rights have no dependence on our religious opinions , any more than our opinions in physics or geometry ; and therefore the proscribing any citizen as unworthy 
the public confidence by laying upon him an incapacity of being called to offices of trust or emolument , unless he profess or renounce this or that religious opinion , is depriving him injudiciously of those privileges and advantages to which , in common with his fellow citizens , he has a natural right ; that it tends also to corrupt the principles of that very religion it is meant to encourage , by bribing with a monopoly of worldly honors and emoluments those who will externally profess and conform to it ; that though indeed these are criminals who do not withstand such temptation , yet neither are those innocent who lay the bait in their way ; that the opinions of men are not the object of civil government , nor under its jurisdiction ; that to suffer the civil magistrate to intrude his powers into the field of opinion and to restrain the profession or propagation of principles on supposition of their ill tendency is a dangerous fallacy , which at once destroys all religious liberty , because he being of course judge of that tendency will make his opinions the rule of judgment and approve or condemn the sentiments of others only as they shall square with or suffer from his own ; that it is time enough for the rightful purposes of civil government for its officers to interfere when principles break out into overt acts against peace and good order ; and finally , that the truth is great and will prevail if left to herself ; that she is the proper and sufficient antagonist to error , and has nothing to fear from the conflict unless by human interposition disarmed of her natural weapons , free argument and debate ; errors ceasing to be dangerous when it is permitted freely to contradict them ."]
-
-# Good for testing what is probably the most common use-case. Taken from the English Wikipedia entry for "Fox". :)
-sentences = ["Members of about 37 species are referred to as foxes, of which only 12 species actually belong to the Vulpes genus of \"true foxes\".", "By far the most common and widespread species of fox is the red fox (Vulpes vulpes), although various species are found on almost every continent.", "The presence of fox-like carnivores all over the globe, together with their widespread reputation for cunning, has contributed to their appearance in popular culture and folklore in many societies around the world (see also Foxes in culture).", "The hunting of foxes with packs of hounds, long an established pursuit in Europe, especially the British Isles, was exported by European settlers to various parts of the New World."]
-
-# for testing named entity systems
-#sentences = ["My name is Diane and I live in New Jersey.", "I sometimes go to New York.", "The Food and Drug Administration is an organization."]
-
-#sentences = [u"During reconstruction , he found that certain tokenization decisions might be non-standard and impact the accuracy of the Stanford parser that we plan to use to extract features , such as separating hyphens and apostrophes with spaces , as in today ‘s and co – workers ."]
-#sentences = [u"During reconstruction, he found that certain tokenization decisions might be non-standard and impact the accuracy of the Stanford parser that we plan to use to extract features, such as separating hyphens and apostrophes with spaces, as in today's and co–workers."]
-
-transport = TSocket.TSocket(server, port)
-transport = TTransport.TBufferedTransport(transport)
-protocol = TBinaryProtocol.TBinaryProtocol(transport)
-client = StanfordCoreNLP.Client(protocol)
-
-transport.open()
-
-# This list is for options for how we'd like the output formatted. See README.md for the full list of possible options.
-# Note that the DEFAULT is what you would get if you specified "oneline" on the command line, or "None" here.
-#outputOptions = ["-outputFormat", "typedDependencies,penn", "-outputFormatOptions", "basicDependencies"]
-outputOptions = None
-#outputOptions = ["-outputFormat", "oneline"]
-
-for sentence in sentences:
- #sentence = UnicodeDammit(sentence, ["windows-1252", "utf8"]).unicode_markup
- sentence = expunct.word_tokenize(sentence, ptb_normalization=True) # You ought to use this flag if interacting with Stanford anything afterwards
- # if pre-Expuncted
- #sentence = sentence.split(" ")
- try:
- tree = client.parse_tokens(sentence, outputOptions)
- tree = tree.tree.strip()
-
- # Sometimes a sentence you send over as one sentence is interpreted as two by the Stanford Parser.
- # This bit of code is here to make sure no parse trees are lost in the results when the results
- # are of the default "oneline" format. This is particularly important if these results are going
- # into say, a Python list, where each element is a parse tree.
- if "\n(ROOT" in tree:
- for t in re.split(r'(\n\(ROOT.+)', tree):
- if len(t) > 0:
- print t.strip()
- else:
- print tree
- except Exception as e:
- print e
-
-transport.close()
View
BIN stanford-corenlp-wrapper.jar
Binary file not shown.

0 comments on commit 1300c24

Please sign in to comment.