This repository has been archived by the owner on Apr 14, 2021. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds NLP project (lib) in preparation for operation Ghamhilation.
- Loading branch information
1 parent
0a6d272
commit a7920a9
Showing
83 changed files
with
37,475 additions
and
19,490 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
<?xml version="1.0" encoding="utf-8"?> | ||
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> | ||
<!-- Project-wide settings. Configuration and Platform fall back to Debug and AnyCPU
     when the caller supplies neither; the output is a class library named NLP
     (root namespace NLP) targeting .NET Framework v4.0. -->
<PropertyGroup> | ||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> | ||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> | ||
<ProjectGuid>{FF9060CA-0692-4170-A98B-C01BB03E726A}</ProjectGuid> | ||
<OutputType>Library</OutputType> | ||
<AppDesignerFolder>Properties</AppDesignerFolder> | ||
<RootNamespace>NLP</RootNamespace> | ||
<AssemblyName>NLP</AssemblyName> | ||
<TargetFrameworkVersion>v4.0</TargetFrameworkVersion> | ||
<FileAlignment>512</FileAlignment> | ||
<TargetFrameworkProfile /> | ||
</PropertyGroup> | ||
<!-- Debug|AnyCPU: unoptimized build with full debug symbols, DEBUG and TRACE defined,
     written to bin\Debug\. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> | ||
<DebugSymbols>true</DebugSymbols> | ||
<DebugType>full</DebugType> | ||
<Optimize>false</Optimize> | ||
<OutputPath>bin\Debug\</OutputPath> | ||
<DefineConstants>DEBUG;TRACE</DefineConstants> | ||
<ErrorReport>prompt</ErrorReport> | ||
<WarningLevel>4</WarningLevel> | ||
</PropertyGroup> | ||
<!-- Release|AnyCPU: optimized build with pdb-only debug info, only TRACE defined,
     written to bin\Release\. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> | ||
<DebugType>pdbonly</DebugType> | ||
<Optimize>true</Optimize> | ||
<OutputPath>bin\Release\</OutputPath> | ||
<DefineConstants>TRACE</DefineConstants> | ||
<ErrorReport>prompt</ErrorReport> | ||
<WarningLevel>4</WarningLevel> | ||
</PropertyGroup> | ||
<!-- Assembly references.
     The IKVM.* assemblies (package folder IKVM.8.0.5449.0) and the Stanford POS tagger
     (package folder Stanford.NLP.POSTagger.3.5.0.0) are located through HintPath entries
     under ..\packages; the entries without a HintPath are referenced by name only.
     NOTE(review): the package versions in these paths presumably have to match
     packages.config; verify when upgrading either package. -->
<ItemGroup> | ||
<Reference Include="IKVM.AWT.WinForms"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.AWT.WinForms.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Beans"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Beans.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Charsets"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Charsets.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Cldrdata"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Cldrdata.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Corba"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Corba.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Core"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Core.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Jdbc"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Jdbc.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Localedata"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Localedata.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Management"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Management.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Media"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Media.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Misc"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Misc.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Naming"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Naming.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Nashorn"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Nashorn.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Remoting"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Remoting.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Security"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Security.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.SwingAWT"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.SwingAWT.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Text"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Text.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Tools"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Tools.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.Util"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.Util.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.XML.API"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.XML.API.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.XML.Bind"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.XML.Bind.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.XML.Crypto"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.XML.Crypto.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.XML.Parse"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.XML.Parse.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.XML.Transform"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.XML.Transform.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.XML.WebServices"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.XML.WebServices.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.OpenJDK.XML.XPath"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.OpenJDK.XML.XPath.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.Runtime"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.Runtime.dll</HintPath> | ||
</Reference> | ||
<Reference Include="IKVM.Runtime.JNI"> | ||
<HintPath>..\packages\IKVM.8.0.5449.0\lib\IKVM.Runtime.JNI.dll</HintPath> | ||
</Reference> | ||
<Reference Include="stanford-postagger-3.5.0"> | ||
<HintPath>..\packages\Stanford.NLP.POSTagger.3.5.0.0\lib\stanford-postagger-3.5.0.dll</HintPath> | ||
</Reference> | ||
<Reference Include="System" /> | ||
<Reference Include="System.Core" /> | ||
<Reference Include="System.Xml.Linq" /> | ||
<Reference Include="System.Data.DataSetExtensions" /> | ||
<Reference Include="Microsoft.CSharp" /> | ||
<Reference Include="System.Data" /> | ||
<Reference Include="System.Xml" /> | ||
</ItemGroup> | ||
<!-- C# sources compiled into the assembly. Resources.Designer.cs is flagged as
     auto-generated, design-time code that depends on Resources.resx. -->
<ItemGroup> | ||
<Compile Include="POST.cs" /> | ||
<Compile Include="Properties\AssemblyInfo.cs" /> | ||
<Compile Include="Properties\Resources.Designer.cs"> | ||
<AutoGen>True</AutoGen> | ||
<DesignTime>True</DesignTime> | ||
<DependentUpon>Resources.resx</DependentUpon> | ||
</Compile> | ||
<Compile Include="StringTools.cs" /> | ||
</ItemGroup> | ||
<!-- Files that belong to the project but carry the None item type: the NuGet package
     list and the tagger model file.
     NOTE(review): the model presumably reaches the assembly through the file reference
     in Properties\Resources.resx rather than through this item; confirm. -->
<ItemGroup> | ||
<None Include="packages.config" /> | ||
<None Include="Resources\wsj-0-18-bidirectional-nodistsim.tagger" /> | ||
</ItemGroup> | ||
<!-- Resources.resx is embedded in the assembly; ResXFileCodeGenerator is the generator
     associated with it and Resources.Designer.cs is recorded as its last output. -->
<ItemGroup> | ||
<EmbeddedResource Include="Properties\Resources.resx"> | ||
<Generator>ResXFileCodeGenerator</Generator> | ||
<LastGenOutput>Resources.Designer.cs</LastGenOutput> | ||
</EmbeddedResource> | ||
</ItemGroup> | ||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> | ||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it. | ||
Other similar extension points exist, see Microsoft.Common.targets. | ||
<Target Name="BeforeBuild"> | ||
</Target> | ||
<Target Name="AfterBuild"> | ||
</Target> | ||
--> | ||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
using System.Text.RegularExpressions; | ||
using System.IO; | ||
using java.io; | ||
using java.util; | ||
using edu.stanford.nlp.ling; | ||
using edu.stanford.nlp.tagger.maxent; | ||
using File = System.IO.File; | ||
|
||
|
||
|
||
namespace NLP | ||
{ | ||
/// <summary>
/// Part-of-speech tagger: a thin wrapper around the Stanford <c>MaxentTagger</c>
/// loaded with the <c>wsj-0-18-bidirectional-nodistsim</c> model.
/// </summary>
public class POST
{
    /// <summary>Name of the model file that is looked up (or created) next to this assembly.</summary>
    private const string ModelFileName = "wsj-0-18-bidirectional-nodistsim.tagger";

    // Matches one "_TAG" suffix followed by a single whitespace character in the tagger
    // output. The alternatives look like the Penn Treebank tag set plus punctuation tags.
    // Static so the compiled expression is built once per process instead of once per
    // POST instance (RegexOptions.Compiled makes construction expensive).
    private static readonly Regex tags = new Regex(@"_(C[CD]|DT|EX|FW|IN|JJ[SR]?|LS|MD|NN([PS]|PS)?|P(DT|OS|RP\$?)|R(B[RS]?|P)|SYM|TO|UH|VB[DGNPZ]?|W(DT|P\$?|RB)|[#$(),.:]|\'\'|\`\`)\s", RegexOptions.Compiled | RegexOptions.CultureInvariant);

    // The underlying Stanford tagger; assigned once in the constructor.
    private readonly MaxentTagger tagger;

    /// <summary>
    /// Creates the tagger. The model file is expected in the directory of the executing
    /// assembly; when it is not there yet it is written out from the embedded
    /// <c>wsj_0_18_bidirectional_nodistsim</c> resource before the tagger is loaded.
    /// </summary>
    public POST()
    {
        // CodeBase is a URI, so convert it to a local file-system path first.
        var assemblyPath = new Uri(System.Reflection.Assembly.GetExecutingAssembly().CodeBase).LocalPath;
        var modelPath = Path.Combine(Path.GetDirectoryName(assemblyPath), ModelFileName);

        if (!File.Exists(modelPath))
        {
            File.WriteAllBytes(modelPath, NLP.Properties.Resources.wsj_0_18_bidirectional_nodistsim);
        }

        tagger = new MaxentTagger(modelPath);
    }

    /// <summary>
    /// Tags <paramref name="input"/> with the Stanford tagger.
    /// </summary>
    /// <param name="input">Text to tag. Must not be <c>null</c>.</param>
    /// <param name="tagsOnly">
    /// When <c>true</c> (the default) only the recognised tags are returned, in order of
    /// appearance, each followed by the whitespace character that followed it in the
    /// tagger output, with trailing whitespace trimmed. When <c>false</c> the raw tagger
    /// output is returned unchanged.
    /// </param>
    /// <returns>The tag sequence or the raw tagged string, depending on <paramref name="tagsOnly"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    public string TagString(string input, bool tagsOnly = true)
    {
        if (input == null)
        {
            // Fail here with a clear .NET exception instead of somewhere inside the Java tagger.
            throw new ArgumentNullException("input");
        }

        var tagged = tagger.tagString(input);

        if (!tagsOnly)
        {
            return tagged;
        }

        // StringBuilder keeps this linear; repeated string concatenation is quadratic.
        var builder = new StringBuilder();

        foreach (Match match in tags.Matches(tagged))
        {
            // Skip the leading '_' and keep the trailing whitespace as the separator.
            builder.Append(match.Value, 1, match.Value.Length - 1);
        }

        return builder.ToString().TrimEnd();
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
using System.Reflection; | ||
using System.Runtime.CompilerServices; | ||
using System.Runtime.InteropServices; | ||
|
||
// Assembly-level metadata for the NLP library.
// General Information about an assembly is controlled through the following | ||
// set of attributes. Change these attribute values to modify the information | ||
// associated with an assembly. | ||
[assembly: AssemblyTitle("NLP")] | ||
// NOTE(review): description, configuration, company and trademark are left empty.
[assembly: AssemblyDescription("")] | ||
[assembly: AssemblyConfiguration("")] | ||
[assembly: AssemblyCompany("")] | ||
[assembly: AssemblyProduct("NLP")] | ||
[assembly: AssemblyCopyright("Copyright © 2015")] | ||
[assembly: AssemblyTrademark("")] | ||
[assembly: AssemblyCulture("")] | ||
|
||
// Setting ComVisible to false makes the types in this assembly not visible | ||
// to COM components. If you need to access a type in this assembly from | ||
// COM, set the ComVisible attribute to true on that type. | ||
[assembly: ComVisible(false)] | ||
|
||
// The following GUID is for the ID of the typelib if this project is exposed to COM | ||
[assembly: Guid("2c9e00b6-19c9-4243-b5bd-17e75750fd07")] | ||
|
||
// Version information for an assembly consists of the following four values: | ||
// | ||
// Major Version | ||
// Minor Version | ||
// Build Number | ||
// Revision | ||
// | ||
// You can specify all the values or you can default the Build and Revision Numbers | ||
// by using the '*' as shown below: | ||
// [assembly: AssemblyVersion("1.0.*")] | ||
// Both the assembly version and the file version are pinned to 1.0.0.0.
[assembly: AssemblyVersion("1.0.0.0")] | ||
[assembly: AssemblyFileVersion("1.0.0.0")] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
<?xml version="1.0" encoding="utf-8"?> | ||
<root> | ||
<!-- | ||
Microsoft ResX Schema | ||
Version 2.0 | ||
The primary goal of this format is to allow a simple XML format | ||
that is mostly human readable. The generation and parsing of the | ||
various data types are done through the TypeConverter classes | ||
associated with the data types. | ||
Example: | ||
... ado.net/XML headers & schema ... | ||
<resheader name="resmimetype">text/microsoft-resx</resheader> | ||
<resheader name="version">2.0</resheader> | ||
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader> | ||
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader> | ||
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data> | ||
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data> | ||
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64"> | ||
<value>[base64 mime encoded serialized .NET Framework object]</value> | ||
</data> | ||
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64"> | ||
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value> | ||
<comment>This is a comment</comment> | ||
</data> | ||
There are any number of "resheader" rows that contain simple | ||
name/value pairs. | ||
Each data row contains a name, and value. The row also contains a | ||
type or mimetype. Type corresponds to a .NET class that supports | ||
text/value conversion through the TypeConverter architecture. | ||
Classes that don't support this are serialized and stored with the | ||
mimetype set. | ||
The mimetype is used for serialized objects, and tells the | ||
ResXResourceReader how to depersist the object. This is currently not | ||
extensible. For a given mimetype the value must be set accordingly: | ||
Note - application/x-microsoft.net.object.binary.base64 is the format | ||
that the ResXResourceWriter will generate, however the reader can | ||
read any of the formats listed below. | ||
mimetype: application/x-microsoft.net.object.binary.base64 | ||
value : The object must be serialized with | ||
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter | ||
: and then encoded with base64 encoding. | ||
mimetype: application/x-microsoft.net.object.soap.base64 | ||
value : The object must be serialized with | ||
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter | ||
: and then encoded with base64 encoding. | ||
mimetype: application/x-microsoft.net.object.bytearray.base64 | ||
value : The object must be serialized into a byte array | ||
: using a System.ComponentModel.TypeConverter | ||
: and then encoded with base64 encoding. | ||
--> | ||
<!-- Inline schema for this file: the root element may contain any number of
     metadata, assembly, data and resheader elements, in any order. -->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata"> | ||
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" /> | ||
<xsd:element name="root" msdata:IsDataSet="true"> | ||
<xsd:complexType> | ||
<xsd:choice maxOccurs="unbounded"> | ||
<xsd:element name="metadata"> | ||
<xsd:complexType> | ||
<xsd:sequence> | ||
<xsd:element name="value" type="xsd:string" minOccurs="0" /> | ||
</xsd:sequence> | ||
<xsd:attribute name="name" use="required" type="xsd:string" /> | ||
<xsd:attribute name="type" type="xsd:string" /> | ||
<xsd:attribute name="mimetype" type="xsd:string" /> | ||
<xsd:attribute ref="xml:space" /> | ||
</xsd:complexType> | ||
</xsd:element> | ||
<xsd:element name="assembly"> | ||
<xsd:complexType> | ||
<xsd:attribute name="alias" type="xsd:string" /> | ||
<xsd:attribute name="name" type="xsd:string" /> | ||
</xsd:complexType> | ||
</xsd:element> | ||
<xsd:element name="data"> | ||
<xsd:complexType> | ||
<xsd:sequence> | ||
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" /> | ||
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" /> | ||
</xsd:sequence> | ||
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" /> | ||
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" /> | ||
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" /> | ||
<xsd:attribute ref="xml:space" /> | ||
</xsd:complexType> | ||
</xsd:element> | ||
<xsd:element name="resheader"> | ||
<xsd:complexType> | ||
<xsd:sequence> | ||
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" /> | ||
</xsd:sequence> | ||
<xsd:attribute name="name" type="xsd:string" use="required" /> | ||
</xsd:complexType> | ||
</xsd:element> | ||
</xsd:choice> | ||
</xsd:complexType> | ||
</xsd:element> | ||
</xsd:schema> | ||
<!-- Header entries: the resource MIME type, the format version, and the
     System.Windows.Forms 4.0.0.0 reader and writer classes named for this file. -->
<resheader name="resmimetype"> | ||
<value>text/microsoft-resx</value> | ||
</resheader> | ||
<resheader name="version"> | ||
<value>2.0</value> | ||
</resheader> | ||
<resheader name="reader"> | ||
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value> | ||
</resheader> | ||
<resheader name="writer"> | ||
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value> | ||
</resheader> | ||
<!-- The alias lets the data entry below name ResXFileRef with the short
     "System.Windows.Forms" assembly name. The entry itself is a file reference that
     exposes ..\Resources\wsj-0-18-bidirectional-nodistsim.tagger as a System.Byte[]
     resource named wsj_0_18_bidirectional_nodistsim. -->
<assembly alias="System.Windows.Forms" name="System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" /> | ||
<data name="wsj_0_18_bidirectional_nodistsim" type="System.Resources.ResXFileRef, System.Windows.Forms"> | ||
<value>..\Resources\wsj-0-18-bidirectional-nodistsim.tagger;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value> | ||
</data> | ||
</root> |
Binary file not shown.
Oops, something went wrong.