Cleanup of c# syntax
Alexandre Point committed Jun 25, 2014
1 parent 5bcab4d commit 605b03e
Showing 12 changed files with 207 additions and 238 deletions.
2 changes: 1 addition & 1 deletion ModelConverter/Converter.cs
@@ -40,7 +40,7 @@ private static bool ConvertFolder(string folder)
{
try
{
BinaryGisModelWriter writer = new BinaryGisModelWriter();
var writer = new BinaryGisModelWriter();

foreach (string file in Directory.GetFiles(folder))
{
@@ -38,17 +38,16 @@
namespace OpenNLP.Tools.SentenceDetect
{
/// <summary>
/// A sentence detector which uses a model trained on English data (Wall Street
/// Journal text).
/// A sentence detector which uses a model trained on English data
/// (Wall Street Journal text).
/// </summary>
public class EnglishMaximumEntropySentenceDetector : MaximumEntropySentenceDetector
{
/// <summary>
/// Constructor which loads the English sentence detection model
/// transparently.
/// </summary>
public EnglishMaximumEntropySentenceDetector(string name) : base(new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(name)))
{
}
public EnglishMaximumEntropySentenceDetector(string name):
base(new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(name))){}
}
}
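
A minimal usage sketch for the detector class above. It assumes the base MaximumEntropySentenceDetector exposes a SentenceDetect(string) method, as in the original SharpNLP sentence detector interface, and that a trained model file is available; the file name and path are illustrative only and not taken from this diff.

using OpenNLP.Tools.SentenceDetect;

class SentenceDetectExample
{
    static void Main()
    {
        // Path to a binary GIS sentence-detection model (hypothetical location and file name).
        var modelPath = @"Resources\Models\EnglishSD.nbin";

        // The constructor shown in the diff loads the model through a BinaryGisModelReader.
        var detector = new EnglishMaximumEntropySentenceDetector(modelPath);

        // Assumed base-class API: split raw text into sentences.
        string[] sentences = detector.SentenceDetect("Hello world. This is a test.");
        foreach (var sentence in sentences)
        {
            System.Console.WriteLine(sentence);
        }
    }
}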
55 changes: 12 additions & 43 deletions OpenNLP/Tools/Tokenize/MaximumEntropyTokenizer.cs
@@ -49,7 +49,7 @@ namespace OpenNLP.Tools.Tokenize
/// </summary>
public class MaximumEntropyTokenizer : ITokenizer
{
internal static Regex AlphaNumeric = new Regex("^[A-Za-z0-9]+$");
internal static Regex AlphaNumeric = new Regex("^[A-Za-z0-9]+$", RegexOptions.Compiled);

/// <summary>
/// the maximum entropy model to use to evaluate contexts.
@@ -61,31 +61,16 @@ public class MaximumEntropyTokenizer : ITokenizer
/// </summary>
private readonly SharpEntropy.IContextGenerator<Tuple<string, int>> _contextGenerator;

/// <summary>
/// Optimization flag to skip alpha numeric tokens for further tokenization
/// </summary>
private bool _mAlphaNumericOptimization;

/// <summary>
/// List of probabilities for each token returned from call to Tokenize()
/// </summary>
private readonly List<double> _tokenProbabilities;
private readonly List<Util.Span> _newTokens;

/// <summary>
/// Used to have the tokenizer ignore tokens which only contain alpha-numeric characters.
/// Optimization flag to skip alpha numeric tokens for further tokenization.
/// </summary>
virtual public bool AlphaNumericOptimization
{
get
{
return _mAlphaNumericOptimization;
}
set
{
_mAlphaNumericOptimization = value;
}
}
virtual public bool AlphaNumericOptimization { get; set; }

/// <summary>
/// Class constructor which takes the string locations of the
@@ -94,21 +79,15 @@ virtual public bool AlphaNumericOptimization
public MaximumEntropyTokenizer(SharpEntropy.IMaximumEntropyModel model)
{
_contextGenerator = new TokenContextGenerator();
_mAlphaNumericOptimization = false;
AlphaNumericOptimization = false;
this._model = model;
_newTokens = new List<Util.Span>();
_tokenProbabilities = new List<double>(50);
}

/// <summary>
/// Tokenizes the string.
/// </summary>
/// <param name="input">
/// The string to be tokenized.
/// </param>
/// <returns>
/// A span array containing individual tokens as elements.
/// </returns>
/// <summary>Tokenizes the string</summary>
/// <param name="input">The string to be tokenized</param>
/// <returns>A span array containing individual tokens as elements</returns>
public virtual Util.Span[] TokenizePositions(string input)
{
Util.Span[] tokens = Split(input);
@@ -159,15 +138,9 @@ public virtual Util.Span[] TokenizePositions(string input)
return _newTokens.ToArray();
}

/// <summary>
/// Tokenize a string.
/// </summary>
/// <param name="input">
/// The string to be tokenized.
/// </param>
/// <returns>
/// A string array containing individual tokens as elements.
/// </returns>
/// <summary>Tokenize a string</summary>
/// <param name="input">The string to be tokenized</param>
/// <returns>A string array containing individual tokens as elements</returns>
public virtual string[] Tokenize(string input)
{
Util.Span[] tokenSpans = TokenizePositions(input);
@@ -184,12 +157,8 @@ public virtual string[] Tokenize(string input)
/// delimited token. Token strings can be constructed from these
/// spans as follows: input.Substring(span.Start, span.End);
/// </summary>
/// <param name="input">
/// string to tokenize.
/// </param>
/// <returns>
/// Array of spans.
/// </returns>
/// <param name="input">string to tokenize</param>
/// <returns>Array of spans</returns>
internal static Util.Span[] Split(string input)
{
int tokenStart = - 1;
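
For context, a short sketch of how the tokenizer refactored above might be driven. The constructor, the AlphaNumericOptimization property, and the Tokenize/TokenizePositions methods are the ones shown in this diff; the model file name and path are an assumption for illustration, following the loading pattern used by the sentence detector earlier in the commit.

using OpenNLP.Tools.Tokenize;
using SharpEntropy;
using SharpEntropy.IO;

class TokenizerExample
{
    static void Main()
    {
        // Load a maximum entropy model the same way the sentence detector does (hypothetical path).
        IMaximumEntropyModel model = new GisModel(new BinaryGisModelReader(@"Resources\Models\EnglishTok.nbin"));

        var tokenizer = new MaximumEntropyTokenizer(model)
        {
            // Skip purely alpha-numeric tokens instead of re-tokenizing them.
            AlphaNumericOptimization = true
        };

        string input = "Mr. Smith isn't here.";

        // Token strings, one element per token.
        string[] tokens = tokenizer.Tokenize(input);

        // The same tokens as spans over the original input string.
        var spans = tokenizer.TokenizePositions(input);
    }
}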
53 changes: 39 additions & 14 deletions OpenNLP/Tools/Tokenize/TokenContextGenerator.cs
@@ -43,7 +43,13 @@ namespace OpenNLP.Tools.Tokenize
/// </summary>
public class TokenContextGenerator : SharpEntropy.IContextGenerator<Tuple<string, int>>
{
/// <summary>
/// Split the string
/// </summary>
public const string SplitIndicator = "T";
/// <summary>
/// Don't split the string
/// </summary>
public const string NoSplitIndicator = "F";

/// <summary>
@@ -53,41 +59,52 @@ public class TokenContextGenerator : SharpEntropy.IContextGenerator<Tuple<string
/// </summary>
public virtual string[] GetContext(Tuple<string, int> pair)
{
string data = pair.Item1;
string token = pair.Item1;
int index = pair.Item2;

var predicates = new List<string>();
predicates.Add("p=" + data.Substring(0, (index) - (0)));
predicates.Add("s=" + data.Substring(index));
if (index > 0)
// add strings before and after the index in the token
var predicates = new List<string>
{
"p=" + token.Substring(0, index),
"s=" + token.Substring(index)
};
if (index > 0)
{
AddCharPredicates("p1", data[index - 1], predicates);
// add predicates for character just before the current index
AddCharPredicates("p1", token[index - 1], predicates);
predicates.Add("p1f1=" + token[index - 1] + token[index]);
if (index > 1)
{
AddCharPredicates("p2", data[index - 2], predicates);
predicates.Add("p21=" + data[index - 2] + data[index - 1]);
// add predicates for the character 2 positions before the current index
AddCharPredicates("p2", token[index - 2], predicates);
predicates.Add("p21=" + token[index - 2] + token[index - 1]);
}
else
{
predicates.Add("p2=bok");
}
predicates.Add("p1f1=" + data[index - 1] + data[index]);
}
else
{
predicates.Add("p1=bok");
}
AddCharPredicates("f1", data[index], predicates);
if (index + 1 < data.Length)

// add predicates for char at the current index
AddCharPredicates("f1", token[index], predicates);

// add predicates for the char just after
if (index + 1 < token.Length)
{
AddCharPredicates("f2", data[index + 1], predicates);
predicates.Add("f12=" + data[index] + data[index + 1]);
AddCharPredicates("f2", token[index + 1], predicates);
predicates.Add("f12=" + token[index] + token[index + 1]);
}
else
{
predicates.Add("f2=bok");
}
if (data[0] == '&' && data[data.Length - 1] == ';')

// test if token starts by '&' or ends by ';'
if (token[0] == '&' && token[token.Length - 1] == ';')
{
predicates.Add("cc"); //character code
}
@@ -103,36 +120,44 @@ private void AddCharPredicates(string key, char c, List<string> predicates)
predicates.Add(key + "=" + c);
if (char.IsLetter(c))
{
// whether it's a letter
predicates.Add(key + "_alpha");
if (char.IsUpper(c))
{
// whether it's upper case
predicates.Add(key + "_caps");
}
}
else if (char.IsDigit(c))
{
// whether it's a digit
predicates.Add(key + "_num");
}
else if (char.IsWhiteSpace(c))
{
// whether it's whitespace
predicates.Add(key + "_ws");
}
else
{
if (c == '.' || c == '?' || c == '!')
{
// whether it's an end of sentence
predicates.Add(key + "_eos");
}
else if (c == '`' || c == '"' || c == '\'')
{
// whether it's a quote
predicates.Add(key + "_quote");
}
else if (c == '[' || c == '{' || c == '(')
{
// whether it's a left parenthesis
predicates.Add(key + "_lp");
}
else if (c == ']' || c == '}' || c == ')')
{
// whether it's a right parenthesis
predicates.Add(key + "_rp");
}
}
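
To make the predicate scheme above concrete, here is a hand-worked call to GetContext for a sample token. The token "can't" and index 3 are chosen for illustration, and the predicate list in the comment is traced by hand from the branches shown above (the method's return statement is not visible in this hunk), so treat it as approximate rather than authoritative.

using System;
using OpenNLP.Tools.Tokenize;

class ContextGeneratorExample
{
    static void Main()
    {
        var generator = new TokenContextGenerator();

        // Context of the character at index 3 (the apostrophe) in "can't".
        string[] predicates = generator.GetContext(Tuple.Create("can't", 3));

        // Following the code above, this should yield roughly:
        //   p=can, s='t            (prefix / suffix around the index)
        //   p1=n, p1_alpha         (character just before the index)
        //   p1f1=n'                (bigram ending at the index)
        //   p2=a, p2_alpha, p21=an (two positions before the index)
        //   f1=', f1_quote         (character at the index)
        //   f2=t, f2_alpha, f12='t (character just after the index)
        foreach (var predicate in predicates)
        {
            Console.WriteLine(predicate);
        }
    }
}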
63 changes: 28 additions & 35 deletions OpenNLP/Tools/Tokenize/TokenEventReader.cs
@@ -45,18 +45,18 @@ namespace OpenNLP.Tools.Tokenize
/// </summary>
public class TokenEventReader : SharpEntropy.ITrainingEventReader
{
private static readonly SharpEntropy.IContextGenerator<Tuple<string, int>> mContextGenerator = new TokenContextGenerator();
private StreamReader mStreamReader;
private List<SharpEntropy.TrainingEvent> mEventList = new List<SharpEntropy.TrainingEvent>();
private int mCurrentEvent = 0;
private static readonly SharpEntropy.IContextGenerator<Tuple<string, int>> ContextGenerator = new TokenContextGenerator();
private readonly StreamReader _streamReader;
private readonly List<SharpEntropy.TrainingEvent> _eventList = new List<SharpEntropy.TrainingEvent>();
private int _currentEvent = 0;

/// <summary>
/// Class constructor.
/// </summary>
public TokenEventReader(StreamReader dataReader)
{
mStreamReader = dataReader;
string nextLine = mStreamReader.ReadLine();
_streamReader = dataReader;
string nextLine = _streamReader.ReadLine();
if (nextLine != null)
{
AddEvents(nextLine);
@@ -65,43 +65,36 @@ public TokenEventReader(StreamReader dataReader)

private void AddEvents(string line)
{
string[] spacedTokens = line.Split(' ');
for (int currentToken = 0; currentToken < spacedTokens.Length; currentToken++)
{
string buffer = spacedTokens[currentToken];
if (MaximumEntropyTokenizer.AlphaNumeric.IsMatch(buffer))
{
int lastIndex = buffer.Length - 1;
for (int index = 0; index < buffer.Length; index++)
{
string[] context = mContextGenerator.GetContext(new Tuple<string, int>(buffer, index));
if (index == lastIndex)
{
mEventList.Add(new SharpEntropy.TrainingEvent("T", context));
}
else
{
mEventList.Add(new SharpEntropy.TrainingEvent("F", context));
}
}
}
}
string[] spacedTokens = line.Split(' ');
foreach (string buffer in spacedTokens)
{
if (MaximumEntropyTokenizer.AlphaNumeric.IsMatch(buffer))
{
int lastIndex = buffer.Length - 1;
for (int index = 0; index < buffer.Length; index++)
{
string[] context = ContextGenerator.GetContext(new Tuple<string, int>(buffer, index));
var trainingEvent = new SharpEntropy.TrainingEvent(index == lastIndex ? "T" : "F", context);
_eventList.Add(trainingEvent);
}
}
}
}

public virtual bool HasNext()
public virtual bool HasNext()
{
return (mCurrentEvent < mEventList.Count);
return (_currentEvent < _eventList.Count);
}

public virtual SharpEntropy.TrainingEvent ReadNextEvent()
{
SharpEntropy.TrainingEvent trainingEvent = mEventList[mCurrentEvent];
mCurrentEvent++;
if (mEventList.Count == mCurrentEvent)
SharpEntropy.TrainingEvent trainingEvent = _eventList[_currentEvent];
_currentEvent++;
if (_eventList.Count == _currentEvent)
{
mCurrentEvent = 0;
mEventList.Clear();
string nextLine = mStreamReader.ReadLine();
_currentEvent = 0;
_eventList.Clear();
string nextLine = _streamReader.ReadLine();
if (nextLine != null)
{
AddEvents(nextLine);
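
A brief sketch of how the event reader above might be consumed. The constructor, HasNext(), and ReadNextEvent() are the members shown in this diff; the training-file path is hypothetical, and the file is assumed to contain whitespace-separated tokens, one line at a time, as AddEvents expects.

using System.IO;
using OpenNLP.Tools.Tokenize;

class EventReaderExample
{
    static void Main()
    {
        // Hypothetical whitespace-separated training file; the path is illustrative only.
        using (var reader = new StreamReader(@"Resources\Training\tokens.train"))
        {
            var eventReader = new TokenEventReader(reader);

            int count = 0;
            while (eventReader.HasNext())
            {
                // A real caller would hand each event to a maximum entropy trainer here.
                SharpEntropy.TrainingEvent trainingEvent = eventReader.ReadNextEvent();
                count++;
            }
            System.Console.WriteLine(count + " training events read");
        }
    }
}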
2 changes: 1 addition & 1 deletion ParseTree/App.config
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<appSettings>
<add key="MaximumEntropyModelDirectory" value="C:\Users\Alex\Documents\GitHub\sharpnlp\OpenNLP\Resources\Models\"/>
<add key="MaximumEntropyModelDirectory" value="C:\Users\Alexandre\Documents\GitHub\sharpnlp\OpenNLP\Resources\Models\"/>
</appSettings>
<startup><supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5"/></startup></configuration>
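
A small sketch of how the setting updated above is typically read at runtime, using the standard System.Configuration API; this call is not part of the diff, it only illustrates how the key reaches the application code (the project needs a reference to System.Configuration).

using System.Configuration;

class ModelPathExample
{
    static void Main()
    {
        // Reads the model directory configured in App.config under the key shown above.
        string modelDirectory = ConfigurationManager.AppSettings["MaximumEntropyModelDirectory"];
        System.Console.WriteLine(modelDirectory);
    }
}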