55
66namespace Rubberduck . RegexAssistant
77{
8- public interface Atom : IDescribable
8+ public interface IAtom : IDescribable
99 {
1010 string Specifier { get ; }
1111 }
1212
13- public class CharacterClass : Atom
13+ public class CharacterClass : IAtom
1414 {
1515 public static readonly string Pattern = @"(?<!\\)\[(?<expression>.*?)(?<!\\)\]" ;
1616 private static readonly Regex Matcher = new Regex ( "^" + Pattern + "$" ) ;
1717
18- public bool InverseMatching { get ; }
19- public IList < string > CharacterSpecifiers { get ; }
20- private readonly string specifier ;
18+ private readonly bool _inverseMatching ;
19+ public bool InverseMatching { get { return _inverseMatching ; } }
20+ private readonly IList < string > _characterSpecifiers ;
21+ public IList < string > CharacterSpecifiers { get { return _characterSpecifiers ; } }
22+ private readonly string _specifier ;
2123
2224 public CharacterClass ( string specifier )
2325 {
@@ -26,26 +28,25 @@ public CharacterClass(string specifier)
2628 {
2729 throw new ArgumentException ( "The given specifier does not denote a character class" ) ;
2830 }
29- this . specifier = specifier ;
31+ this . _specifier = specifier ;
3032 string actualSpecifier = m . Groups [ "expression" ] . Value ;
31- InverseMatching = actualSpecifier . StartsWith ( "^" ) ;
32- CharacterSpecifiers = new List < string > ( ) ;
33-
34- ExtractCharacterSpecifiers ( InverseMatching ? actualSpecifier . Substring ( 1 ) : actualSpecifier ) ;
33+ _inverseMatching = actualSpecifier . StartsWith ( "^" ) ;
34+ _characterSpecifiers = ExtractCharacterSpecifiers ( InverseMatching ? actualSpecifier . Substring ( 1 ) : actualSpecifier ) ;
3535 }
3636
3737 public string Specifier
3838 {
3939 get
4040 {
41- return specifier ;
41+ return _specifier ;
4242 }
4343 }
4444
4545 private static readonly Regex CharacterRanges = new Regex ( @"(\\[dDwWsS]|(\\[ntfvr]|\\([0-7]{3}|x[\dA-F]{2}|u[\dA-F]{4}|[\\\.\[\]])|.)(-(\\[ntfvr]|\\([0-7]{3}|x[A-F]{2}|u[\dA-F]{4}|[\.\\\[\]])|.))?)" ) ;
46- private void ExtractCharacterSpecifiers ( string characterClass )
46+ private IList < string > ExtractCharacterSpecifiers ( string characterClass )
4747 {
4848 MatchCollection specifiers = CharacterRanges . Matches ( characterClass ) ;
49+ var result = new List < string > ( ) ;
4950
5051 foreach ( Match specifier in specifiers )
5152 {
@@ -64,8 +65,9 @@ private void ExtractCharacterSpecifiers(string characterClass)
6465 continue ;
6566 }
6667 }
67- CharacterSpecifiers . Add ( specifier . Value ) ;
68+ result . Add ( specifier . Value ) ;
6869 }
70+ return result ;
6971 }
7072
7173 public string Description
@@ -84,82 +86,85 @@ private string HumanReadableClass()
8486 return string . Join ( ", " , CharacterSpecifiers ) ; // join last with and?
8587 }
8688
87- public bool TryMatch ( ref string text )
88- {
89- throw new NotImplementedException ( ) ;
90- }
91-
9289 public override bool Equals ( object obj )
9390 {
9491 if ( obj is CharacterClass )
9592 {
96- return ( obj as CharacterClass ) . specifier . Equals ( specifier ) ;
93+ return ( obj as CharacterClass ) . _specifier . Equals ( _specifier ) ;
9794 }
9895 return false ;
9996 }
10097 }
10198
102- class Group : Atom
99+ class Group : IAtom
103100 {
104101 public static readonly string Pattern = @"(?<!\\)\((?<expression>.*)(?<!\\)\)" ;
105102 private static readonly Regex Matcher = new Regex ( "^" + Pattern + "$" ) ;
106103
107- private readonly IRegularExpression subexpression ;
108- private readonly string specifier ;
104+ private readonly IRegularExpression _subexpression ;
105+ private readonly string _specifier ;
109106
110107 public Group ( string specifier ) {
111108 Match m = Matcher . Match ( specifier ) ;
112109 if ( ! m . Success )
113110 {
114111 throw new ArgumentException ( "The given specifier does not denote a Group" ) ;
115112 }
116- subexpression = RegularExpression . Parse ( m . Groups [ "expression" ] . Value ) ;
117- this . specifier = specifier ;
113+ _subexpression = RegularExpression . Parse ( m . Groups [ "expression" ] . Value ) ;
114+ _specifier = specifier ;
118115 }
119116
120117 public string Specifier
121118 {
122119 get
123120 {
124- return specifier ;
121+ return _specifier ;
125122 }
126123 }
127124
128125 public string Description
129126 {
130127 get
131128 {
132- return string . Format ( AssistantResources . AtomDescription_Group , specifier ) + "\r \n " + subexpression . Description ;
129+ return string . Format ( AssistantResources . AtomDescription_Group , _specifier ) + "\r \n " + _subexpression . Description ;
133130 }
134131 }
135132
136- public bool TryMatch ( ref string text )
137- {
138- throw new NotImplementedException ( ) ;
139- }
140-
141133 public override bool Equals ( object obj )
142134 {
143135 if ( obj is Group )
144136 {
145- return ( obj as Group ) . specifier . Equals ( specifier ) ;
137+ return ( obj as Group ) . _specifier . Equals ( _specifier ) ;
146138 }
147139 return false ;
148140 }
149141 }
150142
151- class Literal : Atom
143+ class Literal : IAtom
152144 {
153145 public static readonly string Pattern = @"(?<expression>\\(u[\dA-F]{4}|x[\dA-F]{2}|[0-7]{3}|[bB\(\){}\\\[\]\.+*?1-9nftvrdDwWsS])|[^()\[\]{}\\*+?])" ;
154146 private static readonly Regex Matcher = new Regex ( "^" + Pattern + "$" ) ;
155147 private static readonly ISet < char > EscapeLiterals = new HashSet < char > ( ) ;
156- private readonly string specifier ;
148+ private readonly string _specifier ;
157149
158150 static Literal ( ) {
159151 foreach ( char escape in new char [ ] { '.' , '+' , '*' , '?' , '(' , ')' , '{' , '}' , '[' , ']' , '|' , '\\ ' } )
160152 {
161153 EscapeLiterals . Add ( escape ) ;
162154 }
155+ _escapeDescriptions . Add ( 'd' , AssistantResources . AtomDescription_Digit ) ;
156+ _escapeDescriptions . Add ( 'D' , AssistantResources . AtomDescription_NonDigit ) ;
157+ _escapeDescriptions . Add ( 'b' , AssistantResources . AtomDescription_WordBoundary ) ;
158+ _escapeDescriptions . Add ( 'B' , AssistantResources . AtomDescription_NonWordBoundary ) ;
159+ _escapeDescriptions . Add ( 'w' , AssistantResources . AtomDescription_WordCharacter ) ;
160+ _escapeDescriptions . Add ( 'W' , AssistantResources . AtomDescription_NonWordCharacter ) ;
161+ _escapeDescriptions . Add ( 's' , AssistantResources . AtomDescription_Whitespace ) ;
162+ _escapeDescriptions . Add ( 'S' , AssistantResources . AtomDescription_NonWhitespace ) ;
163+ _escapeDescriptions . Add ( 'n' , AssistantResources . AtomDescription_Newline ) ;
164+ _escapeDescriptions . Add ( 'r' , AssistantResources . AtomDescription_CarriageReturn ) ;
165+ _escapeDescriptions . Add ( 'f' , AssistantResources . AtomDescription_FormFeed ) ;
166+ _escapeDescriptions . Add ( 'v' , AssistantResources . AtomDescription_VTab ) ;
167+ _escapeDescriptions . Add ( 't' , AssistantResources . AtomDescription_HTab ) ;
163168 }
164169
165170 public Literal ( string specifier )
@@ -169,17 +174,19 @@ public Literal(string specifier)
169174 {
170175 throw new ArgumentException ( "The given specifier does not denote a Literal" ) ;
171176 }
172- this . specifier = specifier ;
177+ _specifier = specifier ;
173178 }
174179
175180 public string Specifier
176181 {
177182 get
178183 {
179- return specifier ;
184+ return _specifier ;
180185 }
181186 }
182187
188+
189+ private static readonly Dictionary < char , string > _escapeDescriptions = new Dictionary < char , string > ( ) ;
183190 public string Description
184191 {
185192 get
@@ -190,9 +197,9 @@ public string Description
190197 // - escape sequences (each having a different description)
191198 // - codepoint escapes (belongs into above category but kept separate)
192199 // - and actually boring literal matches
193- if ( specifier . Length > 1 )
200+ if ( _specifier . Length > 1 )
194201 {
195- string relevant = specifier . Substring ( 1 ) ; // skip the damn Backslash at the start
202+ string relevant = _specifier . Substring ( 1 ) ; // skip the damn Backslash at the start
196203 if ( relevant . Length > 1 ) // longer sequences
197204 {
198205 if ( relevant . StartsWith ( "u" ) )
@@ -218,65 +225,23 @@ public string Description
218225 }
219226 else
220227 {
221- // special escapes here
222- switch ( relevant [ 0 ] )
223- {
224- case 'd' :
225- return AssistantResources . AtomDescription_Digit ;
226- case 'D' :
227- return AssistantResources . AtomDescription_NonDigit ;
228- case 'b' :
229- return AssistantResources . AtomDescription_WordBoundary ;
230- case 'B' :
231- return AssistantResources . AtomDescription_NonWordBoundary ;
232- case 'w' :
233- return AssistantResources . AtomDescription_WordCharacter ;
234- case 'W' :
235- return AssistantResources . AtomDescription_NonWordCharacter ;
236- case 's' :
237- return AssistantResources . AtomDescription_Whitespace ;
238- case 'S' :
239- return AssistantResources . AtomDescription_NonWhitespace ;
240- case 'n' :
241- return AssistantResources . AtomDescription_Newline ;
242- case 'r' :
243- return AssistantResources . AtomDescription_CarriageReturn ;
244- case 'f' :
245- return AssistantResources . AtomDescription_FormFeed ;
246- case 'v' :
247- return AssistantResources . AtomDescription_VTab ;
248- case 't' :
249- return AssistantResources . AtomDescription_HTab ;
250- default :
251- // shouldn't ever happen, so we blow it all up
252- throw new InvalidOperationException ( "took an escape sequence that shouldn't exist" ) ;
253- }
228+ return _escapeDescriptions [ relevant [ 0 ] ] ;
254229 }
255230 }
256- else
231+ if ( _specifier . Equals ( "." ) )
257232 {
258- if ( specifier . Equals ( "." ) )
259- {
260- return AssistantResources . AtomDescription_Dot ;
261- }
262- // Behaviour with "." needs fix
263- return string . Format ( AssistantResources . AtomDescription_Literal_ActualLiteral , specifier ) ;
233+ return AssistantResources . AtomDescription_Dot ;
264234 }
235+ return string . Format ( AssistantResources . AtomDescription_Literal_ActualLiteral , _specifier ) ;
265236
266- throw new NotImplementedException ( ) ;
267237 }
268238 }
269239
270- public bool TryMatch ( ref string text )
271- {
272- throw new NotImplementedException ( ) ;
273- }
274-
275240 public override bool Equals ( object obj )
276241 {
277242 if ( obj is Literal )
278243 {
279- return ( obj as Literal ) . specifier . Equals ( specifier ) ;
244+ return ( obj as Literal ) . _specifier . Equals ( _specifier ) ;
280245 }
281246 return false ;
282247 }
0 commit comments