Skip to content
This repository has been archived by the owner on Jul 14, 2018. It is now read-only.

Commit

Permalink
New Word Guesser
Browse files Browse the repository at this point in the history
  • Loading branch information
SquidDev committed Sep 19, 2014
1 parent 19c6bc2 commit 0df9903
Show file tree
Hide file tree
Showing 10 changed files with 139 additions and 72 deletions.
81 changes: 61 additions & 20 deletions C#/Cipher/Analysis/AutoSpace/WordGuesser.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
using System;
using Cipher.Text;
using Cipher.Utils;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;

namespace Cipher.Analysis.AutoSpace
{
Expand Down Expand Up @@ -91,39 +92,79 @@ public WordGuesser(string Input)
{
    LoadFiles();

    // Trim whitespace and uppercase
    Input = Input.UpperNoSpace();
    int TextLength = Input.Length;

    // Nothing to segment: every Substring call below would throw on an empty text.
    // NOTE(review): the score reported for empty input is a choice made here
    // (scores are summed and compared with '>', so -Infinity means "nothing scored") - confirm.
    if (TextLength == 0)
    {
        Result = String.Empty;
        Score = Double.NegativeInfinity;
        return;
    }

    // A word can never be longer than the text itself; bounding by this keeps
    // Substring in range when the text is shorter than MAX_WORD_LENGTH.
    int LongestWord = Math.Min(TextLength, MAX_WORD_LENGTH);

    // Probabilities[X, Y] is the best summed score of a segmentation whose last word
    // starts at X and is Y + 1 characters long; Strings[X, Y] holds that segmentation.
    double[,] Probabilities = new double[TextLength, MAX_WORD_LENGTH];
    string[,][] Strings = new string[TextLength, MAX_WORD_LENGTH][];

    for (int X = 0; X < TextLength; X++)
    {
        for (int Y = 0; Y < MAX_WORD_LENGTH; Y++)
        {
            Probabilities[X, Y] = Double.NegativeInfinity;
            Strings[X, Y] = new string[] { " " };
        }
    }

    // Base case: the first word starts at the beginning of the text.
    for (int Y = 0; Y < LongestWord; Y++)
    {
        string Sub = Input.Substring(0, Y + 1);
        Probabilities[0, Y] = ConditionalWordProbability(Sub);
        Strings[0, Y] = new string[] { Sub };
    }

    for (int I = 1; I < TextLength; I++)
    {
        int Min = Math.Min(I, MAX_WORD_LENGTH);

        for (int J = 0; J < MAX_WORD_LENGTH; J++)
        {
            if (I + J + 1 > TextLength) break;

            // The candidate word does not depend on K, so build it once per cell.
            string ThisString = Input.Substring(I, J + 1);

            double CellProbability = Double.NegativeInfinity;
            string[] CellPrevious = null;

            // Try every word of length K + 1 that ends immediately before position I.
            for (int K = 0; K < Min; K++)
            {
                string[] OldStrings = Strings[I - K - 1, K];
                double ThisProbability = Probabilities[I - K - 1, K] + ConditionalWordProbability(ThisString, OldStrings.LastValue());

                if (ThisProbability > CellProbability)
                {
                    CellProbability = ThisProbability;
                    CellPrevious = OldStrings;
                }
            }

            // Only record the cell when some predecessor actually scored; otherwise it keeps
            // its -Infinity placeholder instead of inheriting another cell's segmentation.
            if (CellPrevious != null)
            {
                string[] Segmentation = new string[CellPrevious.Length + 1];
                CellPrevious.CopyTo(Segmentation, 0);
                Segmentation[CellPrevious.Length] = ThisString;

                Probabilities[I, J] = CellProbability;
                Strings[I, J] = Segmentation;
            }
        }
    }

    // Pick the best segmentation among those whose last word ends exactly at the end of the text.
    double FinalProbability = Double.NegativeInfinity;
    string[] FinalStrings = null;

    for (int I = 0; I < LongestWord; I++)
    {
        double ThisProbability = Probabilities[TextLength - I - 1, I];
        if (ThisProbability > FinalProbability)
        {
            // Track the running maximum so the comparison really selects the best candidate.
            FinalProbability = ThisProbability;
            FinalStrings = Strings[TextLength - I - 1, I];
        }
    }

    // No candidate scored above -Infinity: fall back to the unsplit text rather than joining null.
    if (FinalStrings == null)
    {
        FinalStrings = new string[] { Input };
    }

    Result = String.Join(" ", FinalStrings);
    Score = FinalProbability;
}

protected double ConditionalWordProbability(string Word, string Previous = "<UNK>")
Expand Down
1 change: 1 addition & 0 deletions C#/Cipher/Cipher.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
<Compile Include="Frequency\QuadgramStatistics.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Utils\DefaultDict.cs" />
<Compile Include="Utils\IEnumerableUtilities.cs" />
<Compile Include="Utils\MathsUtilities.cs" />
<Compile Include="Text\ScoredLetterArray.cs" />
<Compile Include="Text\TextArray.cs" />
Expand Down
9 changes: 9 additions & 0 deletions C#/Cipher/Text/TextExtensions.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;

namespace Cipher.Text
{
Expand Down Expand Up @@ -55,5 +56,13 @@ public static byte ToLetterByte(this Char Character)

return byte.MaxValue;
}

/// <summary>
/// Removes every whitespace character from the text and upper-cases what remains.
/// </summary>
/// <param name="Text">Text to normalise.</param>
/// <returns>A new string with no whitespace and all characters upper-cased.</returns>
/// <exception cref="ArgumentNullException"><paramref name="Text"/> is null.</exception>
/// <remarks>
/// Casing uses the invariant culture so the result does not depend on the current
/// thread culture (for example 'i' always becomes 'I', even under a Turkish culture).
/// </remarks>
public static string UpperNoSpace(this string Text)
{
    if (Text == null)
    {
        throw new ArgumentNullException("Text");
    }

    char[] Kept = Text
        .Where(C => !Char.IsWhiteSpace(C))
        .Select(C => Char.ToUpperInvariant(C))
        .ToArray();

    return new String(Kept);
}
}
}
53 changes: 53 additions & 0 deletions C#/Cipher/Utils/IEnumerableUtilities.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace Cipher.Utils
{
/// <summary>
/// Extension helpers for finding the largest element of a sequence and
/// for reading the final element of a list.
/// </summary>
public static class IEnumerableUtilities
{
    /// <summary>
    /// Finds the zero-based position of the largest element in a sequence.
    /// </summary>
    /// <param name="Enumerable">Sequence to scan; it is enumerated once.</param>
    /// <returns>
    /// Index of the first element that compares greater than all earlier ones,
    /// or -1 when the sequence is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="Enumerable"/> is null.</exception>
    public static int MaxIndex<T>(this IEnumerable<T> Enumerable)
        where T : IComparable<T>
    {
        // Same scan as MaxIndexValue; only the position is needed here.
        return MaxIndexValue(Enumerable).Key;
    }

    /// <summary>
    /// Finds the largest element in a sequence together with its zero-based position.
    /// </summary>
    /// <param name="Enumerable">Sequence to scan; it is enumerated once.</param>
    /// <returns>
    /// A pair of (index, value) for the first maximal element. For an empty
    /// sequence the key is -1 and the value is <c>default(T)</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="Enumerable"/> is null.</exception>
    public static KeyValuePair<int, T> MaxIndexValue<T>(this IEnumerable<T> Enumerable)
        where T : IComparable<T>
    {
        if (Enumerable == null)
        {
            throw new ArgumentNullException("Enumerable");
        }

        // Comparer<T>.Default uses IComparable<T> but orders null before any
        // non-null value, so null elements no longer cause a NullReferenceException.
        Comparer<T> Order = Comparer<T>.Default;

        int MaxIndex = -1;
        T MaxValue = default(T);

        int Index = 0;
        foreach (T Value in Enumerable)
        {
            // Strict '>' keeps the earliest element when several are equal.
            if (MaxIndex == -1 || Order.Compare(Value, MaxValue) > 0)
            {
                MaxIndex = Index;
                MaxValue = Value;
            }
            Index++;
        }

        return new KeyValuePair<int, T>(MaxIndex, MaxValue);
    }

    /// <summary>
    /// Returns the final element of a list.
    /// </summary>
    /// <param name="List">List to read from. It must contain at least one element;
    /// on an empty list the indexer is asked for position -1 and throws.</param>
    /// <returns>The element at position <c>Count - 1</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="List"/> is null.</exception>
    public static T LastValue<T>(this IList<T> List)
    {
        if (List == null)
        {
            throw new ArgumentNullException("List");
        }

        return List[List.Count - 1];
    }
}
}
19 changes: 0 additions & 19 deletions C#/Cipher/Utils/MathsUtilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,5 @@ public static double Chai(int Value, double Expected)
{
return Math.Pow((Value - Expected), 2) / Expected;
}

/// <summary>
/// Finds the zero-based position of the largest element in a sequence.
/// </summary>
/// <param name="Enumerable">Sequence to scan; it is enumerated once.</param>
/// <returns>
/// Index of the first element that compares greater than all earlier ones,
/// or -1 when the sequence is empty.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="Enumerable"/> is null.</exception>
public static int MaxIndex<T>(this IEnumerable<T> Enumerable)
    where T : IComparable<T>
{
    if (Enumerable == null)
    {
        throw new ArgumentNullException("Enumerable");
    }

    // Comparer<T>.Default uses IComparable<T> but orders null before any
    // non-null value, so null elements no longer cause a NullReferenceException.
    Comparer<T> Order = Comparer<T>.Default;

    int MaxIndex = -1;
    T MaxValue = default(T);

    int Index = 0;
    foreach (T Value in Enumerable)
    {
        // Strict '>' keeps the earliest element when several are equal.
        if (MaxIndex == -1 || Order.Compare(Value, MaxValue) > 0)
        {
            MaxIndex = Index;
            MaxValue = Value;
        }
        Index++;
    }
    return MaxIndex;
}
}
}
26 changes: 5 additions & 21 deletions C#/CipherPrompt/Commands/AutoSpaceCommand.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using Cipher.Text.WordParser;
using Cipher.Text.WordParser.Storage;
using Cipher.Analysis.AutoSpace;
using NDesk.Options;
using System;
using System.Collections.Generic;
Expand Down Expand Up @@ -27,34 +26,19 @@ public override void Run(IEnumerable<string> Args, TextReader Input, TextWriter

if (ShowHelp)
{
Console.WriteLine("Usage: CipherPrompt {0} [OPTIONS] (SAMPLE TEXT)+", Name);
Console.WriteLine("Usage: CipherPrompt {0} [OPTIONS]", Name);
Console.WriteLine();
Console.WriteLine(Description);
Console.WriteLine();
Console.WriteLine("Available Options:");
Options.WriteOptionDescriptions(Console.Out);
Console.WriteLine();
Console.WriteLine("Sample texts:");
Console.WriteLine("\tTexts are loaded from a directory or file");
Console.WriteLine("\tXML files will be processed as dictionaries");

return;
}

// Lazy Init variables
GuessLoader Loader = new GuessLoader(Extra);
if (Loader.Count <= 0)
{
Console.WriteLine("Must include sample texts");
Console.WriteLine("Run `CipherPrompt help` and CipherPrompt help {0}` for more info", Name);
return;
}


// Load text sources
Loader.Load();

WordGuesser Guesser = new WordGuesser(Input.ReadToEnd(), Loader.Frequencies);

WordGuesser Guesser = new WordGuesser(Input.ReadToEnd());
Console.WriteLine("Score {0}", Guesser.Score);
Output.WriteLine(String.Join(" ", Guesser.Result));
}
}
Expand Down
9 changes: 2 additions & 7 deletions C#/CipherPrompt/TestingProg.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using Cipher.Text.WordParser;
using Cipher.Analysis.AutoSpace;
using System;
using System.IO;

Expand All @@ -14,13 +14,8 @@ static void Main(string[] args)
#endif
DateTime Start = DateTime.Now;

FrequencyStorage Storage = new FrequencyStorage();
using (StreamReader Reader = new StreamReader("dickens-twocities.txt"))
{
Storage.ReadText(Reader.ReadToEnd());
}

WordGuesser Guesser = new WordGuesser("philithasbeenstaringyouinthefaceallthistime", Storage);
WordGuesser Guesser = new WordGuesser("PHILITHASBEENSTARINGYOUINTHEFACEALLTHISTIMEBUTICANUNDERSTANDWHYYOUDIDNTSEEITTHATGIRLCOULDFORGEANYTHINGIFINALLYUNDERSTOODWHATHADHAPPENEDWHENICRACKEDTHEENCLOSEDREPORTFROMTHENAZIARCHIVESTHEMONALISAHASHADAPRETTYADVENTUROUSTIMEOVERTHELASTFORTYYEARSTHENAZISDISCOVEREDTHATPERUGGIAWASANASSOCIATEOFTHEGHETTOFAMILYANDTHATAFTERHEHADTAKENTHEPAINTINGTOITALYITHADBEENINTHEIRCUSTODYFORATLEASTSOMEPARTOFTHETWOYEARSITWASMISSINGREADTHEREPORTYOURSELFANDYOUWILLSEETHATTHEYCAMETOTHECONCLUSIONTHATTHEGHETTOSHELDONTOHERBECAUSETHEYCOULDNTMAKEACONVINCINGFORGERYIDONTBUYTHATANDIDONTBELIEVETHATTHEYWOULDHAVELETPERUGGIAOFFERITTOANYONEELSEONCETHEYHADHOLDOFITTHEMSELVESTHENAZISHAVEFALLENFORTHESAMETRICKTHATFOOLEDEVERYONEELSETHEPRETENCETHATTHEPAINTINGPERUGGIASHOWEDTOGERIWASTHEORIGINALITWASAFAKEPOSSIBLYTHEBESTFAKEINARTHISTORYIDONTKNOWWHETHERPERUGGIAWASINONTHEDECEPTIONORNOTGIVENTHEIRSKILLSTHEGHETTOFAMILYCANTHAVEBEENSHORTOFMONEYORINFLUENCEANDHEWASNOTINSIDEFORLONGEITHERWAYTHEFAKEWASRESTOREDTOFRANCEANDTHEORIGINALWASHIDDENAWAYWHENFALSCHUNGORDEREDSARATOCOPYTHEPARISMONALISAHEBELIEVEDALONGWITHTHERESTOFTHEWORLDTHATITWASTHEORIGINALSARAMUSTHAVEKNOWNTHETRUESTORYANDPRODUCEDANEWANDALMOSTPERFECTFAKEFORHIMBUTUSEDANOLDSCRAPOFLEADPIPEFROMTHECELLARTOMARKTHEBOARDWITHTHENAZIEMBLEMKNOWINGTHATATSOMEPOINTTHEFORGERYWOULDBEDISCOVEREDSHEMUSTHAVEHOPEDTHATTHENAZISWOULDBEBLAMEDFORTHETHEFTOFTHEMONALISAPROVIDINGTHEPERFECTCOVERFORHEROWNFAMILYSORIGINALCRIMETHATLEFTONEMYSTERYWHEREWASTHEPAINTINGSHEREPLACEDSHECOULDNTHAVETRAVELLEDSOUTHWITHTHEPERUGGIAFORGERYSINCEITWASPAINTEDONABOARDNOTSOMETHINGSHECOULDEASILYCONCEALSOSHEMUSTHAVELEFTTHEPAINTINGBEHINDBUTACCORDINGTOTHEREPORTTHEHOUSEINMONTMARTREWASTHOROUGHLYSEARCHEDBYEXPERTSTHENISAWITHERTOOLSANDWORKSINTHEVENICEAPARTMENTWEREHIDDENBEHINDTHEPANELSIREMEMBEREDSARASREMARKFROMHERDIARYIWASNEARLYCAUGHTLASTNIGHTHECAMETOCHECKONMEANDIJUSTHADTIMETOTURNTHECANVASAGAINSTTHEWALLINTHESHADOWSTHEMONALISAWASNTPAINTEDONCANVASSOWHATELSEWOULDSHEHAVEBEENPAINTINGITOOKALOOKDOWNTHERETHISAF
TERNOONITISVERYDARKINTHEBACKCORNEROFTHATCELLARWITHNONATURALLIGHTANDATHICKCOATINGOFDIRTANDDUSTONTHEWALLSWHICHSEEMSTOABSORBANYILLUMINATIONLEAVINGITHARDTOMAKEOUTDETAILSEVENWITHAGOODTORCHIFYOUDIDNTKNOWITWASTHEREYOUWOULDNEVERFINDITBEHINDACAMOUFLAGEDFLAPPINNEDTOTHEWALLIFOUNDTHEPERUGGIACOPYTHEDISGUISEWASAWORKOFARTTHECANVASTHICKLYCOATEDWITHBRICKDUSTANDDIRTMIXEDWITHBINDERSSHEMUSTHAVESMUGGLEDDOWNFROMTHEATTICSTUDIOITWASFASTENEDTOTHEWALLWITHSOFTLEADWEDGESJAMMEDINTOTHECRACKSINTHEMORTAREASYTOREMOVEBUTEASILYCOVEREDWITHMOREOFTHEDIRTICANSEEWHYNOONESPOTTEDITBEFORETHEPERUGGIACOPYWASANOTHERMASTERPIECEBUTITBEGSTHEQUESTIONOFWHERETHEORIGINALMIGHTBEIAMNOTSUREWEWILLEVERFINDITANDIAMCERTAINTHEFRENCHWILLNOTLIKEITIFWETELLTHEMTHATIWOULDSUGGESTTHATWEKEEPITTOOURSELVESSINCETHEPERUGGIACOPYWASDESIGNEDTOSTAYUNDETECTEDUNLIKETHEEAGLEEMBLEMVERSIONMAYBEWESHOULDSWITCHTHEMANDFORGETABOUTITWECANBURNTHENAZIVERSIONANDMAYBENOONEWILLEVERKNOWTHATTHEPAINTINGINTHELOUVREISJUSTACOPYSOMEWHEREOUTTHERESARAANDHERFAMILYAREGUARDINGTHEORIGINALANDIFIEVERGETTHECHANCEIPLANTOGOHUNTINGFORITMAYBEYOUWOULDJOINMEHARRY");
Console.WriteLine(String.Join(" ", Guesser.Result));

if(Debug)
Expand Down
2 changes: 1 addition & 1 deletion C#/CipherWPF/Controls/AutoSpaceControl.xaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

<Grid Grid.Column="0">
<Button x:Name="ToggleProcessing" Content="Add spaces" Margin="0,5" VerticalAlignment="Top" HorizontalAlignment="Right" Width="75" Click="ToggleProcessing_Click" IsEnabled="{Binding Input, Converter={StaticResource StringLengthToBoolean}, ElementName=userControl}"/>
<TextBlock x:Name="ErrorMessages" Margin="0,35,0,5" TextWrapping="Wrap" Foreground="Red" TextAlignment="Right" Height="25"/>
<TextBlock x:Name="ErrorMessages" Margin="0,35,0,5" TextWrapping="Wrap" Foreground="Red" TextAlignment="Right"/>
</Grid>

<TextBox
Expand Down
7 changes: 5 additions & 2 deletions C#/CipherWPF/Controls/AutoSpaceControl.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ public AutoSpaceControl()

public void Space(string Input)
{
#if !DEBUG
try
{
#endif
WordGuesser Guesser = new WordGuesser(Input);
string Result = String.Join(" ", Guesser.Result);
Dispatcher.BeginInvoke(SuccessHandler, Result);
Dispatcher.BeginInvoke(SuccessHandler, Guesser.Result);
#if !DEBUG
}
catch (ThreadAbortException)
{
Expand All @@ -50,6 +52,7 @@ public void Space(string Input)
// Catch other exceptions
Dispatcher.BeginInvoke(ErrorHandler, e.Message);
}
#endif
}

public void ErrorMessage(string Error)
Expand Down
4 changes: 2 additions & 2 deletions C#/Performance.psess
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
</PostinstrumentEvent>
<Binaries>
<ProjBinary>
<Path>CipherPrompt\obj\Debug\CipherPrompt.exe</Path>
<Path>CipherPrompt\obj\Debug\Cipher.Prompt.exe</Path>
<ArgumentTimestamp>01/01/0001 00:00:00</ArgumentTimestamp>
<Instrument>true</Instrument>
<Sample>true</Sample>
Expand All @@ -48,7 +48,7 @@
<LaunchProject>true</LaunchProject>
<OverrideProjectSettings>false</OverrideProjectSettings>
<LaunchMethod>Executable</LaunchMethod>
<ExecutablePath>CipherPrompt\bin\Debug\CipherPrompt.exe</ExecutablePath>
<ExecutablePath>CipherPrompt\bin\Debug\Cipher.Prompt.exe</ExecutablePath>
<StartupDirectory>CipherPrompt\bin\Debug\</StartupDirectory>
<Arguments>
</Arguments>
Expand Down

0 comments on commit 0df9903

Please sign in to comment.