Skip to content

Commit

Permalink
unit test additions (including user input validation testing); dead c…
Browse files Browse the repository at this point in the history
…ode removal for code coverage (including KDO & associated utils); misc fixes & revs (dotnet#22)
  • Loading branch information
daholste authored and Dmitry-A committed Aug 22, 2019
1 parent b624049 commit 0361c44
Show file tree
Hide file tree
Showing 10 changed files with 324 additions and 789 deletions.
6 changes: 4 additions & 2 deletions src/AutoML/ColumnInference/TextFileSample.cs
Expand Up @@ -97,7 +97,7 @@ public static TextFileSample CreateFromFullStream(Stream stream)
return CreateFromHead(stream);
}
var fileSize = stream.Length;

if (fileSize <= 2 * BufferSizeMb * (1 << 20))
{
return CreateFromHead(stream);
Expand Down Expand Up @@ -288,11 +288,13 @@ private static bool IsEncodingOkForSampling(byte[] buffer)
break;
}
if (utf8)
{
return true;
}

if (buffer.Take(sniffLim).Any(x => x == 0))
{
// likely a UTF-16 or UTF-32 wuthout a BOM.
// likely a UTF-16 or UTF-32 without a BOM.
return false;
}

Expand Down
495 changes: 0 additions & 495 deletions src/AutoML/Sweepers/KdoSweeper.cs

This file was deleted.

89 changes: 0 additions & 89 deletions src/AutoML/Sweepers/SweeperProbabilityUtils.cs
Expand Up @@ -10,26 +10,6 @@ namespace Microsoft.ML.Auto
{
internal sealed class SweeperProbabilityUtils
{
/// <summary>
/// Parameterless constructor; performs no initialization.
/// </summary>
public SweeperProbabilityUtils()
{
}

/// <summary>
/// Adds up every element of <paramref name="a"/>, front to back.
/// </summary>
/// <param name="a">Values to add together.</param>
/// <returns>The accumulated sum; 0 when the array has no elements.</returns>
public static double Sum(double[] a)
{
    double accumulator = 0;
    for (int index = 0; index < a.Length; index++)
    {
        accumulator += a[index];
    }
    return accumulator;
}

/// <summary>
/// Evaluates 0.5 * (1 + Erf((x - mean) / (sqrt(variance) * sqrt(2)))).
/// </summary>
/// <param name="x">Point at which to evaluate.</param>
/// <param name="mean">Value subtracted from <paramref name="x"/> to center it.</param>
/// <param name="variance">Variance; its square root scales the centered value.</param>
/// <returns>The value of the expression above.</returns>
public static double NormalCdf(double x, double mean, double variance)
{
    // Scale factor applied to the centered value before it is handed to Erf.
    double scale = Math.Sqrt(variance) * Math.Sqrt(2);
    double erfArgument = (x - mean) / scale;

    return 0.5 * (1 + ProbabilityFunctions.Erf(erfArgument));
}

public static double StdNormalPdf(double x)
{
return 1 / Math.Sqrt(2 * Math.PI) * Math.Exp(-Math.Pow(x, 2) / 2);
Expand Down Expand Up @@ -63,45 +43,6 @@ public double[] NormalRVs(int numRVs, double mu, double sigma)
return rvs.ToArray();
}

/// <summary>
/// Draws samples from a categorical distribution using (slow) roulette-wheel selection.
/// Should be swapped for another method as soon as one is available.
/// </summary>
/// <param name="numSamples">Number of samples to draw.</param>
/// <param name="weights">Weights for the distribution; they are normalized first when their sum differs from 1 by more than 0.0001.</param>
/// <returns>An array with one chosen index per sample.</returns>
public int[] SampleCategoricalDistribution(int numSamples, double[] weights)
{
    // Re-normalize only when the weights do not already sum to (approximately) 1.
    bool needsNormalizing = Math.Abs(1.0 - Sum(weights)) > 0.0001;
    if (needsNormalizing)
    {
        weights = Normalize(weights);
    }

    // The roulette wheel stores the running (cumulative) sum of the weights.
    var wheel = new double[weights.Length];
    double running = 0.0;
    int slot = 0;
    foreach (double weight in weights)
    {
        running += weight;
        wheel[slot++] = running;
    }

    // Each sample draws one random number and looks it up on the wheel.
    var picks = new int[numSamples];
    int lastSlot = wheel.Length - 1;
    for (int n = 0; n < numSamples; n++)
    {
        picks[n] = BinarySearch(wheel, AutoMlUtils.Random.NextDouble(), 0, lastSlot);
    }

    return picks;
}

/// <summary>
/// Returns the next value produced by <c>AutoMlUtils.Random.NextDouble()</c>.
/// </summary>
public double SampleUniform() => AutoMlUtils.Random.NextDouble();

/// <summary>
/// Simple binary search method for finding smallest index in array where value
/// meets or exceeds what you're looking for.
Expand All @@ -120,36 +61,6 @@ private int BinarySearch(double[] a, double u, int low, int high)
return a[mid] >= u ? BinarySearch(a, u, low, mid) : BinarySearch(a, u, mid, high);
}

/// <summary>
/// Rescales <paramref name="weights"/> in place by dividing every entry by their sum,
/// and returns the same array.
/// </summary>
/// <param name="weights">Weights to normalize; this array is modified.</param>
/// <returns>The same array instance that was passed in, after rescaling.</returns>
public static double[] Normalize(double[] weights)
{
    double total = Sum(weights);

    // If the total is at or below Double.Epsilon (e.g. all weights equal zero), set every
    // weight to 1 so the division below cannot divide by zero.
    if (total <= Double.Epsilon)
    {
        for (var i = 0; i < weights.Length; i++)
        {
            weights[i] = 1;
        }
        total = weights.Length;
    }

    for (int i = 0; i < weights.Length; i++)
    {
        weights[i] /= total;
    }
    return weights;
}

/// <summary>
/// Normalizes the weights, replaces each entry <c>w</c> with <c>1 - w</c>,
/// and then normalizes the result again.
/// </summary>
/// <param name="weights">Weights to invert and normalize.</param>
/// <returns>The array produced by the final call to <c>Normalize</c>.</returns>
public static double[] InverseNormalize(double[] weights)
{
    double[] normalized = Normalize(weights);

    // Flip each normalized weight around 1.
    int index = 0;
    while (index < normalized.Length)
    {
        normalized[index] = 1 - normalized[index];
        index++;
    }

    return Normalize(normalized);
}

public static Float[] ParameterSetAsFloatArray(IValueGenerator[] sweepParams, ParameterSet ps, bool expandCategoricals = true)
{
AutoMlUtils.Assert(ps.Count == sweepParams.Length);
Expand Down
59 changes: 0 additions & 59 deletions src/AutoML/Utils/Conversions.cs
Expand Up @@ -16,21 +16,6 @@ namespace Microsoft.ML.Auto

internal static class Conversions
{
/// <summary>
/// Parses <paramref name="src"/> through the wider unsigned TryParse overload and narrows the
/// result to U1. Returns false, with <paramref name="dst"/> set to zero, when that parse fails
/// or the parsed value is greater than U1.MaxValue.
/// </summary>
public static bool TryParse(in TX src, out U1 dst)
{
    if (TryParse(in src, out ulong parsed) && parsed <= U1.MaxValue)
    {
        dst = (U1)parsed;
        return true;
    }

    dst = 0;
    return false;
}

/// <summary>
/// This produces zero for empty. It returns false if the text is not parsable.
/// On failure, it sets dst to the NA value.
Expand Down Expand Up @@ -207,49 +192,5 @@ public static bool TryParse(in TX src, out BL dst)
dst = false;
return false;
}

/// <summary>
/// Parses <paramref name="src"/> as an unsigned integer. For empty input it sets
/// <paramref name="dst"/> to zero and returns false; otherwise it returns whatever
/// TryParseCore reports for the text's span.
/// </summary>
public static bool TryParse(in TX src, out U8 dst)
{
    if (!src.IsEmpty)
    {
        return TryParseCore(src.Span, out dst);
    }

    dst = 0;
    return false;
}

/// <summary>
/// Parses a run of decimal digits into an unsigned 64-bit value.
/// </summary>
/// <param name="span">Characters to parse; every character must be in '0'..'9'.</param>
/// <param name="dst">Receives the parsed value on success, or zero on failure.</param>
/// <returns>
/// True when every character is a digit and the value fits in a ulong (an empty span yields
/// true with zero); false otherwise.
/// </returns>
private static bool TryParseCore(ReadOnlySpan<char> span, out ulong dst)
{
    const ulong TopThreeBits = 0xE000000000000000UL;

    ulong acc = 0;
    foreach (char ch in span)
    {
        // Anything outside '0'..'9' wraps around to a value >= 10.
        uint digit = (uint)ch - (uint)'0';

        // If any of the top three bits are set, multiplying by 8 (and hence by 10) must overflow.
        bool ok = digit < 10 && (acc & TopThreeBits) == 0;
        if (ok)
        {
            // Given that 8 * acc does not overflow, 10 * acc + digit overflows exactly when the
            // wrapped result ends up smaller than 8 * acc.
            ulong timesEight = acc << 3;
            acc = timesEight + (acc << 1) + digit;
            ok = acc >= timesEight;
        }

        if (!ok)
        {
            dst = 0;
            return false;
        }
    }

    dst = acc;
    return true;
}
}
}
83 changes: 0 additions & 83 deletions src/AutoML/Utils/Stats.cs

This file was deleted.

0 comments on commit 0361c44

Please sign in to comment.