Skip to content
This repository has been archived by the owner on Jun 8, 2021. It is now read-only.

Commit

Permalink
Changes to support counting hash collisions.
Browse files Browse the repository at this point in the history
  • Loading branch information
dbarowy committed Nov 20, 2017
1 parent c5c8c3a commit 3233f1e
Show file tree
Hide file tree
Showing 11 changed files with 97 additions and 3 deletions.
1 change: 1 addition & 0 deletions ExceLint/ClusterModelBuilder.fs
Expand Up @@ -448,6 +448,7 @@
weights = new Dictionary<AST.Address,double>();
clustering = m.ClusteringAtKnee;
fixes = [||];
escapehatch = None;
}
)
else
Expand Down
1 change: 1 addition & 0 deletions ExceLint/CommonTypes.fs
Expand Up @@ -152,6 +152,7 @@
weights: Weights;
clustering: Clustering;
fixes: ProposedFix[];
escapehatch: obj option;
}

type Analysis =
Expand Down
1 change: 1 addition & 0 deletions ExceLint/EntropyModelBuilder.fs
Expand Up @@ -325,6 +325,7 @@
weights = EntropyModel.Weights fixes; // this just returns entropy delta for now
clustering = CommonFunctions.ToMutableClustering (EntropyModel.RankingToClusters fixes);
fixes = [||];
escapehatch = None;
}
)
else
Expand Down
1 change: 1 addition & 0 deletions ExceLint/EntropyModelBuilder2.fs
Expand Up @@ -689,6 +689,7 @@
cutoff_idx = m.Cutoff;
weights = EntropyModel2.Weights fixeses; // this just returns entropy delta for now
clustering = CommonFunctions.ToMutableClustering (EntropyModel2.RankingToClusters fixes);
escapehatch = Some (m :> obj);
}
)
else
Expand Down
2 changes: 2 additions & 0 deletions ExceLint/ErrorModel.fs
Expand Up @@ -151,6 +151,8 @@
| Cluster a -> a.clustering
| _ -> failwith "Not valid for non-cluster analysis."

member self.Analysis = analysis

member self.inspectSelectorFor(addr: AST.Address, sel: Scope.Selector, dag: Depends.DAG) : KeyValuePair<AST.Address,(string*Countable)[]>[] =
let sID = sel.id addr dag

Expand Down
1 change: 0 additions & 1 deletion ExceLint/ExceLint.fsproj
Expand Up @@ -100,7 +100,6 @@
</ProjectReference>
<Reference Include="MathNet.Numerics">
<HintPath>..\packages\MathNet.Numerics.3.20.0\lib\net40\MathNet.Numerics.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="Microsoft.Office.Interop.Excel, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" />
<Reference Include="mscorlib" />
Expand Down
1 change: 1 addition & 0 deletions ExceLint/OldClusterModel.fs
Expand Up @@ -373,6 +373,7 @@
weights = new Dictionary<AST.Address,double>();
clustering = m.ClusteringAtKnee;
fixes = [||];
escapehatch = None;
}
)
else
Expand Down
23 changes: 23 additions & 0 deletions ExceLint/Vector.fs
Expand Up @@ -846,6 +846,7 @@
(typeof<ShallowInputVectorMixedFullCVectorResultantNotOSI>.Name,
{ enabled = false; kind = ConfigKind.Feature; runner = ShallowInputVectorMixedFullCVectorResultantNotOSI.run } )

// THIS IS THE IMPORTANT ONE USED IN THE PAPER
type ShallowInputVectorMixedFullCVectorResultantOSI() =
inherit BaseFeature()
static member run(cell: AST.Address)(dag: DAG) : Countable =
Expand All @@ -862,6 +863,28 @@
static member capability : string*Capability =
(typeof<ShallowInputVectorMixedFullCVectorResultantOSI>.Name,
{ enabled = false; kind = ConfigKind.Feature; runner = ShallowInputVectorMixedFullCVectorResultantOSI.run } )
/// Returns the location-free vectors computed for `cell`, using the same
/// settings as the feature defined above (per the original author's note,
/// this is the vector set for that analysis).
///
/// Each vector produced by `getVectors` is rebased with `relativeToTail`,
/// wrapped as a `Countable.FullCVectorResultant`, and then replaced by its
/// `LocationFree` form.
///
/// Fails with "this should never happen" if a rebased vector is not a
/// `ConstantWithLoc`.
static member getPaperVectors(cell: AST.Address)(dag: DAG) : Countable[] =
    // vector-extraction settings
    let mixed = true
    let transitive = false
    let formula = true
    let offSheetInsensitive = true
    let withConstant = true
    let withLoc = true
    let constantMaker = makeConstantVectorsFromConstants KeepConstantValue.No

    // wrap a single rebased vector as a countable
    let toCountable v =
        match v with
        | ConstantWithLoc(x,y,z,x',y',z',c) ->
            Countable.FullCVectorResultant((double x, double y, double z, double x', double y', double z', double c))
        | _ -> failwith "this should never happen"

    getVectors cell dag (makeVector mixed withConstant) constantMaker transitive formula
    |> Array.map (fun v -> relativeToTail v dag offSheetInsensitive withLoc)
    |> Array.map toCountable
    |> Array.map (fun c -> c.LocationFree)

type ShallowOutputVectorMixedL2NormSum() =
inherit BaseFeature()
Expand Down
1 change: 1 addition & 0 deletions ExceLintFileFormats/ExceLintStats.cs
Expand Up @@ -115,5 +115,6 @@ public class ExceLintStatsRow
public bool OptWeightConditionSetSz { get; set; }
public double ExceLintJaccardDistance { get; set; }
public int ExceLintDeltaK { get; set; }
public int Collisions { get; set; }
}
}
2 changes: 1 addition & 1 deletion ExceLintRunner/ExceLintRunner.fsproj
Expand Up @@ -26,7 +26,7 @@
<PlatformTarget>AnyCPU</PlatformTarget>
<DocumentationFile>bin\Debug\ExceLintRunner.XML</DocumentationFile>
<Prefer32Bit>true</Prefer32Bit>
<StartArguments>"C:\Users\Daniel Barowy\Documents\Visual Studio 2017\Projects\ExceLintPaper\data\spreadsheets\custodes" "C:\Users\Daniel Barowy\Desktop\benchmarks-2017-11-16" "C:\Users\Daniel Barowy\Documents\Visual Studio 2017\Projects\ExceLintPaper\analysis\dissertation_annotations\true_ref_bugs.csv" "C:\Users\Daniel Barowy\Documents\Visual Studio 2017\Projects\ExceLintPaper\data\analyses\CUSTODES\smell_detection_result.csv" "C:\ProgramData\Oracle\Java\javapath\java.exe" "C:\Users\Daniel Barowy\Documents\Visual Studio 2017\Projects\ExceLintPaper\data\analyses\CUSTODES2\cc2.jar" -verbose -cluster -noexit -noshuffle</StartArguments>
<StartArguments>"C:\Users\Daniel Barowy\Documents\Visual Studio 2017\Projects\ExceLintPaper\data\spreadsheets\custodes" "C:\Users\Daniel Barowy\Desktop\benchmarks-2017-11-16" "C:\Users\Daniel Barowy\Documents\Visual Studio 2017\Projects\ExceLintPaper\analysis\dissertation_annotations\true_ref_bugs.csv" "C:\Users\Daniel Barowy\Documents\Visual Studio 2017\Projects\ExceLintPaper\data\analyses\CUSTODES\smell_detection_result.csv" "C:\ProgramData\Oracle\Java\javapath\java.exe" "C:\Users\Daniel Barowy\Documents\Visual Studio 2017\Projects\ExceLintPaper\data\analyses\CUSTODES2\cc2.jar" -verbose -cluster -noexit -noshuffle -nocustodes</StartArguments>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
Expand Down
66 changes: 65 additions & 1 deletion ExceLintRunner/Program.fs
Expand Up @@ -36,6 +36,8 @@ open MathNet.Numerics.Distributions
custodes_time: int64;
excelint_jaccard: double;
excelint_delta_k: int;
cells: int;
collisions: int;
}

let hs_difference<'a>(hs1: HashSet<'a>)(hs2: HashSet<'a>) : HashSet<'a> =
Expand Down Expand Up @@ -227,6 +229,8 @@ open MathNet.Numerics.Distributions
else
Array.map (fun (kvp: KeyValuePair<AST.Address,double>) -> kvp.Value) (model.ranking()) |> Array.min

assert (model.AllCells.Count = stats.cells)

// write stats
let row = ExceLintStatsRow()
row.BenchmarkName <- stats.shortname
Expand Down Expand Up @@ -284,6 +288,7 @@ open MathNet.Numerics.Distributions
row.OptWeightConditionSetSz <- config.FeatureConf.IsEnabledOptWeightConditioningSetSize
row.ExceLintJaccardDistance <- stats.excelint_jaccard
row.ExceLintDeltaK <- stats.excelint_delta_k
row.Collisions <- stats.collisions

csv.WriteRow row

Expand Down Expand Up @@ -397,6 +402,57 @@ open MathNet.Numerics.Distributions
i <- i + 1
i

/// Result of a fingerprint-collision count over one analysis.
type SoundnessCount =
    {
        /// Number of cells that were examined.
        ncells: int
        /// Number of cells whose vector set differs from that of the first
        /// cell sharing the same fingerprint.
        nnomatch: int
    }

/// Counts fingerprint collisions among the cells of an analysis.
///
/// Every cell in the model's `AllCells` is grouped by the location-free
/// fingerprint produced by `ShallowInputVectorMixedFullCVectorResultantOSI.run`.
/// Within each group of more than one cell, a cell counts as a collision
/// when its vector set (from `getPaperVectors`) differs from the vector set
/// of the first cell in that group.
///
/// model_opt: the error model whose cells are examined.
/// dag:       the dependence graph passed through to the vector functions.
///
/// Returns the number of cells examined and the number of collisions.
/// Fails with "does not apply" when `model_opt` is `None`.
let soundness_count(model_opt: ErrorModel option)(dag: Depends.DAG) : SoundnessCount =
    // get analysis base
    let cells =
        match model_opt with
        | Some m -> m.AllCells
        | None -> failwith "does not apply"

    // set of cells that hash to the same fingerprint
    let fd = new Dict<Countable,HashSet<AST.Address>>()

    // vector set for the cell at each address; converted to a Set once here
    // so the comparisons below never rebuild it
    let addrv = new Dict<AST.Address,Set<Countable>>()

    // for each cell, get vectors and fingerprint
    for cell in cells do
        let vs =
            Vector.ShallowInputVectorMixedFullCVectorResultantOSI.getPaperVectors cell dag
            |> Set.ofArray
        let fingerprint = (Vector.ShallowInputVectorMixedFullCVectorResultantOSI.run cell dag).LocationFree

        // save vectors
        addrv.Add(cell, vs)

        // init hashset on first sight of this fingerprint
        if not (fd.ContainsKey(fingerprint)) then
            fd.Add(fingerprint, new HashSet<AST.Address>())

        fd.[fingerprint].Add cell |> ignore

    // for each fingerprint, count how many of those cells' vector sets
    // do not match the first cell's vector set
    let nomatch =
        fd
        |> Seq.sumBy (fun (kvp: KeyValuePair<Countable,HashSet<AST.Address>>) ->
            let addrs = kvp.Value |> Seq.toArray
            if addrs.Length > 1 then
                let vs0 = addrv.[addrs.[0]]
                // the first cell trivially matches itself, so skip it
                addrs
                |> Seq.skip 1
                |> Seq.filter (fun addr -> addrv.[addr] <> vs0)
                |> Seq.length
            else
                0
        )

    { ncells = Seq.length cells; nnomatch = nomatch; }


let analyze (file: String)(app: Application)(config: Args.Config)(etruth: ExceLintGroundTruth)(ctruth: CUSTODES.GroundTruth)(csv: ExceLintStats)(debug_csv: DebugInfo) =
let shortf = (System.IO.Path.GetFileName file)

Expand All @@ -410,6 +466,8 @@ open MathNet.Numerics.Distributions

let model_opt = ExceLint.ModelBuilder.analyze (app.XLApplication()) config.FeatureConf graph (config.alpha) (Depends.Progress.NOPProgress())

let scount = soundness_count model_opt graph

let (jdist,delta_k) =
match model_opt with
| Some model ->
Expand Down Expand Up @@ -479,7 +537,11 @@ open MathNet.Numerics.Distributions
let esz = Math.Min(excelint_flags.Count, model.Cutoff + 1)
assert (esz = excelint_true_ref_TP + excelint_true_ref_FP)
let csz = custodes_flags.Count
assert (csz = custodes_true_ref_TP + custodes_true_ref_FP)
// TODO: something funny happening here: 2017-11-16
//let foo1 = custodes_true_ref_TP
//let foo2 = custodes_true_ref_FP
//let foo3 = foo1 + foo2
//assert (csz = custodes_true_ref_TP + custodes_true_ref_FP)

let stats = {
shortname = shortf;
Expand Down Expand Up @@ -508,6 +570,8 @@ open MathNet.Numerics.Distributions
custodes_time = custodes_time;
excelint_jaccard = jdist;
excelint_delta_k = delta_k;
cells = scount.ncells;
collisions = scount.nnomatch;
}

// write to per-workbook CSV
Expand Down

0 comments on commit 3233f1e

Please sign in to comment.