# Skipgram Analysis on Mozart Piano Sonatas

The counts have already been generated, now they can be analyzed.

## Setup

In [1]:
using DigitalMusicology
include("skipgrams.jl")
using Unsims

In [2]:
countsdir = "official_counts"

loadcounts(fn) = open(deserialize, joinpath(countsdir, fn))

listcounts() = filter(f -> ismatch(r".*jls", f), readdir(countsdir))

listcounts (generic function with 1 method)

In [3]:
files = listcounts()

5-element Array{String,1}:
 "counts_2x2_p1_1.0_p2_1.0_2018-03-27T15:35:02.958.jls"   
 "counts_2x3_p1_1.0_p2_0.001_2018-03-27T17:18:16.149.jls" 
 "counts_2x4_p1_1.0_p2_1.0e-6_2018-03-28T12_23_05.163.jls"
 "counts_3x2_p1_0.1_p2_1.0_2018-03-28T10:23:07.65.jls"    
 "counts_3x3_p1_0.1_p2_0.001_2018-03-28T23:58:41.003.jls" 

## Load File and Compute Ranks

In [4]:
counts = loadcounts(files[5]) # pick index of desired file

Trie{Array{DigitalMusicology.Pitches.MidiPitch,1},Float64}(...)

In [5]:
ranks = rankcounts(counts);

In [6]:
# total number of skipgrams
total = sum(map(x->x[2], ranks))

1.34234385e8

In [7]:
# number of different skipgram types found
length(ranks)

46680996

In [8]:
# print top 20 ranks
topranks(ranks, 20)

12991.0: [0, 0, 0] -> [0, 0, 0] -> [0, 0, 0]
6276.0: [0, 0, 0] -> [0, 0, 0] -> [5, 0, 0]
5942.0: [0, 0, 0] -> [5, 0, 0] -> [0, 0, 0]
5833.0: [0, 0, 0] -> [0, 0, 0] -> [9, 0, 0]
5196.0: [0, 3, 3] -> [3, 3, 3] -> [3, 3, 3]
5177.0: [0, 0, 0] -> [9, 0, 0] -> [0, 0, 0]
4608.0: [0, 0, 3] -> [10, 0, 2] -> [10, 10, 0]
4597.0: [0, 3, 3] -> [10, 0, 2] -> [10, 10, 0]
4363.0: [0, 0, 3] -> [10, 0, 2] -> [10, 0, 0]
4288.0: [0, 7, 7] -> [7, 7, 7] -> [7, 7, 7]
4254.0: [0, 3, 3] -> [10, 0, 2] -> [10, 0, 0]
4043.0: [0, 0, 0] -> [0, 0, 0] -> [10, 0, 0]
3999.0: [0, 1, 3] -> [10, 1, 1] -> [8, 10, 11]
3984.0: [0, 3, 3] -> [10, 2, 2] -> [10, 0, 0]
3958.0: [0, 3, 3] -> [0, 2, 2] -> [10, 10, 0]
3805.0: [0, 3, 3] -> [10, 2, 2] -> [10, 10, 0]
3787.0: [0, 3, 3] -> [0, 3, 3] -> [3, 3, 3]
3694.0: [0, 0, 3] -> [10, 0, 2] -> [9, 10, 0]
3686.0: [0, 0, 0] -> [10, 1, 1] -> [8, 10, 0]
3650.0: [0, 0, 0] -> [0, 0, 0] -> [4, 0, 0]


In [9]:
function latexranks(ranks, n)
  for (i, rank) in enumerate(ranks[1:n])
    gram = join(map(r -> string(tuple(r...)), rank[1]), " \\rightarrow ")
    println("\$$(i)\$ & \$$(gram)\$ & \$$(round(rank[2], -4))\$ & \$$(round(rank[2]/total, 4))\$ \\\\")
  end
end

latexranks (generic function with 1 method)

In [10]:
latexranks(ranks, 10)

$1$ & $(0, 0, 0) \rightarrow (0, 0, 0) \rightarrow (0, 0, 0)$ & $10000.0$ & $0.0001$ \\
$2$ & $(0, 0, 0) \rightarrow (0, 0, 0) \rightarrow (5, 0, 0)$ & $10000.0$ & $0.0$ \\
$3$ & $(0, 0, 0) \rightarrow (5, 0, 0) \rightarrow (0, 0, 0)$ & $10000.0$ & $0.0$ \\
$4$ & $(0, 0, 0) \rightarrow (0, 0, 0) \rightarrow (9, 0, 0)$ & $10000.0$ & $0.0$ \\
$5$ & $(0, 3, 3) \rightarrow (3, 3, 3) \rightarrow (3, 3, 3)$ & $10000.0$ & $0.0$ \\
$6$ & $(0, 0, 0) \rightarrow (9, 0, 0) \rightarrow (0, 0, 0)$ & $10000.0$ & $0.0$ \\
$7$ & $(0, 0, 3) \rightarrow (10, 0, 2) \rightarrow (10, 10, 0)$ & $0.0$ & $0.0$ \\
$8$ & $(0, 3, 3) \rightarrow (10, 0, 2) \rightarrow (10, 10, 0)$ & $0.0$ & $0.0$ \\
$9$ & $(0, 0, 3) \rightarrow (10, 0, 2) \rightarrow (10, 0, 0)$ & $0.0$ & $0.0$ \\
$10$ & $(0, 7, 7) \rightarrow (7, 7, 7) \rightarrow (7, 7, 7)$ & $0.0$ & $0.0$ \\


## Filtering

In [None]:
morepitchesthan(cand, n) = length(Set(vcat(cand...))) > n
ranksp3 = filter(r->morepitchesthan(r[1], 4), ranks)
topranks(ranksp3, 40)