This is where my **methods** begin.

In [None]:
```{r}
# Set the working directory for this session. The `dir` argument is left blank
# here as a placeholder: fill in the path to your project folder before running.
setwd(dir = )
```

In [None]:
```{r}
# One-time setup: install the CRAN packages that this notebook loads with
# library(). This only needs to be run once per R installation.
install.packages(c(
  "tidyverse",
  "tidytext",
  "magrittr",
  "devtools",
  "tsne",
  "usethis",
  "SnowballC",
  "lsa" # loaded with library(lsa) in this notebook, so it must be installed too
))

```

In [None]:
```{r}
library(usethis)
library(SnowballC)
library(tidyverse)
library(tidytext)
library(magrittr)
library(devtools)
library(tsne)
library(lsa)
# Hold off on running the line below until after you get to the next section 
library(wordVectors)

```

In [None]:
```{r}
# Install the wordVectors package from the GitHub repository bmschmidt/wordVectors.
# Run this before the library(wordVectors) line in the chunk above.
# force=TRUE requests the installation even if the package is already installed.
devtools::install_github('bmschmidt/wordVectors', force=TRUE)

```

In [None]:
```{r}
# Folder that holds the corpus.
# Change "data/frus/" to match the name of the folder with your corpus.
path2file <- "data/frus/"
# Full paths (folder + file name) of every file in the corpus folder.
fileList <- list.files(path = path2file, full.names = TRUE)

# Read one corpus file and collapse its lines into a single string.
#
# Args:
#   file: path to a text file (an element of fileList).
# Returns:
#   A tibble with the columns `filename` (the path with every occurrence of
#   `path2file` removed) and `text` (the lines read from the file, joined by
#   single spaces).
#
# Remember that the code that defines functions must be run by putting your
# cursor at the beginning or end, or by selecting the whole section of code.
readTextFiles <- function(file) {
  message(file) # show which file is being read
  # sep = "\n" makes scan() return one character element per line.
  rawText <- scan(file, what = character(), sep = "\n", strip.white = TRUE)
  # fixed = TRUE: treat the folder path as literal text, not as a regular
  # expression, so folder names containing ".", "(", "+" etc. are stripped
  # correctly.
  shortName <- gsub(path2file, "", file, fixed = TRUE)
  tibble(filename = shortName, text = rawText) %>%
    group_by(filename) %>%
    summarise(text = paste(text, collapse = " "))
}

# Build one table for the whole corpus: the file paths are grouped so that
# do() calls readTextFiles() once per path and binds the results together.
combinedTexts <- do(
  group_by(tibble(filename = fileList), filename),
  readTextFiles(.$filename)
)

```

In [None]:
```{r}

# Base name shared by every file written for this model.
# Don't forget to change the text below to whatever you want to call your model file.
baseFile <- "test_for_cassie40"

# Paths derived from the base name: raw input text, cleaned text, trained model.
w2vInput <- paste0("data/", baseFile, ".txt")
w2vCleaned <- paste0("data/", baseFile, "_cleaned.txt")
w2vBin <- paste0("data/", baseFile, ".bin")

# Write the corpus to the input file, one element of combinedTexts$text per line.
write_lines(combinedTexts$text, w2vInput)

```

In [None]:
```{r}
# Number of threads passed to train_word2vec().
THREADS <- 3

# Prepare the training text: reads w2vInput and writes w2vCleaned.
# TRUE is spelled out because the shorthand `T` is an ordinary variable that
# can be reassigned.
prep_word2vec(
  origin = w2vInput,
  destination = w2vCleaned,
  lowercase = TRUE,
  bundle_ngrams = 1
)

# See the introductory file for a reminder on how you might adjust the parameters below
# Train only when no model file exists yet at w2vBin; otherwise load the saved one.
if (!file.exists(w2vBin)) {
  w2vModel <- train_word2vec(
    w2vCleaned,
    output_file = w2vBin,
    vectors = 500,
    threads = THREADS,
    window = 10,
    iter = 10,
    negative_samples = 15
  )
} else {
  w2vModel <- read.vectors(w2vBin)
}


```

In [None]:
```{r}
  # Load a saved model from a hard-coded path; this replaces any w2vModel
  # already in the session.
  # NOTE(review): this path ("test_for_cassie.bin") differs from w2vBin, which is
  # built from baseFile ("test_for_cassie40") — confirm which model is intended.
  w2vModel <- read.vectors("data/test_for_cassie.bin")

```

In [None]:
```{r}

# Plot the model, passing perplexity=10 on to the plot method.
# NOTE(review): presumably a t-SNE projection (the tsne package is loaded) — confirm.
w2vModel %>% plot(perplexity=10)

```

In [None]:
```{r}
# Query the model for the entries closest to "girl" and open the result in the
# data viewer. The second argument (30) is presumably the number of results — confirm.
w2vModel %>% closest_to("girl", 30) %>% View()
```

In [None]:
```{r}

# Same kind of query, but with a formula that adds four words together
# ("girl", "woman", "girls", "women"); the result is opened in the data viewer.
# The second argument (20) is presumably the number of results — confirm.
w2vModel %>% closest_to(~"girl"+"woman"+"girls"+"women", 20) %>% View()

```

In [None]:
```{r}

# Group the model's rows into `centers` clusters with k-means.
centers <- 150
clustering <- kmeans(w2vModel, centers = centers, iter.max = 40)

# Export up to 150 clusters in random order. Capping the sample size at
# `centers` keeps sample() from failing when `centers` is lowered below 150.
clustersToExport <- min(centers, 150)

# One column per sampled cluster, holding the names of its first 15 members.
w2vExport <- vapply(
  sample(seq_len(centers), clustersToExport),
  function(n) {
    # Indexing with 1:15 always yields 15 entries; clusters with fewer than
    # 15 members are padded with NA.
    names(clustering$cluster[clustering$cluster == n][1:15])
  },
  character(15)
)

# Change "euromodel3clusters" to a descriptive name that you want to give to your export file.
# write.csv() does not create folders, so make sure "output" exists first.
if (!dir.exists("output")) {
  dir.create("output")
}
write.csv(file = "output/euromodel3clusters.csv", x = w2vExport)

```
## Evaluate the Model

You can run this test by hitting `command-return` or `control-return` to run one line at a time, or just hit the green button in the top right of the code block below.

```{r}
# Evaluate every saved model found under the working directory: for each model,
# compute the cosine similarity of a fixed list of word pairs, then write the
# scores to a CSV with one row per word pair and one column per model file.

# `pattern` is a regular expression (not a shell glob), so match a literal
# ".bin" at the end of the file name.
files_list <- list.files(pattern = "\\.bin$", recursive = TRUE)
if (length(files_list) == 0) {
  stop("No .bin model files found under the working directory.", call. = FALSE)
}

# Empty placeholder; not used in this chunk.
data_frame <- data.frame()

# Word pairs whose similarity is measured in each model.
data <- list(
  c("away", "off"),
  c("before", "after"),
  c("cause", "effects"),
  c("children", "parents"),
  c("come", "go"),
  c("day", "night"),
  c("first", "second"),
  c("good", "bad"),
  c("last", "first"),
  c("kind", "sort"),
  c("leave", "quit"),
  c("life", "death"),
  c("girl", "boy"),
  c("little", "small")
)

# Row labels such as "away-off", in the same order as `data`.
rownames <- vapply(
  data,
  function(pair) paste(pair[1], pair[2], sep = "-"),
  character(1)
)

# One numeric vector of similarities per model, keyed by the model's file name.
data_list <- list()

for (fn in files_list) {
  wwp_model <- read.vectors(fn)

  sims <- vapply(
    data,
    function(pair) {
      # cosine() is given plain numeric vectors, so strip the model class and
      # dimensions from each word's vector first.
      vector1 <- as.numeric(wwp_model[[pair[1]]])
      vector2 <- as.numeric(wwp_model[[pair[2]]])
      as.numeric(cosine(vector1, vector2))
    },
    numeric(1)
  )

  # basename() gives the file name at any folder depth, including files that
  # sit directly in the working directory.
  data_list[[basename(fn)]] <- sims
}

# check.names = FALSE keeps the file names unchanged as column names.
results <- data.frame(data_list, row.names = rownames, check.names = FALSE)

# write.csv() does not create folders, so make sure "output" exists first.
if (!dir.exists("output")) {
  dir.create("output")
}
write.csv(results, file = "output/model-test-results.csv")

```