/
pipeline.R
70 lines (44 loc) · 2.03 KB
/
pipeline.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
source("~/Desktop/speedDating/auxFunctions/corers.R")
source("~/Desktop/speedDating/auxFunctions/displayers.R")
source("~/Desktop/speedDating/auxFunctions/probLORConverter.R")
source("~/Desktop/speedDating/dataProcessor/dataCleaner.R")
source("~/Desktop/speedDating/dataProcessor/ratingMetrics/ratingAvgs.R")
source("~/Desktop/speedDating/dataProcessor/ratingMetrics/collabFilt.R")
source("~/Desktop/speedDating/dataProcessor/binaries/basicBinaries.R")
source("~/Desktop/speedDating/dataProcessor/merger.R")
source("~/Desktop/speedDating/dataProcessor/binaries/crossMaker.R")
source("~/Desktop/speedDating/recommendationSystem/eventScheduler.R")
source("~/Desktop/speedDating/recommendationSystem/topNLists.R")
# Read the raw CSV once and bind the same data frame to both names: `df` is
# reassigned by the later processing steps, `oldDF` starts as the raw copy.
df <- oldDF <- read.csv("~/Desktop/speedDating/speedDatingData.csv")
# Coerce a vector to numeric and fill its gaps with the median.
#
# x: a vector that can go through as.character() (numeric, character,
#    factor, ...). Entries that cannot be parsed as numbers become NA
#    during coercion (R emits its usual coercion warning).
# Returns a numeric vector of the same length as x in which every NA is
# replaced by the median of the non-missing values. If nothing is
# non-missing the median itself is NA, so the NAs remain.
na_fixer <- function(x) {
  values <- as.numeric(as.character(x))
  is_gap <- is.na(values)
  fill <- median(values, na.rm = TRUE)
  values[is_gap] <- fill
  values
}
# ---- Main pipeline --------------------------------------------------------
# Cleans the raw data, builds binary/cross features, pairs men with women,
# renames the rating/average columns and writes the merged table to disk.

# Median-impute a numeric copy of the raw data, one column at a time.
# lapply() is used instead of apply(): apply() first coerces the whole data
# frame to a single matrix (a character matrix when any column is
# non-numeric, with numbers passed through format()), and it collapses to a
# plain vector when the frame has a single row.
oldDF <- data.frame(lapply(oldDF, na_fixer))

df <- processData(df)
n <- names(df)
# Drop every column whose name matches the pattern.
df <- df[!grepl("Act|Ind|Pref|sharRating$|probRating$|imprace$|samerace$", n)]
df <- basicBinaries(df)

# Copy the "order" column over from the imputed raw copy.
# NOTE(review): this assumes processData()/basicBinaries() keep the rows of
# df aligned one-to-one with oldDF -- confirm.
df["order"] <- oldDF["order"]

# Keep only rows belonging to the listed waves.
df <- df[df[["wave"]] %in% c(2, 4, 7, 9, 11, 12, 15, 19, 21), ]

answer <- makeCrossHash(df, c("race", "goal", "field", "career"))
df <- answer[["df"]]
crossHash <- answer[["crossHash"]]

# Rows with gender == 1 go to `men`, rows with gender == 0 to `women`.
men <- df[df["gender"] == 1, ]
women <- df[df["gender"] == 0, ]
merged <- mergeDF(men, women)
merged <- makeCrossesAndFreqs(merged, crossHash)

# NOTE(review): at this point `n` still holds names(df) captured right after
# processData(), not names(merged) -- confirm that this is intended.
raterAvgs <- n[grep("Rater|Wave", n)]
bads <- raterAvgs[!(raterAvgs %in% c("decRaterAvgW", "decRaterAvgM"))]
n <- names(merged)
merged <- merged[!(n %in% bads)]

# From here on `n` is the snapshot of names(merged) taken just before the
# `bads` columns were dropped; it is not refreshed by the renames below.
# NOTE(review): positions computed from `n` only line up with
# colnames(merged) if the drop above removed no columns -- confirm.

# Inspection of the columns about to be renamed (a bare expression is only
# shown when the script is run with auto-printing/echo). The closing bracket
# was missing here, which left the file unparseable.
n[grep("RatingM|AvgM", n)[-8]]

# Swap the trailing M/W suffix on the rating/average columns, skipping the
# 8th match in each direction.
colnames(merged)[grep("RatingM|AvgM", n)[-8]] <-
  gsub("M$", "W", n[grep("RatingM|AvgM", n)[-8]])
colnames(merged)[grep("RatingW|AvgW", n)[-8]] <-
  gsub("W$", "M", n[grep("RatingW|AvgW", n)[-8]])
# `-1:0` evaluates to c(-1, 0), i.e. every match except the first. The
# positions come from the "AvgW" matches while the new names come from the
# "AvgM" matches.
# NOTE(review): the asymmetry looks deliberate but is worth confirming.
colnames(merged)[grep("AvgW", n)[-1:0]] <-
  gsub("W$", "M", n[grep("AvgM", n)[-1:0]])

# Inspection only (see note above on bare expressions).
n[grep("RatingM|AvgM", n)]

merged["decM"] <- merged["decRatingM"]
niceCors(merged, n[grep("AvgM", n)], "decW")
write.csv(merged, "~/Desktop/speedDating/speedDatingDataProcessed.csv")