Permalink
Browse files

Add OIdata package

1 parent 7b58abf commit bda34844d1750563df334ae31c12339895d3323e @OpenIntroOrg committed Sep 15, 2015
View
@@ -0,0 +1,14 @@
+Package: OIdata
+Type: Package
+Title: Data sets and supplements (OpenIntro)
+Version: 1.0
+Date: 2011-11-03
+Author: Andrew P Bray and David M Diez
+Maintainer: Andrew P Bray <andrew@openintro.org>
+Description: A collection of data sets from several sources that may be useful for teaching, practice, or other purposes. Functions have also been included to assist in the retrieval of table data from websites or in visualizing sample data.
+License: GPL-2 | GPL-3
+LazyLoad: yes
+LazyData: yes
+Depends: RCurl, maps, R (>= 2.10)
+Imports: RCurl, maps
+URL: http://www.openintro.org
View
@@ -0,0 +1,37 @@
+4a1c18966ecdb7e6ce77a584029131c7 *DESCRIPTION
+4758db40eeeab736d7f151d0bf823892 *NAMESPACE
+ac35959f3a9ae84f099ed62f6a9ce0f1 *R/OIdata-internal.R
+9e65a0553e46eba22c77e1cec319d50c *R/getTables.R
+8eec02a33a12eaf186ffaf9b98817206 *R/mapvar.R
+a87d5ccc39aa82d4cc156a002498c0de *R/print.htmlTables.R
+65afebcd9ef5d4322058b66b9d625094 *R/processTable.R
+23aca8b55d5d6eeefd82c04171d5e392 *R/rowScrape.R
+1a218490e0ef3508ee9f0b73e4cb2879 *R/scrapeTable.R
+2548aa437733252b084790828a8aadf8 *R/stripTags.R
+bd6513d11458a1390f3d538216680804 *data/birds.rda
+88eff2ee668dcde0846dfd0d81e44b96 *data/datalist
+8c44ece7920e381db711f96b97af5e1e *data/esi.rda
+b0af85080f7d0c6e7b67c9be30851b6c *data/ipo.rda
+67e34764bba2d8a619bc4efdd9816e54 *data/london_boroughs.rda
+556b4778314092e01024b8460df6ff81 *data/military.rda
+1aafff8beff0b3434551539c12cc3eb2 *data/murders.rda
+c76b80b2a0db7a83f857bc34793bf16f *data/piracy.rda
+6d3baa7bef14fcae6322fe86aab5b9c8 *data/sp500.rda
+7c7b29fd471b15922c116e275bd7666e *data/state.rda
+02c843cd860891fe81b5f4ed2aa521fc *data/teacher.rda
+5394f16d64ab112c16222926836ee078 *data/ukDemo.rda
+958aadf91313779a3d8ddd8753c04091 *man/OIdata-package.Rd
+706935e23fcd2f303af1cb259073f5a0 *man/birds.Rd
+e41824249fc4409a00e2f6c8db7368e5 *man/esi.Rd
+5f78460f7c59f3247bca4f0ea52e14e5 *man/getTables.Rd
+d5ae022646fd923fd43028643c74030d *man/ipo.Rd
+bd420e7296aa5993daa563b6f4ef0ea6 *man/london_boroughs.Rd
+ad04ece84801957a2cc27922c74696ae *man/mapvar.Rd
+9d9636c706ad863cf8d0a97a40e31096 *man/military.Rd
+37f8ca64ca6fe3dc045c6697072d9ed5 *man/murders.Rd
+e9754bfa09a500d4608e59ec2f5bbb4f *man/piracy.Rd
+00de9d3ed1502b4d1c013462f3bd303f *man/processTable.Rd
+dd1521aacff0fdaff671f69975478344 *man/sp500.Rd
+0a73dad652fb2a231fd6c8675a15e832 *man/state.Rd
+908e241f391bdc5f520e9d3a1ece37c2 *man/teacher.Rd
+bb56cfd6f6347c69f72ff27e86f0e248 *man/ukDemo.Rd
View
@@ -0,0 +1,7 @@
+# Export every object whose name begins with a letter. This keeps all of the
+# package's functions public while leaving dot-prefixed objects (for example
+# the `.Random.seed` vector defined in the package sources) internal.
+# The former pattern "*" is not a well-formed regular expression: the
+# quantifier has nothing in front of it to repeat.
+#export(getTables)
+exportPattern("^[[:alpha:]]+")
+
+# getURL() comes from RCurl; map() and the state.fips data come from maps.
+import(RCurl, maps)
+
+# Register the print method so print() dispatches on class "htmlTables".
+S3method(print, htmlTables)
+
@@ -0,0 +1,126 @@
+# Integer vector stored under the name `.Random.seed` in the package sources.
+# NOTE(review): this looks like a saved random-number-generator state that was
+# dumped together with a workspace (e.g. by package.skeleton()); none of the
+# functions visible in this commit read it -- confirm before removing.
+.Random.seed <-
+c(403L, 10L, -1038622987L, 295944527L, -584241578L, 1880896768L,
+2055688259L, -401079327L, -953582768L, 155356590L, -159038631L,
+2036492651L, 353644762L, -1388906084L, -1210579553L, 1532147701L,
+1991521932L, -85499038L, 86854461L, -548906057L, -10809250L,
+-381137512L, -988129029L, 1863427225L, 767517992L, 1953588182L,
+-2074797999L, 221264643L, 2031803026L, 259344420L, -1665050905L,
+1710443325L, -973532236L, -681598950L, 1301013349L, -1201660417L,
+1021448326L, -823321584L, 2070331059L, -1773351599L, -727841536L,
+-1572229154L, 554609705L, -1438479397L, 122515690L, 1308991116L,
+1039376847L, 1578382725L, 776719932L, 22496018L, 1372332045L,
+-1607957689L, 1315944750L, 776499336L, -1072360053L, -1409610391L,
+-419397832L, 383148518L, 572224897L, -442783469L, 795764418L,
+223641972L, 1903909239L, 1429695085L, -1333058300L, 422559786L,
+-365421163L, -27368913L, -1879576778L, 1401155424L, -352332381L,
+396222145L, 968447664L, -1951378994L, -1978432199L, 1759828299L,
+611330810L, 1395428348L, -1489654721L, 1047000341L, -161522132L,
+-697933054L, 104701917L, 1745033943L, -356280898L, 925528056L,
+600746971L, 1797768761L, -1469255800L, 250588982L, -563352207L,
+-116652125L, -411414798L, -882141628L, 1713346823L, -338935587L,
+599524820L, 674879738L, 381206597L, 248326303L, 627765798L, 590988976L,
+-1270011053L, -1725806991L, 44329888L, 363461438L, 430522953L,
+-508938501L, 823838410L, -210700244L, -758374353L, -151166747L,
+177932700L, 1604744178L, -2136232339L, -1007674457L, -143989298L,
+356340520L, -2133526229L, 596034377L, -831674280L, 1447447814L,
+-1344829215L, 1536319603L, -1701053214L, 357510228L, 1016566103L,
+-765464243L, 2043299044L, 1070454986L, -172673739L, 782343055L,
+-2045321066L, 2037874752L, -236050173L, -1238072031L, -1610283504L,
+2080830958L, 1198107033L, -80484821L, 428273434L, -391334436L,
+691739615L, 2128079797L, -2078386868L, 1628267426L, -445363459L,
+-1931070473L, 1477479710L, 2115350488L, -1880055493L, -394217767L,
+-1263490456L, -1994081386L, -1245044975L, -1869199165L, 1677465810L,
+499260772L, -507141977L, -1349896707L, -1695782924L, -1108741926L,
+962363941L, -1881989313L, 432139462L, 99557712L, 1457353075L,
+786856849L, -210260544L, 760965022L, -1851999895L, 1765595547L,
+1555737770L, 227408588L, 1530856335L, -622856251L, 932412284L,
+501826514L, -606956723L, 1755369991L, -870714898L, -1882256184L,
+1542259659L, -691534295L, -1494037384L, -1784362586L, -1100059711L,
+-555079597L, 2019966722L, 1807723060L, -1479229513L, 2049064877L,
+-1985159996L, -963538710L, 1762232149L, -97707793L, 1362480886L,
+-1675625952L, -1571786781L, -156171135L, -2119773456L, 1524595086L,
+-842787079L, -389421941L, 188255418L, -56965188L, 927266047L,
+-723363755L, 1759570796L, -217676862L, 99861533L, 1606956183L,
+676708862L, 1292288440L, -987116133L, -1386179591L, 1844140872L,
+549697142L, -378392655L, -2124721693L, 1733613234L, -766317820L,
+2027264071L, 224399133L, 436213140L, -678741702L, -1293458043L,
+-793924001L, -1518274330L, 1575212308L, 2021743810L, 341888096L,
+1874785660L, -1172729904L, -126713566L, 1318082216L, 153081740L,
+1308300476L, 860736306L, -106953152L, -868715340L, 1814733896L,
+2104960442L, -378557936L, -428196628L, -616021676L, -547876078L,
+252456352L, -1583842612L, 80575072L, 2024521666L, -93236440L,
+79935916L, 865504572L, 1652161394L, -44581584L, 1529423652L,
+728630584L, 537593562L, 1833156496L, -481633540L, -1814803564L,
+-1656282046L, -570526464L, -1036978116L, -1809842864L, -2087021406L,
+884796232L, -300196084L, -1116627236L, -1640272046L, -990235008L,
+804313172L, -937319448L, 1730435834L, -2018327216L, 848932652L,
+-499858572L, -898600046L, -919625120L, 1915828108L, 631289696L,
+-462179358L, 1588620200L, 777354444L, -1112491524L, -8771662L,
+-1088899600L, 610407876L, -1300108968L, 1274027386L, 1415454128L,
+1267790780L, 1600143444L, -1146581054L, 1749143584L, 673984700L,
+1776749136L, 879171554L, -1247986072L, -665285044L, 2001576188L,
+1021765618L, -590609152L, 1542542196L, 1209028616L, 1114706362L,
+1140256208L, -77745172L, -867873900L, 45198290L, 114370400L,
+784679244L, -497184480L, 261242754L, 1816536808L, 1080432876L,
+488314428L, -525241486L, 1308939888L, -60619740L, -1397014728L,
+-1089177446L, -554492208L, 885234876L, -510244204L, 1364234882L,
+-239288128L, 1228764476L, 992508048L, -1266575838L, -172371448L,
+1415765260L, -428748132L, -402749934L, -408680064L, -1438336492L,
+1403604520L, -170290886L, -2060534832L, 246840940L, 294318900L,
+-606498158L, 1343079776L, 1864416588L, 1688908768L, 512429986L,
+1929715624L, 506783628L, 2010819260L, 2012943218L, 500852976L,
+1232159812L, -724761256L, -1658232774L, -670141584L, 1514107964L,
+1336900372L, 1652950466L, 1108278496L, 1925177212L, 325113552L,
+-1112937822L, 1110656040L, 715337996L, -1933493316L, -707863246L,
+-1217411136L, -456640972L, -81017912L, 913407162L, 1731682192L,
+1551548908L, 304354388L, -2094052718L, -637703392L, 367274444L,
+749958112L, -1931702590L, 695555624L, -1731375444L, 63307452L,
+-1981300238L, -1746407120L, 1887832228L, 1923955384L, -194726054L,
+1051512208L, -348283524L, -1626928876L, -462191294L, -247966976L,
+1326039996L, -662670512L, 1381358882L, 142182728L, 75488268L,
+51811420L, 1589314770L, -1092357120L, 1549231956L, -2120263192L,
+-1237834758L, -1590631728L, 997707052L, 199443700L, 473846674L,
+604378208L, -610600692L, -554601760L, -626705054L, 1161574056L,
+-113287604L, 274682748L, -208953550L, -442611088L, 1597649220L,
+1301161944L, 1471034490L, 1899104816L, -1470490948L, -450640044L,
+-1640384958L, -1473849440L, -615267140L, 243323344L, -1828490910L,
+1835634664L, 1059812300L, 134166268L, -414616590L, -1693234048L,
+-208123660L, -1680400248L, -489230022L, -807597232L, -441036692L,
+-419681132L, 1887212882L, -1325597984L, 1127349452L, 1911448736L,
+-529775230L, -1002105624L, 1357915372L, 2049091644L, -73487246L,
+-442430352L, -532934108L, -545704392L, 1358635546L, -1063531312L,
+-729231812L, 2136066196L, -1323142014L, 899953600L, 1258792792L,
+-1065316671L, 1157960051L, -1050101100L, -173654238L, 672451127L,
+585318145L, -832219642L, 1387482180L, -131158667L, -1451683617L,
+262992952L, 32560150L, -612281309L, -646417435L, 745371138L,
+-1312220048L, 2116471321L, 1795892235L, 1180411644L, 1318100778L,
+-1766043297L, -530904855L, -1415688194L, 1861919724L, -2019342531L,
+-1775215865L, -314920208L, 1439152782L, -178031429L, 995991709L,
+654175306L, -1773193624L, 1688877649L, -311973565L, 1285521476L,
+-1423158542L, 828570311L, 2102442129L, 1900660246L, -379165580L,
+245735173L, -1948570289L, -784093368L, -362363162L, -965261293L,
+-312021131L, -1749041326L, -770402400L, 1350464137L, -66554181L,
+-2135550708L, 459683994L, 1462894095L, 883604057L, -783706386L,
+-1671854724L, 438658541L, -1020886377L, -480035488L, -798422722L,
+-1161924085L, 764765837L, -1964471366L, 171778552L, 1506332065L,
+844866387L, -2091201932L, 1260063746L, -1155883369L, -588617631L,
+413623462L, 59823268L, -310839979L, -1248543233L, -1802898280L,
+1211359670L, -2061477309L, 419659653L, 1128037410L, -800755952L,
+-1983714631L, 216670059L, -1669161380L, -2059140214L, -133325953L,
+593839817L, -1841819810L, 1061762060L, 1879577565L, 359593703L,
+491878160L, -1437178322L, 1420868827L, -732892739L, -367562966L,
+-545237048L, -53015311L, -1156619677L, -2088024476L, 82627986L,
+1865493095L, -417750479L, -854610058L, -646638444L, 1142868645L,
+-1703942225L, 920196200L, 976361542L, 669314547L, -995710635L,
+-489530574L, 1887846144L, 1795361769L, 50394715L, -699463508L,
+1470652090L, -1422123921L, 13965817L, -754680690L, 13137244L,
+-2119694899L, 669894967L, -1007463488L, -1020384098L, 1475105131L,
+-725808915L, -2018889382L, 1646184984L, -170207359L, 658478387L,
+-1619846316L, 1666805090L, 1825836535L, 1354819137L, -1653939514L,
+-1455893500L, 1187980213L, -1482064737L, 1069963512L, -302935850L,
+-648881821L, 44102949L, -1113484606L, 974544816L, 270705625L,
+-57365173L, -1747006532L, 77452266L, 1045730335L, 567906985L,
+1024529982L, 1834126124L, 624821629L, -132822073L, -1705248080L,
+1784375502L, 2089419259L, 1957352029L, 73715082L, -1759832664L,
+1976548113L, -1016620541L, -353538812L, -1115199182L, -701204601L,
+897934801L, -179218602L, 272524596L, -1497867401L)
View
@@ -0,0 +1,34 @@
+# Scrape every HTML table from a web page or a local HTML file.
+#
+# Args:
+#   URL: a single string. When it begins with "http" the page is downloaded
+#        with getURL(); otherwise it is handed to readLines().
+#
+# Returns:
+#   A list of class "htmlTables" holding one scrapeTable() result per
+#   <table> tag. Results that are a single cell containing "" or "NA" are
+#   left out, and a page without any <table> tag yields an empty list.
+getTables <-
+function(URL){
+
+  #______ Clean HTML ______#
+  if(substr(URL, 1, 4) == "http"){
+    html <- getURL(URL)
+  } else {
+    html <- readLines(URL)
+    html <- paste(html, collapse="\n")
+  }
+  # collapse the document onto one line so the patterns below can match
+  # across what used to be line breaks
+  html <- gsub("\t", " ", html)
+  html <- gsub("\n", " ", html)
+  html <- gsub("&amp;", "&", html)
+  html <- gsub("<!--(.| )*?-->", "", html)
+
+  #______ Grab Individual Tables ______#
+  # the piece in front of the first <table ...> tag is not table content
+  html <- strsplit(html, "<table[^>]*>")[[1]][-1]
+  # lapply() simply returns an empty list when there is no table, whereas
+  # looping over N:1 would visit the indices 0 and 1 in that case
+  Table <- lapply(html, scrapeTable)
+
+  # TRUE for a result that carries no information: nothing at all, or a
+  # 1-by-1 table whose only cell is "" or "NA"
+  isBlank <- function(tab){
+    if(is.null(tab)){
+      return(TRUE)
+    }
+    length(dim(tab)) == 2 && all(dim(tab) == 1) &&
+      (identical(tab[1,1], "NA") || identical(tab[1,1], ""))
+  }
+  Table <- Table[!vapply(Table, isBlank, logical(1))]
+
+  #______ Wrap Up ______#
+  class(Table) <- "htmlTables"
+  return(Table)
+}
View
@@ -0,0 +1,27 @@
+# Draw a map of the US states shaded by a numeric variable, with a colour
+# strip legend underneath.
+#
+# Args:
+#   val:    numeric vector, one value per entry of `abbr`. It is rescaled
+#           to [0, 1] before being turned into colours.
+#   abbr:   state abbreviations, matched against state.fips$abb.
+#   col:    three non-negative numbers giving the red, green and blue
+#           weights of the shading colour (divided by their maximum).
+#   Legend: text drawn above the middle of the legend strip.
+#
+# Returns (invisibly):
+#   data.frame(state.fips, val), where `val` holds the rescaled values
+#   lined up with the rows of state.fips (NA where a state had no match).
+mapvar <- function(val, abbr, col=1:3, Legend=""){
+  # legend values: 100 evenly spaced steps across the observed range
+  Val <- range(val, na.rm=TRUE)
+  Val <- seq(Val[1], Val[2], length.out=100)
+  Val. <- (Val-min(Val, na.rm=TRUE)) /
+    diff(range(Val, na.rm=TRUE))
+  val <- (val-min(val, na.rm=TRUE)) /
+    diff(range(val, na.rm=TRUE))
+
+  # load the lookup table into this call's frame, not the user's workspace,
+  # and name the package so it is found even when maps is not attached
+  data("state.fips", package="maps", envir=environment())
+  keep <- match(state.fips$abb, abbr)
+  val <- val[keep]
+
+  # states without a value stay grey
+  col <- col/max(col)
+  col. <- rep("#888888", length(val))
+  val. <- val[!is.na(val)]
+  col.[!is.na(val)] <- rgb(val.*col[1],
+                           val.*col[2],
+                           val.*col[3])
+  map("state", col=col., fill=TRUE)
+
+  # legend strip: one thin rectangle per legend value (rect() is vectorised)
+  COL <- rgb(Val.*col[1], Val.*col[2], Val.*col[3])
+  i <- seq_along(Val)
+  rect(-123+2*i/10, 25, -123.2+2*i/10, 26.5,
+       col=COL, border="#88888800")
+  text(-122.5, 26.5, Val[1], pos=3)
+  text(-113, 27.5, Legend, pos=3)
+  text(-103.5, 26.5, Val[length(Val)], pos=3)
+  invisible(data.frame(state.fips, val))
+}
@@ -0,0 +1,40 @@
+# Print a compact preview of every table in an "htmlTables" list.
+#
+# Args:
+#   x:   an object of class "htmlTables" (a list of tables).
+#   d:   c(rows, columns) giving the largest corner of each table to show.
+#   n:   longest cell text shown in full; longer cells are cut to n-5
+#        characters and marked with "...". Also the width that the
+#        "<rows> by <cols>" header is padded to.
+#   ...: ignored; present for compatibility with the print() generic.
+#
+# Returns:
+#   `x`, invisibly.
+print.htmlTables <-
+function(x, d=c(4,2), n=17, ...){
+  for(i in seq_along(x)){
+    tab <- as.matrix(x[[i]])
+    cat(sprintf("[[%s]]", i), "________ ")
+    t1 <- paste(nrow(tab), "by", ncol(tab))
+    # never ask rep() for a negative count when the label is wider than n
+    t2 <- paste(rep("_", max(0, n-nchar(t1))), collapse="")
+    cat(t1, t2, "\n")
+
+    #___ Print 2x2 Table Details ___#
+    t1 <- min(d[1], nrow(tab))
+    t2 <- min(d[2], ncol(tab))
+    if(t1 > 0 && t2 > 0){
+      # drop=FALSE keeps the corner a matrix even for one row or column
+      X <- tab[seq_len(t1), seq_len(t2), drop=FALSE]
+      long <- nchar(X) > n
+      long[is.na(long)] <- FALSE
+      X <- substr(X, 1, ifelse(long, n-5, n))
+      X <- paste('"', X, ifelse(long, '..."', '"'), sep="")
+      X <- matrix(format(X), t1, t2)
+      # label the columns that are actually printed
+      temp <- if(t2 > 1) paste("1:", t2, sep="") else "1"
+      for(ii in seq_len(t1)){
+        cat(sprintf(" [%s,%s] ", ii, temp))
+        for(jj in seq_len(t2)){
+          cat(sprintf('%s', X[ii,jj]))
+          cat(" ")
+        }
+        if(ncol(tab) > d[2]){
+          cat("...")
+        }
+        cat("\n")
+      }
+    }
+    if(nrow(tab) > d[1]){
+      cat(" ...\n")
+    }
+    cat("\n")
+  }
+  cat("Ellipsis (...) generally denote a longer string")
+  cat("\nor additional rows or columns\n")
+  invisible(x)
+}
@@ -0,0 +1,43 @@
+# Convert text columns holding formatted numbers into numeric columns.
+#
+# Args:
+#   Table: a matrix or data frame, typically one table from getTables().
+#   num:   the columns to convert, given either all as column numbers or
+#          all as column names.
+#   Names: optional replacement column names for the result.
+#
+# Returns:
+#   A data frame in which the `num` columns are numeric. Spaces, "%", ",",
+#   "$" and parentheses are removed; a leading "-" or an opening "(" marks
+#   a negative value; a trailing "M" or "B" multiplies by 10^6 or 10^9.
+#   Cells without any digits become NA.
+#
+# Raises:
+#   An error when `num` is NULL or is neither a set of valid column
+#   numbers nor a set of valid column names.
+processTable <-
+function(Table, num=NULL, Names=NULL){
+  if(is.null(num[1])){
+    stop("'num' must be a vector of column names or numbers")
+  }
+  # reduce each membership test to one TRUE/FALSE before combining them:
+  # `&&` must not be handed a vector when several columns are requested
+  validNumbers <- all(num %in% seq_len(ncol(Table)))
+  validNames <- all(num %in% colnames(Table))
+  if(!validNumbers && !validNames){
+    stop("'num' must be a vector of column names or numbers")
+  }
+  Table <- as.data.frame(Table)
+  for(i in num){
+    cell <- as.character(Table[,i])
+    for(junk in c(" ", "%", ",", "$", ")")){
+      cell <- gsub(junk, "", cell, fixed=TRUE)
+    }
+    cell <- gsub("[(]", "-", cell)
+    # read the sign only after padding and currency marks are gone and
+    # "(" has become "-", so " -7" and "(5.5)" both count as negative
+    Negative <- !is.na(cell) & substr(cell, 1, 1) == "-"
+    million <- grepl("M$", cell)
+    cell <- gsub("M", "", cell, fixed=TRUE)
+    billion <- grepl("B$", cell)
+    cell <- gsub("B", "", cell, fixed=TRUE)
+    # whatever is left that is not a digit or a decimal point is noise
+    cell <- gsub("[^0-9.]", "", cell)
+    multiplier <- ifelse(million, 10^6, 1) * ifelse(billion, 10^9, 1)
+    Table[,i] <- as.numeric(cell) * multiplier * ifelse(Negative, -1, 1)
+  }
+  if(!is.null(Names[1])){
+    names(Table) <- Names
+  }
+  return(Table)
+}
View
@@ -0,0 +1,38 @@
+# Split the HTML of one table row into the text of its cells.
+#
+# Args:
+#   Row: a single string with the markup of one row, i.e. a run of
+#        <td ...>...</td> and/or <th ...>...</th> elements.
+#
+# Returns:
+#   A character vector with one entry per column the row occupies: each
+#   cell's content is passed through stripTags() and repeated according to
+#   its colspan attribute. A row without cells gives character(0).
+rowScrape <-
+function(Row){
+  Cells <- strsplit(Row, "</t[dh][^>]*>")[[1]]
+  # what follows the last closing tag is only padding when it is blank
+  if(length(Cells) > 0 &&
+     all(strsplit(tail(Cells, 1), "")[[1]] == " ")){
+    Cells <- head(Cells, -1)
+  }
+  if(length(Cells) == 0){
+    return(character(0))
+  }
+
+  # attribute text of each opening tag: between "<td"/"<th" and the first ">"
+  Attributes <- vapply(strsplit(Cells, "<t[dh]"), "[", character(1), 2)
+  Attributes <- vapply(strsplit(Attributes, ">"), "[", character(1), 1)
+
+  # number of columns a cell spans; 1 when colspan is absent or unusable
+  spanOf <- function(a){
+    # blanks are allowed on both sides of "=", as in colspan = "2"
+    Colspan <- strsplit(a, " colspan[ ]*=[ ]*")[[1]]
+    if(length(Colspan) < 2){
+      return(1)
+    }
+    Colspan <- Colspan[2]
+    Lead <- substr(Colspan, 1, 1)
+    if(Lead %in% c('"', "'")){
+      # quoted value: take the text between the matching quotes
+      span <- strsplit(Colspan, Lead)[[1]][2]
+    } else {
+      # bare value: it ends at the next blank
+      span <- strsplit(Colspan, " ")[[1]][1]
+    }
+    span <- suppressWarnings(as.numeric(span))
+    if(is.na(span) || span < 0){
+      return(1)
+    }
+    span
+  }
+  Repeat <- vapply(Attributes, spanOf, numeric(1), USE.NAMES=FALSE)
+
+  # cell content is what follows the complete opening tag
+  Cells <- vapply(strsplit(Cells, "<t[dh][^>]*>"), "[", character(1), 2)
+  Cells <- unlist(lapply(Cells, stripTags))
+  Cells <- rep(Cells, Repeat)
+  return(Cells)
+}
Oops, something went wrong.

0 comments on commit bda3484

Please sign in to comment.