Skip to content

Commit

Permalink
Merge pull request apache#95 from palantir/rk/merge
Browse files Browse the repository at this point in the history
merge upstream
  • Loading branch information
robert3005 committed Jan 26, 2017
2 parents ce2c144 + be90811 commit 02a61e4
Show file tree
Hide file tree
Showing 1,033 changed files with 30,619 additions and 12,913 deletions.
2 changes: 1 addition & 1 deletion R/CRAN_RELEASE.md
Expand Up @@ -7,7 +7,7 @@ To release SparkR as a package to CRAN, we would use the `devtools` package. Ple

First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control.

Note that while `check-cran.sh` is running `R CMD check`, it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release.
Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. Also note that for the CRAN checks of PDF vignettes to succeed, the `qpdf` tool must be installed (to install it, e.g. `yum -q -y install qpdf`).

To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on the status of all `WARNING` (there should not be any) or `NOTE` entries. As part of `check-cran.sh` and the release process, the vignettes are built - make sure `SPARK_HOME` is set and Spark jars are accessible.

Expand Down
19 changes: 5 additions & 14 deletions R/check-cran.sh
Expand Up @@ -20,24 +20,14 @@
set -o pipefail
set -e

FWDIR="$(cd `dirname $0`; pwd)"
FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
pushd $FWDIR > /dev/null

if [ ! -z "$R_HOME" ]
then
R_SCRIPT_PATH="$R_HOME/bin"
else
# if system wide R_HOME is not found, then exit
if [ ! `command -v R` ]; then
echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed."
exit 1
fi
R_SCRIPT_PATH="$(dirname $(which R))"
fi
echo "USING R_HOME = $R_HOME"
. $FWDIR/find-r.sh

# Install the package (this is required for code in vignettes to run when building it later)
# Build the latest docs, but not vignettes, which is built with the package next
$FWDIR/create-docs.sh
. $FWDIR/install-dev.sh

# Build source package with vignettes
SPARK_HOME="$(cd "${FWDIR}"/..; pwd)"
Expand Down Expand Up @@ -82,4 +72,5 @@ else
# This will run tests and/or build vignettes, and require SPARK_HOME
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
fi

popd > /dev/null
11 changes: 6 additions & 5 deletions R/create-docs.sh
Expand Up @@ -29,26 +29,27 @@ set -o pipefail
set -e

# Figure out where the script is
export FWDIR="$(cd "`dirname "$0"`"; pwd)"
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
export FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
export SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/..; pwd)"

# Required for setting SPARK_SCALA_VERSION
. "${SPARK_HOME}"/bin/load-spark-env.sh

echo "Using Scala $SPARK_SCALA_VERSION"

pushd $FWDIR
pushd $FWDIR > /dev/null
. $FWDIR/find-r.sh

# Install the package (this will also generate the Rd files)
./install-dev.sh
. $FWDIR/install-dev.sh

# Now create HTML files

# knit_rd puts html in current working directory
mkdir -p pkg/html
pushd pkg/html

Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
"$R_SCRIPT_PATH/"Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'

popd

Expand Down
37 changes: 37 additions & 0 deletions R/create-rd.sh
@@ -0,0 +1,37 @@
#!/bin/bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Generates the Rd (R documentation) files for the SparkR package from the
# roxygen2 comments in R/pkg, using devtools if it is installed. The generated
# files are required before the package can be installed or checked.
#
# This script may be executed directly or sourced by a sibling script
# (e.g. install-dev.sh); it relies on find-r.sh to set R_SCRIPT_PATH.

set -o pipefail
set -e

# Resolve this script's directory; BASH_SOURCE (not $0) so it also works when sourced.
FWDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd)"
pushd "$FWDIR" > /dev/null
. "$FWDIR/find-r.sh"

# Generate Rd files if devtools is installed (silently a no-op otherwise).
"$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'

# Restore the caller's directory stack; matters when this script is sourced.
popd > /dev/null
34 changes: 34 additions & 0 deletions R/find-r.sh
@@ -0,0 +1,34 @@
#!/bin/bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Locates the directory containing the R executables and exposes it as
# R_SCRIPT_PATH. Intended to be sourced by sibling scripts. Resolution order:
#   1. an already-set R_SCRIPT_PATH is kept as-is;
#   2. $R_HOME/bin when R_HOME is set;
#   3. the directory of the R binary found on PATH.
# Exits with status 1 if R cannot be located at all.
if [ -z "$R_SCRIPT_PATH" ]
then
  if [ -n "$R_HOME" ]
  then
    R_SCRIPT_PATH="$R_HOME/bin"
  else
    # If neither R_HOME nor a system-wide R installation is found, then exit.
    if ! command -v R > /dev/null 2>&1; then
      echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed." >&2
      exit 1
    fi
    R_SCRIPT_PATH="$(dirname "$(command -v R)")"
  fi
  echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}"
fi
20 changes: 4 additions & 16 deletions R/install-dev.sh
Expand Up @@ -29,27 +29,15 @@
set -o pipefail
set -e

FWDIR="$(cd `dirname $0`; pwd)"
FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
LIB_DIR="$FWDIR/lib"

mkdir -p $LIB_DIR

pushd $FWDIR > /dev/null
if [ ! -z "$R_HOME" ]
then
R_SCRIPT_PATH="$R_HOME/bin"
else
# if system wide R_HOME is not found, then exit
if [ ! `command -v R` ]; then
echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed."
exit 1
fi
R_SCRIPT_PATH="$(dirname $(which R))"
fi
echo "USING R_HOME = $R_HOME"

# Generate Rd files if devtools is installed
"$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
. $FWDIR/find-r.sh

. $FWDIR/create-rd.sh

# Install SparkR to $LIB_DIR
"$R_SCRIPT_PATH/"R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
Expand Down
57 changes: 57 additions & 0 deletions R/install-source-package.sh
@@ -0,0 +1,57 @@
#!/bin/bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Installs the SparkR source package (SparkR_<VERSION>.tar.gz, previously built
# by check-cran.sh) into $FWDIR/lib, replacing any existing installation, and
# zips the installed package so it can be distributed to worker nodes on YARN.
# The package can then be loaded in R with:
#
#   library(SparkR, lib.loc="$FWDIR/lib")
#
# NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory
# to load the SparkR package on the worker nodes.

set -o pipefail
set -e

# Resolve this script's directory; BASH_SOURCE (not $0) so it also works when sourced.
FWDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd)"
pushd "$FWDIR" > /dev/null
. "$FWDIR/find-r.sh"

# Default VERSION to the Version: field of the package DESCRIPTION.
if [ -z "$VERSION" ]; then
  VERSION="$(grep Version "$FWDIR/pkg/DESCRIPTION" | awk '{print $NF}')"
fi

# The source tarball must have been produced by check-cran.sh beforehand.
if [ ! -f "$FWDIR"/SparkR_"$VERSION".tar.gz ]; then
  echo "R source package file $FWDIR/SparkR_$VERSION.tar.gz is not found." >&2
  echo "Please build R source package with check-cran.sh" >&2
  # exit takes an unsigned status; -1 is non-portable (wraps to 255 at best).
  exit 1
fi

echo "Removing lib path and installing from source package"
LIB_DIR="$FWDIR/lib"
# ${LIB_DIR:?} guards against 'rm -rf /' if the variable were ever empty.
rm -rf -- "${LIB_DIR:?}"
mkdir -p "$LIB_DIR"
"$R_SCRIPT_PATH/"R CMD INSTALL SparkR_"$VERSION".tar.gz --library="$LIB_DIR"

# Zip the SparkR package so that it can be distributed to worker nodes on YARN.
pushd "$LIB_DIR" > /dev/null
jar cfM "$LIB_DIR/sparkr.zip" SparkR
popd > /dev/null

popd > /dev/null
3 changes: 3 additions & 0 deletions R/pkg/.Rbuildignore
@@ -1,5 +1,8 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.lintr$
^cran-comments\.md$
^NEWS\.md$
^README\.Rmd$
^src-native$
^html$
21 changes: 13 additions & 8 deletions R/pkg/DESCRIPTION
@@ -1,28 +1,27 @@
Package: SparkR
Type: Package
Version: 2.2.0
Title: R Frontend for Apache Spark
Version: 2.1.0
Date: 2016-11-06
Description: The SparkR package provides an R Frontend for Apache Spark.
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
email = "shivaram@cs.berkeley.edu"),
person("Xiangrui", "Meng", role = "aut",
email = "meng@databricks.com"),
person("Felix", "Cheung", role = "aut",
email = "felixcheung@apache.org"),
person(family = "The Apache Software Foundation", role = c("aut", "cph")))
License: Apache License (== 2.0)
URL: http://www.apache.org/ http://spark.apache.org/
BugReports: http://spark.apache.org/contributing.html
Depends:
R (>= 3.0),
methods
Suggests:
knitr,
rmarkdown,
testthat,
e1071,
survival,
knitr,
rmarkdown
Description: The SparkR package provides an R frontend for Apache Spark.
License: Apache License (== 2.0)
survival
Collate:
'schema.R'
'generics.R'
Expand All @@ -42,7 +41,13 @@ Collate:
'functions.R'
'install.R'
'jvm.R'
'mllib.R'
'mllib_classification.R'
'mllib_clustering.R'
'mllib_recommendation.R'
'mllib_regression.R'
'mllib_stat.R'
'mllib_tree.R'
'mllib_utils.R'
'serialize.R'
'sparkR.R'
'stats.R'
Expand Down
4 changes: 3 additions & 1 deletion R/pkg/NAMESPACE
Expand Up @@ -3,7 +3,7 @@
importFrom("methods", "setGeneric", "setMethod", "setOldClass")
importFrom("methods", "is", "new", "signature", "show")
importFrom("stats", "gaussian", "setNames")
importFrom("utils", "download.file", "object.size", "packageVersion", "untar")
importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "untar")

# Disable native libraries till we figure out how to package it
# See SPARKR-7839
Expand All @@ -16,6 +16,7 @@ export("sparkR.stop")
export("sparkR.session.stop")
export("sparkR.conf")
export("sparkR.version")
export("sparkR.uiWebUrl")
export("print.jobj")

export("sparkR.newJObject")
Expand Down Expand Up @@ -132,6 +133,7 @@ exportMethods("arrange",
"summarize",
"summary",
"take",
"toJSON",
"transform",
"union",
"unionAll",
Expand Down

0 comments on commit 02a61e4

Please sign in to comment.