Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
305 changes: 305 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,305 @@

# Created by https://www.gitignore.io/api/sbt,java,scala,python,eclipse,intellij,intellij+all

### Eclipse ###

.metadata
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.settings/
.loadpath
.recommenders

# External tool builders
.externalToolBuilders/

# Locally stored "Eclipse launch configurations"
*.launch

# PyDev specific (Python IDE for Eclipse)
*.pydevproject

# CDT-specific (C/C++ Development Tooling)
.cproject

# Java annotation processor (APT)
.factorypath

# PDT-specific (PHP Development Tools)
.buildpath

# sbteclipse plugin
.target

# Tern plugin
.tern-project

# TeXlipse plugin
.texlipse

# STS (Spring Tool Suite)
.springBeans

# Code Recommenders
.recommenders/

# Scala IDE specific (Scala & Java development for Eclipse)
.cache-main
.scala_dependencies
.worksheet

### Eclipse Patch ###
# Eclipse Core
.project

# JDT-specific (Eclipse Java Development Tools)
.classpath

### Intellij ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff:
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries

# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.xml
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml

# Gradle:
.idea/**/gradle.xml
.idea/**/libraries

# CMake
cmake-build-debug/

# Mongo Explorer plugin:
.idea/**/mongoSettings.xml

## File-based project format:
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

### Intellij Patch ###
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721

# *.iml
# modules.xml
# .idea/misc.xml
# *.ipr

# Sonarlint plugin
.idea/sonarlint

### Intellij+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff:

# Sensitive or high-churn files:

# Gradle:

# CMake

# Mongo Explorer plugin:

## File-based project format:

## Plugin-specific files:

# IntelliJ

# mpeltonen/sbt-idea plugin

# JIRA plugin

# Cursive Clojure plugin

# Crashlytics plugin (for Android Studio and IntelliJ)

### Intellij+all Patch ###
# Ignores the whole idea folder
# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360

.idea/

### Java ###
# Compiled class file
*.class

# Log file
*.log

# BlueJ files
*.ctxt

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files #
*.jar
*.war
*.ear
*.zip
*.tar.gz
*.rar

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, so as to inject the date and other information into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

### SBT ###
# Simple Build Tool
# http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control

dist/*
lib_managed/
src_managed/
project/boot/
project/plugins/project/
.history
.lib/

### Scala ###

# End of https://www.gitignore.io/api/sbt,java,scala,python,eclipse,intellij,intellij+all

### Local ###
tmp_pipeline/
test-output-tmp/
spark-warehouse/
/python/python.iml
1 change: 1 addition & 0 deletions .sbtrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
alias assemblyAndCopy=;assembly;copyAssembledJar
55 changes: 36 additions & 19 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,11 +1,32 @@

val scalaLangVersion = "2.11.11"
val sparkVersion = "2.1.1"
val sparkVer = "2.1.1"
val scalaVer = "2.11.11"
val scalaTestVersion = "3.0.0"

/** Package attributes */
name := "spark-nlp"
organization := "johnsnowlabs"
version := "1.1.0"
scalaVersion := scalaVer
sparkVersion := sparkVer


/** Spark-Package attributes */
spName := "johnsnowlabs/spark-nlp"
sparkComponents ++= Seq("mllib")
licenses += "Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0")
spIncludeMaven := false
spAppendScalaVersion := false
assemblyOption in assembly := (assemblyOption in assembly).value.copy(
includeScala = false
)
credentials += Credentials(Path.userHome / ".ivy2" / ".sbtcredentials")
ivyScala := ivyScala.value map {
_.copy(overrideScalaVersion = true)
}

lazy val analyticsDependencies = Seq(
"org.apache.spark" %% "spark-core" % sparkVersion % "provided",
"org.apache.spark" %% "spark-mllib" % sparkVersion % "provided"
"org.apache.spark" %% "spark-core" % sparkVer % "provided",
"org.apache.spark" %% "spark-mllib" % sparkVer % "provided"
)

lazy val testDependencies = Seq(
Expand All @@ -16,6 +37,14 @@ lazy val utilDependencies = Seq(
"com.typesafe" % "config" % "1.3.0"
)

lazy val root = (project in file("."))
.settings(
libraryDependencies ++=
analyticsDependencies ++
testDependencies ++
utilDependencies
)

parallelExecution in Test := false
logBuffered in Test := false

Expand All @@ -30,24 +59,12 @@ testOptions in Test += Tests.Argument("-oF")
/** Disables tests in assembly */
test in assembly := {}

lazy val root = (project in file("."))
.settings(
name := "spark-nlp",
version := "1.0.0",
organization := "com.jsl.nlp",
scalaVersion := scalaLangVersion,
libraryDependencies ++=
analyticsDependencies ++
testDependencies ++
utilDependencies
)

/** Copies the assembled jar to the python/lib dir */
lazy val copyAssembledJar = taskKey[Unit]("Copy assembled jar to pyspark/lib")

copyAssembledJar := {
val jarFilePath = (assemblyOutputPath in assembly).value
val newJarFilePath = baseDirectory( _ / "pysparknlp" / "lib" / "sparknlp.jar").value
val newJarFilePath = baseDirectory( _ / "python" / "lib" / "sparknlp.jar").value
IO.copyFile(jarFilePath, newJarFilePath)
println(s"[info] $jarFilePath copied to $newJarFilePath ")
}
}
3 changes: 3 additions & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
resolvers += "bintray-spark-packages" at "https://dl.bintray.com/spark-packages/maven/"

addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.6")
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.