Skip to content

Commit 411a04c

Browse files
Initial Commit
1 parent 5a8d807 commit 411a04c

File tree

221 files changed

+147
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

221 files changed

+147
-0
lines changed

21-BucketJoinDemo/build.sbt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
name := "BucketJoinDemo"
2+
organization := "guru.learningjournal"
3+
version := "0.1"
4+
scalaVersion := "2.12.10"
5+
6+
autoScalaLibrary := false
7+
val sparkVersion = "2.4.5"
8+
9+
val sparkDependencies = Seq(
10+
"org.apache.spark" %% "spark-core" % sparkVersion,
11+
"org.apache.spark" %% "spark-sql" % sparkVersion,
12+
"org.apache.spark" %% "spark-hive" % sparkVersion
13+
)
14+
15+
libraryDependencies ++= sparkDependencies

21-BucketJoinDemo/derby.log

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
----------------------------------------------------------------
2+
Sun Jul 05 09:30:49 IST 2020:
3+
Booting Derby version The Apache Software Foundation - Apache Derby - 10.12.1.1 - (1704137): instance a816c00e-0173-1d22-3a14-000014c76a58
4+
on database directory C:\Users\prashant\Documents\GitHub\SparkProgrammingInScala\21-BucketJoinDemo\metastore_db with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@35764bef
5+
Loaded from file:/C:/Users/prashant/AppData/Local/Coursier/cache/v1/https/repo1.maven.org/maven2/org/apache/derby/derby/10.12.1.1/derby-10.12.1.1.jar
6+
java.vendor=Oracle Corporation
7+
java.runtime.version=1.8.0_191-b12
8+
user.dir=C:\Users\prashant\Documents\GitHub\SparkProgrammingInScala\21-BucketJoinDemo
9+
os.name=Windows 10
10+
os.arch=amd64
11+
os.version=10.0
12+
derby.system.home=null
13+
Database Class Loader started - derby.database.classpath=''

21-BucketJoinDemo/log4j.properties

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Log messages at WARN level and above from all loggers to the console
2+
log4j.rootCategory=WARN, console
3+
4+
# define console appender
5+
log4j.appender.console=org.apache.log4j.ConsoleAppender
6+
log4j.appender.console.target=System.out
7+
log4j.appender.console.layout=org.apache.log4j.PatternLayout
8+
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
9+
10+
# Application logger: INFO and above goes to both the console and the rolling file
11+
log4j.logger.guru.learningjournal.spark.examples=INFO, console, file
12+
log4j.additivity.guru.learningjournal.spark.examples=false
13+
14+
#define rolling file appender
15+
log4j.appender.file=org.apache.log4j.RollingFileAppender
16+
log4j.appender.file.File=${spark.yarn.app.container.log.dir}/${logfile.name}.log
17+
# Define the following as Java system properties (JVM options) when launching the application:
18+
# -Dlog4j.configuration=file:log4j.properties
19+
# -Dlogfile.name=hello-spark
20+
# -Dspark.yarn.app.container.log.dir=app-logs
21+
log4j.appender.file.ImmediateFlush=true
22+
log4j.appender.file.Append=false
23+
log4j.appender.file.MaxFileSize=500MB
24+
log4j.appender.file.MaxBackupIndex=2
25+
log4j.appender.file.layout=org.apache.log4j.PatternLayout
26+
log4j.appender.file.layout.conversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
27+
28+
29+
# Recommendations from Spark template
30+
log4j.logger.org.apache.hadoop=ERROR
31+
log4j.logger.org.apache.spark.repl.Main=WARN
32+
log4j.logger.org.spark_project.jetty=WARN
33+
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
34+
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
35+
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
36+
log4j.logger.org.apache.parquet=ERROR
37+
log4j.logger.parquet=ERROR
38+
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
39+
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
40+
log4j.logger.org.apache.spark.storage.DiskBlockManager=FATAL
41+
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2+
# *************************************************************************
3+
# *** DO NOT TOUCH FILES IN THIS DIRECTORY! ***
4+
# *** FILES IN THIS DIRECTORY AND SUBDIRECTORIES CONSTITUTE A DERBY ***
5+
# *** DATABASE, WHICH INCLUDES THE DATA (USER AND SYSTEM) AND THE ***
6+
# *** FILES NECESSARY FOR DATABASE RECOVERY. ***
7+
# *** EDITING, ADDING, OR DELETING ANY OF THESE FILES MAY CAUSE DATA ***
8+
# *** CORRUPTION AND LEAVE THE DATABASE IN A NON-RECOVERABLE STATE. ***
9+
# *************************************************************************

21-BucketJoinDemo/metastore_db/db.lck

38 Bytes
Binary file not shown.
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
# *************************************************************************
3+
# *** DO NOT TOUCH FILES IN THIS DIRECTORY! ***
4+
# *** FILES IN THIS DIRECTORY ARE USED BY THE DERBY DATABASE RECOVERY ***
5+
# *** SYSTEM. EDITING, ADDING, OR DELETING FILES IN THIS DIRECTORY ***
6+
# *** WILL CAUSE THE DERBY RECOVERY SYSTEM TO FAIL, LEADING TO ***
7+
# *** NON-RECOVERABLE CORRUPT DATABASES. ***
8+
# *************************************************************************
48 Bytes
Binary file not shown.
1 MB
Binary file not shown.
48 Bytes
Binary file not shown.
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
# *************************************************************************
3+
# *** DO NOT TOUCH FILES IN THIS DIRECTORY! ***
4+
# *** FILES IN THIS DIRECTORY ARE USED BY THE DERBY DATABASE TO STORE ***
5+
# *** USER AND SYSTEM DATA. EDITING, ADDING, OR DELETING FILES IN THIS ***
6+
# *** DIRECTORY WILL CORRUPT THE ASSOCIATED DERBY DATABASE AND MAKE ***
7+
# *** IT NON-RECOVERABLE. ***
8+
# *************************************************************************
8 KB
Binary file not shown.
16 KB
Binary file not shown.
16 KB
Binary file not shown.
16 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
16 KB
Binary file not shown.
20 KB
Binary file not shown.
20 KB
Binary file not shown.
72 KB
Binary file not shown.
16 KB
Binary file not shown.
16 KB
Binary file not shown.
16 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
108 KB
Binary file not shown.
16 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
320 KB
Binary file not shown.
Binary file not shown.
20 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
28 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
68 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
28 KB
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
16 KB
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
16 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
8 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
72 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
12 KB
Binary file not shown.
Binary file not shown.
12 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
76 KB
Binary file not shown.
64 KB
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
16 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
8 KB
Binary file not shown.
64 KB
Binary file not shown.
Binary file not shown.
8 KB
Binary file not shown.
8 KB
Binary file not shown.
20 KB
Binary file not shown.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#C:\Users\prashant\Documents\GitHub\SparkProgrammingInScala\20-JoinDemo\metastore_db
2+
# ********************************************************************
3+
# *** Please do NOT edit this file. ***
4+
# *** CHANGING THE CONTENT OF THIS FILE MAY CAUSE DATA CORRUPTION. ***
5+
# ********************************************************************
6+
#Sun Jul 05 09:11:24 IST 2020
7+
SysschemasIndex2Identifier=225
8+
SyscolumnsIdentifier=144
9+
SysconglomeratesIndex1Identifier=49
10+
SysconglomeratesIdentifier=32
11+
SyscolumnsIndex2Identifier=177
12+
SysschemasIndex1Identifier=209
13+
SysconglomeratesIndex3Identifier=81
14+
SystablesIndex2Identifier=129
15+
SyscolumnsIndex1Identifier=161
16+
derby.serviceProtocol=org.apache.derby.database.Database
17+
SysschemasIdentifier=192
18+
derby.storage.propertiesId=16
19+
SysconglomeratesIndex2Identifier=65
20+
derby.serviceLocale=en_US
21+
SystablesIdentifier=96
22+
SystablesIndex1Identifier=113
23+
#--- last line, don't put anything after this line ---
Binary file not shown.

21-BucketJoinDemo/spark-warehouse/my_db.db/flight_data1/_SUCCESS

Whitespace-only changes.
Binary file not shown.

21-BucketJoinDemo/spark-warehouse/my_db.db/flight_data2/_SUCCESS

Whitespace-only changes.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package guru.learningjournal.spark.examples
2+
3+
import org.apache.log4j.Logger
4+
import org.apache.spark.sql.SparkSession
5+
6+
/**
 * Demo application that inner-joins two Hive tables on their `id` column.
 *
 * Reads `MY_DB.flight_data1` and `MY_DB.flight_data2`, joins them on `id`,
 * prints a sample of the joined rows, and then blocks on standard input
 * until a line is entered. The Spark session is always stopped on exit,
 * even when reading the tables or running the join fails.
 */
object BucketJoinDemo extends Serializable {
  @transient lazy val logger: Logger = Logger.getLogger(getClass.getName)

  /**
   * Entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val spark = SparkSession.builder()
      .appName("Join Demo")
      .master("local[3]")
      .enableHiveSupport()
      .getOrCreate()

    try {
      val flightDF1 = spark.read.table("MY_DB.flight_data1")
      val flightDF2 = spark.read.table("MY_DB.flight_data2")

      // Equi-join condition on the `id` column present in both tables.
      val joinExpr = flightDF1.col("id") === flightDF2.col("id")
      val joinDF = flightDF1.join(flightDF2, joinExpr, "inner")

      joinDF.show()

      // Keep the process alive until Enter is pressed
      // (presumably so the Spark UI can be inspected -- confirm intent).
      logger.info("Join finished. Press Enter to exit.")
      scala.io.StdIn.readLine()
    } finally {
      // Release the session even if any step above throws.
      spark.stop()
    }
  }
}

0 commit comments

Comments
 (0)