diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/CharactersFeatures.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/CharactersFeatures.scala index e2b6641..056f6cd 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/CharactersFeatures.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/CharactersFeatures.scala @@ -1,7 +1,8 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection +import java.util.regex.{ Matcher, Pattern } + import org.apache.spark.ml.linalg.{ Vector, Vectors } -import java.util.regex.{ Pattern, Matcher } class CharactersFeatures extends Serializable { @@ -10,7 +11,6 @@ class CharactersFeatures extends Serializable { val rounded: Double = Math.round(va * 10000).toDouble / 10000 rounded - } def Vector_Characters_Feature(StrValue: String): Array[Double] = { @@ -19,148 +19,145 @@ class CharactersFeatures extends Serializable { var RatioValues = new Array[Double](25) // Index is Important here val characterFeature_OBJ = new CharactersFeatures() - //1.Double result Value for uppercase Ration + // 1.Double result Value for uppercase Ration val uppercase = characterFeature_OBJ.UppercaseRation_Character(StrValue) if (!uppercase.isNaN()) { RatioValues(0) = RoundDouble(uppercase) } - //2.Double result Value for lowerCase Ratio + // 2.Double result Value for lowerCase Ratio val lowerCase = characterFeature_OBJ.LowercaseRation_Character(StrValue) if (!lowerCase.isNaN()) { RatioValues(1) = RoundDouble(lowerCase) } - //3.Double result Value for Alphanumeric Ratio + // 3.Double result Value for Alphanumeric Ratio val Alphanumeric = characterFeature_OBJ.AlphanumericRation_Character(StrValue) if (!Alphanumeric.isNaN()) { RatioValues(2) = RoundDouble(Alphanumeric) } - //4.Double result Value for ASCII Ratio + // 4.Double result Value for ASCII Ratio val ASCII = characterFeature_OBJ.ASCIIRation_Character(StrValue) if (!ASCII.isNaN()) { RatioValues(3) = RoundDouble(ASCII) } - //5.Double result Value for Bracket Ratio + // 5.Double result Value for Bracket Ratio val Bracket = characterFeature_OBJ.BracketRation_Character(StrValue) if (!Bracket.isNaN()) { RatioValues(4) = RoundDouble(Bracket) } - //6.Double result Value for Digits Ratio + // 6.Double result Value for Digits Ratio val Digits = characterFeature_OBJ.DigitsRation_Character(StrValue) if (!Digits.isNaN()) { RatioValues(5) = RoundDouble(Digits) } - //7.Double result Value for Latin Ratio + // 7.Double result Value for Latin Ratio val Latin = characterFeature_OBJ.Latin_Character(StrValue) if (!Latin.isNaN()) { RatioValues(6) = RoundDouble(Latin) } - //8.Double result Value for WhiteSpace Ratio + // 8.Double result Value for WhiteSpace Ratio val WhiteSpace = characterFeature_OBJ.WhiteSpace_Character(StrValue) if (!WhiteSpace.isNaN()) { RatioValues(7) = RoundDouble(WhiteSpace) } - //9.Double result Value for punc Ratio + // 9.Double result Value for punc Ratio val punc = characterFeature_OBJ.Punct_Character(StrValue) if (!punc.isNaN()) { RatioValues(8) = RoundDouble(punc) } - //10. Integer to Double result Value for LongCharacterSequence (1 integer) + // 10. 
Integer to Double result Value for LongCharacterSequence (1 integer) val LongCharacterSequence = characterFeature_OBJ.Longcharactersequence_Character(StrValue) if (!LongCharacterSequence.isNaN()) { RatioValues(9) = LongCharacterSequence } - //11.Double result Value for ArabicCharacter + // 11.Double result Value for ArabicCharacter val ArabicCharacter = characterFeature_OBJ.ArabicRation_Character(StrValue) if (!ArabicCharacter.isNaN()) { RatioValues(10) = RoundDouble(ArabicCharacter) } - //12.Double result Value for Bengali + // 12.Double result Value for Bengali val Bengali = characterFeature_OBJ.BengaliRation_Character(StrValue) if (!Bengali.isNaN()) { RatioValues(11) = RoundDouble(Bengali) } - //13.Double result Value for Brahmi + // 13.Double result Value for Brahmi val Brahmi = characterFeature_OBJ.BrahmiRation_Character(StrValue) if (!Brahmi.isNaN()) { RatioValues(12) = RoundDouble(Brahmi) - } - //14.Double result Value for Cyrillic + // 14.Double result Value for Cyrillic val Cyrillic = characterFeature_OBJ.CyrillicRation_Character(StrValue) if (!Cyrillic.isNaN()) { RatioValues(13) = RoundDouble(Cyrillic) - } - //15.Double result Value for Han + // 15.Double result Value for Han val Han = characterFeature_OBJ.HanRatio_Character(StrValue) if (!Han.isNaN()) { RatioValues(14) = RoundDouble(Han) - } - //16.Double result Value for Malysia + // 16.Double result Value for Malayalam val Malysia = characterFeature_OBJ.MalaysRatio_Character(StrValue) if (!Malysia.isNaN()) { RatioValues(15) = RoundDouble(Malysia) } - //17.Double result Value for Tami + // 17.Double result Value for Tamil val Tami = characterFeature_OBJ.TamilRatio_Character(StrValue) if (!Tami.isNaN()) { RatioValues(16) = RoundDouble(Tami) } - //18.Double result Value for Telugu + // 18.Double result Value for Telugu val Telugu = characterFeature_OBJ.TeluguRatio_Character(StrValue) if (!Telugu.isNaN()) { RatioValues(17) = RoundDouble(Telugu) } - //19.Double result Value for Symbol + // 19.Double result Value for Symbol val Symbol = characterFeature_OBJ.Symbol_Character(StrValue) if (!Symbol.isNaN()) { RatioValues(18) = RoundDouble(Symbol) } - //20. Double Alphabets Ration: + // 20. Double Alphabets Ratio: val Alphabets = characterFeature_OBJ.AlphaBetsRation_Character(StrValue) if (!Alphabets.isNaN()) { RatioValues(19) = RoundDouble(Alphabets) } - //21. Double AVisible character Ratio: + // 21. Double A visible character Ratio: val Visible = characterFeature_OBJ.VisibleRation_Character(StrValue) if (!Visible.isNaN()) { RatioValues(20) = RoundDouble(Visible) } - //22. Double Printable character Ratio: + // 22. Double Printable character Ratio: val Printable = characterFeature_OBJ.PrintableRation_Character(StrValue) if (!Printable.isNaN()) { RatioValues(21) = RoundDouble(Printable) } - //23.Double Blank character Ratio: + // 23.Double Blank character Ratio: val Blank = characterFeature_OBJ.BlankRation_Character(StrValue) if (!Blank.isNaN()) { RatioValues(22) = RoundDouble(Blank) } - //24.Double A control character: + // 24.Double A control character Ratio: val Control = characterFeature_OBJ.ControlRation_Character(StrValue) if (!Control.isNaN()) { RatioValues(23) = RoundDouble(Control) } - - //25. Double A hexadecimal digit : + // 25. 
Double A hexadecimal digit : val hexadecimal = characterFeature_OBJ.HexaRation_Character(StrValue) if (!hexadecimal.isNaN()) { RatioValues(24) = RoundDouble(hexadecimal) } + // val FacilityOBJ = new FacilitiesClass() // val vector_Values = FacilityOBJ.ToVector(RatioValues) @@ -176,7 +173,8 @@ class CharactersFeatures extends Serializable { } charRatio } - //1.Uppercase Ratio: + + // 1.Uppercase Ratio: def UppercaseRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{javaUpperCase}") val result: Double = characterRatio(str, pattern) @@ -187,51 +185,51 @@ class CharactersFeatures extends Serializable { val result: Double = characterRatio(str, pattern) result } - //3.Alphanumeric + // 3.Alphanumeric def AlphanumericRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{Alnum}") val result: Double = characterRatio(str, pattern) result } - //4.ASCII + // 4.ASCII def ASCIIRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{ASCII}") val result: Double = characterRatio(str, pattern) result } - //5.Bracket + // 5.Bracket def BracketRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\(|\\)|\\}|\\{|\\[|\\]") val result: Double = characterRatio(str, pattern) result } - //6.Digits + // 6.Digits def DigitsRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\d") val result: Double = characterRatio(str, pattern) result } - //7.Latin + // 7.Latin def Latin_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{IsLatin}") val result: Double = characterRatio(str, pattern) result } - //8.WhiteSpace + // 8.WhiteSpace def WhiteSpace_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\s") val result: Double = characterRatio(str, pattern) result } - //9.Punct + // 9.Punct def Punct_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{Punct}") val result: Double = characterRatio(str, pattern) result } - //10.Long character sequence: + // 10.Long character sequence: def Longcharactersequence_Character(str: String): Double = { var text: String = str var maxlength: Integer = null @@ -265,96 +263,96 @@ class CharactersFeatures extends Serializable { } - //11.ARabic Ratio: + // 11.ARabic Ratio: def ArabicRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{IsArabic}") val result: Double = characterRatio(str, pattern) result } - //12. Bengali Ratio + // 12. 
Bengali Ratio def BengaliRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{IsBengali}") val result: Double = characterRatio(str, pattern) result } - //13.Brahmi Ratio + // 13.Brahmi Ratio def BrahmiRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{IsBrahmi}") val result: Double = characterRatio(str, pattern) result } - //14.Cyrillic Ratio + // 14.Cyrillic Ratio def CyrillicRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{IsCyrillic}") val result: Double = characterRatio(str, pattern) result } - //15.HanRatio + // 15.Han Ratio def HanRatio_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{IsHan}") val result: Double = characterRatio(str, pattern) result } - //16.Malaysian Ratio: + // 16.Malayalam Ratio: def MalaysRatio_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{IsMalayalam}") val result: Double = characterRatio(str, pattern) result } - //17.Tamil Ratio: + // 17.Tamil Ratio: def TamilRatio_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{IsTamil}") val result: Double = characterRatio(str, pattern) result } - //18.Telugu Ration: + // 18.Telugu Ratio: def TeluguRatio_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{IsTelugu}") val result: Double = characterRatio(str, pattern) result } - //19.Symbols Ratio : + // 19.Symbols Ratio: def Symbol_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("[#$%&@+-_+*/]*") val result: Double = characterRatio(str, pattern) result } - //20.Alphabets Ratio : + // 20.Alphabets Ratio: def AlphaBetsRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{Alpha}") val result: Double = characterRatio(str, pattern) result } - //21.A visible character Ratio: + // 21.A visible character Ratio: def VisibleRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{Graph}") val result: Double = characterRatio(str, pattern) result } - //22.A printable character + // 22.A printable character Ratio def PrintableRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{Print}") val result: Double = characterRatio(str, pattern) result } - //23.A Black(it is different from White space) character Ratio + // 23.A Blank (distinct from whitespace) character Ratio def BlankRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{Blank}") val result: Double = characterRatio(str, pattern) result } - //24.Control character Ratio + // 24.Control character Ratio def ControlRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{Cntrl}") val result: Double = characterRatio(str, pattern) result } - //25.HexaDecimal character Ratio + // 25.Hexadecimal character Ratio def HexaRation_Character(str: String): Double = { val pattern: Pattern = Pattern.compile("\\p{XDigit}") val result: Double = characterRatio(str, pattern) @@ -362,4 +360,4 @@ class CharactersFeatures extends Serializable { } // Character features: ------ End calculation the Ratio for character: -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/Classifiers.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/Classifiers.scala index 3549c50..7794d85 100644 --- 
a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/Classifiers.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/Classifiers.scala @@ -1,33 +1,27 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection -import org.apache.spark.{ SparkContext, RangePartitioner } -import org.apache.spark.rdd.RDD -import org.apache.spark.sql._ -import org.apache.spark.sql.types.{ DoubleType, StringType, IntegerType, StructField, StructType } -import org.apache.spark.ml.linalg.{ Vector, Vectors } -import org.apache.spark.ml.classification.{ GBTClassificationModel, GBTClassifier } -import org.apache.spark.ml.classification.DecisionTreeClassificationModel -import org.apache.spark.ml.classification.DecisionTreeClassifier -import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator -import org.apache.spark.ml.classification.LogisticRegression -import org.apache.spark.ml.classification.MultilayerPerceptronClassifier +import java.io.{ File, IOException } +import java.text.SimpleDateFormat +import java.util.{ Calendar, Date } + import scala.collection.mutable -import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics -import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator + +import org.apache.commons.io.FileUtils +import org.apache.spark.{ RangePartitioner, SparkContext } +import org.apache.spark.ml.classification.{ DecisionTreeClassificationModel, DecisionTreeClassifier, GBTClassificationModel, GBTClassifier, LogisticRegression, MultilayerPerceptronClassifier, RandomForestClassificationModel, RandomForestClassifier } +import org.apache.spark.ml.evaluation.{ BinaryClassificationEvaluator, MulticlassClassificationEvaluator } import org.apache.spark.ml.feature.{ IndexToString, StringIndexer, VectorIndexer } -import org.apache.spark.ml.classification.{ RandomForestClassificationModel, RandomForestClassifier } import org.apache.spark.ml.Pipeline -import org.apache.commons.io.FileUtils; -import java.io.File; -import java.io.IOException; -import java.util.Calendar -import java.text.SimpleDateFormat -import java.util.Date -import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD} +import org.apache.spark.ml.linalg.{ Vector, Vectors } +import org.apache.spark.mllib.classification.{ SVMModel, SVMWithSGD } +import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics +import org.apache.spark.rdd.RDD +import org.apache.spark.sql._ +import org.apache.spark.sql.types.{ DoubleType, IntegerType, StringType, StructField, StructType } class Classifiers extends Serializable { - //1.ok ----- + // 1.ok ----- def RandomForestClassifer(DF_Training: DataFrame, DF_Testing: DataFrame, sc: SparkContext): String = { val sqlContext = new org.apache.spark.sql.SQLContext(sc) @@ -48,7 +42,7 @@ class Classifiers extends Serializable { // val Array(DF_Testing) = DF_Testing//.randomSplit(Array(0.100)) // Train a RandomForest model. - val rf = new RandomForestClassifier().setImpurity("gini").setMaxDepth(3).setNumTrees(20).setFeatureSubsetStrategy("auto").setSeed(5043).setLabelCol("indexedLabel").setFeaturesCol("indexedFeatures") //.setNumTrees(20) + val rf = new RandomForestClassifier().setImpurity("gini").setMaxDepth(3).setNumTrees(20).setFeatureSubsetStrategy("auto").setSeed(5043).setLabelCol("indexedLabel").setFeaturesCol("indexedFeatures") // .setNumTrees(20) // Convert indexed labels back to original labels. 
val labelConverter = new IndexToString().setInputCol("prediction").setOutputCol("predictedLabel").setLabels(labelIndexer.labels) @@ -66,7 +60,7 @@ class Classifiers extends Serializable { val finlaPrediction = predictions.select("Rid", "features", "FinalROLLBACK_REVERTED", "predictedLabel") predictions.show() - //Case1 : BinaryClassificationEvaluator:OK ------------------------------------------------------ + // Case1 : BinaryClassificationEvaluator:OK ------------------------------------------------------ val binaryClassificationEvaluator = new BinaryClassificationEvaluator().setLabelCol("indexedLabel").setRawPredictionCol("rawPrediction") var results1 = 0.0 def printlnMetricCAse1(metricName: String): Double = { @@ -79,7 +73,7 @@ class Classifiers extends Serializable { val PR = printlnMetricCAse1("areaUnderPR") // Case 2: MulticlassClassificationEvaluator:OK ----------------------------------------------------- - //Select (prediction, true label) and compute test error. + // Select (prediction, true label) and compute test error. val MulticlassClassificationEvaluator = new MulticlassClassificationEvaluator().setLabelCol("indexedLabel").setPredictionCol("prediction") var results2 = 0.0 @@ -93,10 +87,10 @@ class Classifiers extends Serializable { val Recall = printlnMetricCase2("weightedRecall") val finalResult = "ROC=" + ROC.toString() + "|" + "PR=" + PR.toString() + "|" + "accuracy=" + accuracy.toString() + "|" + "Precision=" + Precision.toString() + "|" + "Recall=" + Recall.toString() - finalResult + finalResult } - //2.ok------ + // 2.ok------ def DecisionTreeClassifier(DF_Training: DataFrame, DF_Testing: DataFrame, sc: SparkContext): String = { val sqlContext = new org.apache.spark.sql.SQLContext(sc) @@ -135,40 +129,38 @@ class Classifiers extends Serializable { val predictions = modelxx.transform(TestingData) // Select example rows to display. - //val finlaPrediction = predictions.select("Rid", "features", "FinalROLLBACK_REVERTED", "predictedLabel") + // val finlaPrediction = predictions.select("Rid", "features", "FinalROLLBACK_REVERTED", "predictedLabel") - //Case1 : BinaryClassificationEvaluator:---------------------------------------------------------- + // Case1 : BinaryClassificationEvaluator:---------------------------------------------------------- val binaryClassificationEvaluator = new BinaryClassificationEvaluator().setLabelCol("indexedLabel").setRawPredictionCol("rawPrediction") - - var result1=0.0 + + var result1 = 0.0 def printlnMetricCAse1(metricName: String): Double = { - result1 =binaryClassificationEvaluator.setMetricName(metricName).evaluate(predictions) - println(metricName + " = " +result1 ) - + result1 = binaryClassificationEvaluator.setMetricName(metricName).evaluate(predictions) + println(metricName + " = " + result1) + result1 } val ROC = printlnMetricCAse1("areaUnderROC") val PR = printlnMetricCAse1("areaUnderPR") - //Case 2: MulticlassClassificationEvaluator:----------------------------------------------------- - //Select (prediction, true label) and compute test error. + // Case 2: MulticlassClassificationEvaluator:----------------------------------------------------- + // Select (prediction, true label) and compute test error. 
val MulticlassClassificationEvaluator = new MulticlassClassificationEvaluator().setLabelCol("indexedLabel").setPredictionCol("prediction") - var result2=0.0 + var result2 = 0.0 def printlnMetricCase2(metricName: String): Double = { - result2=MulticlassClassificationEvaluator.setMetricName(metricName).evaluate(predictions) + result2 = MulticlassClassificationEvaluator.setMetricName(metricName).evaluate(predictions) println(metricName + " = " + result2) result2 } - val accuracy = printlnMetricCase2("accuracy") + val accuracy = printlnMetricCase2("accuracy") val Precision = printlnMetricCase2("weightedPrecision") val Recall = printlnMetricCase2("weightedRecall") - - val finalResult = "ROC=" + ROC.toString() + "|" + "PR=" + PR.toString() + "|" + "accuracy=" + accuracy.toString() + "|" + "Precision=" + Precision.toString() + "|" + "Recall=" + Recall.toString() + val finalResult = "ROC=" + ROC.toString() + "|" + "PR=" + PR.toString() + "|" + "accuracy=" + accuracy.toString() + "|" + "Precision=" + Precision.toString() + "|" + "Recall=" + Recall.toString() finalResult - - + } // 3.Ok -------- @@ -210,7 +202,7 @@ class Classifiers extends Serializable { predictions.show() - //Case1 : BinaryClassificationEvaluator:---------------------------------------------------------- + // Case1 : BinaryClassificationEvaluator:---------------------------------------------------------- val binaryClassificationEvaluator = new BinaryClassificationEvaluator().setLabelCol("indexedLabel").setRawPredictionCol("rawPrediction") var results1 = 0.0 def printlnMetricCase1(metricName: String): Double = { @@ -222,13 +214,13 @@ class Classifiers extends Serializable { val ROC = printlnMetricCase1("areaUnderROC") val PR = printlnMetricCase1("areaUnderPR") - //Case 2: MulticlassClassificationEvaluator:----------------------------------------------------- - //Select (prediction, true label) and compute test error. + // Case 2: MulticlassClassificationEvaluator:----------------------------------------------------- + // Select (prediction, true label) and compute test error. val MulticlassClassificationEvaluator = new MulticlassClassificationEvaluator().setLabelCol("indexedLabel").setPredictionCol("prediction") - var result2=0.0 + var result2 = 0.0 def printlnMetricCase2(metricName: String): Double = { - - result2=MulticlassClassificationEvaluator.setMetricName(metricName).evaluate(predictions) + + result2 = MulticlassClassificationEvaluator.setMetricName(metricName).evaluate(predictions) println(metricName + " = " + result2) result2 } @@ -236,13 +228,12 @@ class Classifiers extends Serializable { val Precision = printlnMetricCase2("weightedPrecision") val Recall = printlnMetricCase2("weightedRecall") - - val finalResult = "ROC=" + ROC.toString() + "|" + "PR=" + PR.toString() + "|" + "accuracy=" + accuracy.toString() + "|" + "Precision=" + Precision.toString() + "|" + "Recall=" + Recall.toString() + val finalResult = "ROC=" + ROC.toString() + "|" + "PR=" + PR.toString() + "|" + "accuracy=" + accuracy.toString() + "|" + "Precision=" + Precision.toString() + "|" + "Recall=" + Recall.toString() + + finalResult - finalResult - } - //4. OK----- + // 4. OK----- def GradientBoostedTree(DF_Training: DataFrame, DF_Testing: DataFrame, sc: SparkContext): String = { val sqlContext = new org.apache.spark.sql.SQLContext(sc) @@ -265,7 +256,7 @@ class Classifiers extends Serializable { // val Array(trainingData, testData) = Data.randomSplit(Array(0.7, 0.3)) // Train a DecisionTree model. 
- val gbt = new GBTClassifier().setLabelCol("indexedLabel").setFeaturesCol("indexedFeatures") //.setMaxIter(10) + val gbt = new GBTClassifier().setLabelCol("indexedLabel").setFeaturesCol("indexedFeatures") // .setMaxIter(10) // Convert indexed labels back to original labels. val labelConverter = new IndexToString().setInputCol("prediction").setOutputCol("predictedLabel").setLabels(labelIndexer.labels) @@ -281,7 +272,7 @@ class Classifiers extends Serializable { // Select example rows to display. - //Case1 : BinaryClassificationEvaluator:---------------------------------------------------------- + // Case1 : BinaryClassificationEvaluator:---------------------------------------------------------- var predictionsRDD = predictions.select("prediction", "FinalROLLBACK_REVERTED").rdd var predictionAndLabels = predictionsRDD.map { row => (row.get(0).asInstanceOf[Double], row.get(1).asInstanceOf[Double]) } @@ -290,32 +281,31 @@ class Classifiers extends Serializable { println("Area under ROC = " + metrics.areaUnderROC()) println("Area under PR = " + metrics.areaUnderPR()) - val ROC =metrics.areaUnderROC() - val PR= metrics.areaUnderPR() - - - //Case 2: MulticlassClassificationEvaluator:----------------------------------------------------- - //Select (prediction, true label) and compute test error. + val ROC = metrics.areaUnderROC() + val PR = metrics.areaUnderPR() + + // Case 2: MulticlassClassificationEvaluator:----------------------------------------------------- + // Select (prediction, true label) and compute test error. val MulticlassClassificationEvaluator = new MulticlassClassificationEvaluator().setLabelCol("indexedLabel").setPredictionCol("prediction") - var result2=0.0 + var result2 = 0.0 def printlnMetric(metricName: String): Double = { - - result2= MulticlassClassificationEvaluator.setMetricName(metricName).evaluate(predictions) - println(metricName + " = " +result2) + + result2 = MulticlassClassificationEvaluator.setMetricName(metricName).evaluate(predictions) + println(metricName + " = " + result2) result2 } val accuracy = printlnMetric("accuracy") val Precision = printlnMetric("weightedPrecision") val Recall = printlnMetric("weightedRecall") - + val finalResult = "ROC=" + ROC.toString() + "|" + "PR=" + PR.toString() + "|" + "accuracy=" + accuracy.toString() + "|" + "Precision=" + Precision.toString() + "|" + "Recall=" + Recall.toString() - finalResult + finalResult } - //5.Ok------------ + // 5.Ok------------ def MultilayerPerceptronClassifier(DF_Training: DataFrame, DF_Testing: DataFrame, sc: SparkContext): String = { val sqlContext = new org.apache.spark.sql.SQLContext(sc) @@ -352,7 +342,7 @@ class Classifiers extends Serializable { // predictions.show() - //Case1 : BinaryClassificationEvaluator:---------------------------------------------------------- + // Case1 : BinaryClassificationEvaluator:---------------------------------------------------------- var predictionsDF = predictions.select("prediction", "label") var predictionsRDD = predictions.select("prediction", "label").rdd var predictionAndLabels = predictionsRDD.map { row => (row.get(0).asInstanceOf[Double], row.get(1).asInstanceOf[Double]) } @@ -361,13 +351,10 @@ class Classifiers extends Serializable { println("Area under ROC = " + metrics.areaUnderROC()) println("Area under PR = " + metrics.areaUnderPR()) - - val ROC =metrics.areaUnderROC() - val PR= metrics.areaUnderPR() - - - - //Case 2: MulticlassClassificationEvaluator:----------------------------------------------------- + val ROC = metrics.areaUnderROC() + val PR = 
metrics.areaUnderPR() + + // Case 2: MulticlassClassificationEvaluator:----------------------------------------------------- val accuracyevaluator = new MulticlassClassificationEvaluator().setMetricName("accuracy") val weightedPrecisionevaluator = new MulticlassClassificationEvaluator().setMetricName("weightedPrecision") val weightedRecallevaluator = new MulticlassClassificationEvaluator().setMetricName("weightedRecall") @@ -375,22 +362,14 @@ class Classifiers extends Serializable { println("Accuracy = " + accuracyevaluator.evaluate(predictionsDF)) println("weightedPrecision = " + weightedPrecisionevaluator.evaluate(predictionsDF)) println("weightedRecall = " + weightedRecallevaluator.evaluate(predictionsDF)) - - + val accuracy = accuracyevaluator.evaluate(predictionsDF) val Precision = weightedPrecisionevaluator.evaluate(predictionsDF) val Recall = weightedRecallevaluator.evaluate(predictionsDF) - - - val finalResult = "ROC=" + ROC.toString() + "|" + "PR=" + PR.toString() + "|" + "accuracy=" + accuracy.toString() + "|" + "Precision=" + Precision.toString() + "|" + "Recall=" + Recall.toString() - finalResult - - + val finalResult = "ROC=" + ROC.toString() + "|" + "PR=" + PR.toString() + "|" + "accuracy=" + accuracy.toString() + "|" + "Precision=" + Precision.toString() + "|" + "Recall=" + Recall.toString() + finalResult } - - - -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/CommentProcessor.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/CommentProcessor.scala index 0f0ecc3..834cd7f 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/CommentProcessor.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/CommentProcessor.scala @@ -1,6 +1,7 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection import java.util.regex.{ Matcher, Pattern } + import org.slf4j.{ Logger, LoggerFactory } class CommentProcessor extends Serializable { @@ -97,7 +98,7 @@ class CommentProcessor extends Serializable { actions } - //Ok: helper for Revision Features: extract Action- subaction from comment: + // Ok: helper for Revision Features: extract Action- subaction from comment: def Extract_ActionsOfNormalComment(comment: String): String = { var result: Boolean = false @@ -108,7 +109,7 @@ class CommentProcessor extends Serializable { var Param = "" var parameters: Array[String] = Array.ofDim[String](0) var asterisk_Start = 0 // == /* - var asterisk_End = 0 //== */ + var asterisk_End = 0 // == */ var colon = 0 if (comment != null) { val check_asterisk_Start = comment.contains("/*") @@ -182,7 +183,7 @@ class CommentProcessor extends Serializable { var suffixComment = "" var asterisk_Start = 0 // == /* - var asterisk_End = 0 //== */ + var asterisk_End = 0 // == */ var colon = 0 if (comment != null) { @@ -246,7 +247,7 @@ class CommentProcessor extends Serializable { var Param = "" var parameters: Array[String] = Array.ofDim[String](0) var asterisk_Start = 0 // == /* - var asterisk_End = 0 //== */ + var asterisk_End = 0 // == */ var colon = 0 if (comment != null) { val check_asterisk_Start = comment.contains("/*") @@ -324,7 +325,7 @@ class CommentProcessor extends Serializable { var Param = "" var parameters: Array[String] = Array.ofDim[String](0) var asterisk_Start = 0 // == /* - var asterisk_End = 0 //== */ + var asterisk_End = 0 // == */ var colon = 0 if (comment != null) { val check_asterisk_Start = 
comment.contains("/*") @@ -403,7 +404,7 @@ class CommentProcessor extends Serializable { var Param = "" var parameters: Array[String] = Array.ofDim[String](0) var asterisk_Start = 0 // == /* - var asterisk_End = 0 //== */ + var asterisk_End = 0 // == */ var colon = 0 if (comment != null) { val check_asterisk_Start = comment.contains("/*") @@ -569,7 +570,7 @@ class CommentProcessor extends Serializable { } else { - //do not thing + // do nothing } @@ -584,7 +585,7 @@ class CommentProcessor extends Serializable { } - //"Thecommentis" + result_Str + "&&&" + "Ac1:" + Action1 + "&&&" + "Ac2 :" + Action2 + "&&&" + "SF:" + suffixComment + // "Thecommentis" + result_Str + "&&&" + "Ac1:" + Action1 + "&&&" + "Ac2 :" + Action2 + "&&&" + "SF:" + suffixComment def isRollback(comment: String): Boolean = { var result: Boolean = false if (comment != null) { @@ -594,8 +595,8 @@ class CommentProcessor extends Serializable { logger.debug("Robust but not precise rollback match (result = " + result + ") : " + tmp) } } - //result = tmp.startsWith("Reverted"); - //result = tmp.startsWith("Reverted"); + // result = tmp.startsWith("Reverted"); + // result = tmp.startsWith("Reverted"); result } @@ -613,8 +614,8 @@ class CommentProcessor extends Serializable { } } } - //result = (tmp.startsWith("Undid") || tmp.startsWith("Undo")) ; - //result = (tmp.startsWith("Undid") || tmp.startsWith("Undo")) ; + // result = (tmp.startsWith("Undid") || tmp.startsWith("Undo")) ; + // result = (tmp.startsWith("Undid") || tmp.startsWith("Undo")) ; result } @@ -632,8 +633,8 @@ class CommentProcessor extends Serializable { } } } - //result = (tmp.startsWith("Restored") || tmp.startsWith("Restore")); - //result = (tmp.startsWith("Restored") || tmp.startsWith("Restore")); + // result = (tmp.startsWith("Restored") || tmp.startsWith("Restore")); + // result = (tmp.startsWith("Restored") || tmp.startsWith("Restore")); result } @@ -693,7 +694,7 @@ class CommentProcessor extends Serializable { } def getUndoneRevisionId(comment: String): Long = { - var result: Long = 0l + var result: Long = 0L val matcher: Matcher = ROBUST_UNDO_PATTERN.matcher(comment) if (matcher.matches()) { val str: String = matcher.group(2) @@ -705,7 +706,7 @@ class CommentProcessor extends Serializable { } def getRestoredRevisionId(comment: String): Long = { - var result: Long = 0l + var result: Long = 0L val matcher: Matcher = ROBUST_RESTORE_PATTERN.matcher(comment) if (matcher.matches()) { val str: String = matcher.group(1) @@ -869,4 +870,4 @@ class CommentProcessor extends Serializable { def getItemValue(): String = itemValue -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/FacilitiesClass.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/FacilitiesClass.scala index a0902aa..4188dd4 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/FacilitiesClass.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/FacilitiesClass.scala @@ -1,9 +1,9 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection +import org.apache.spark.ml.linalg.{ Vector, Vectors } import org.apache.spark.rdd.RDD import org.apache.spark.sql._ -import org.apache.spark.sql.types.{ DoubleType, StringType, IntegerType, StructField, StructType } -import org.apache.spark.ml.linalg.{ Vector, Vectors } +import org.apache.spark.sql.types.{ DoubleType, IntegerType, StringType, StructField, StructType 
} class FacilitiesClass extends Serializable { @@ -18,68 +18,68 @@ class FacilitiesClass extends Serializable { namesList } - //ok --- Used for DF Triples + // ok --- Used for DF Triples def RDD_TO_DFR_RDFXML(rdd: RDD[String], sqlContext: org.apache.spark.sql.SQLContext): DataFrame = { - //Create an Encoded Schema in a String Format: + // Create an Encoded Schema in a String Format: val schemaString = "Subject Predicate Object" - //Generate schema: + // Generate schema: val schema = StructType(schemaString.split(" ").map(fieldName ⇒ StructField(fieldName, StringType, true))) - //Apply Transformation for Reading Data from Text File + // Apply Transformation for Reading Data from Text File val rowRDD = rdd.map(_.split(" ")).map(e ⇒ Row(e(0), e(1), e(2))) - //Apply RowRDD in Row Data based on Schema: + // Apply RowRDD in Row Data based on Schema: val RDFTRIPLE = sqlContext.createDataFrame(rowRDD, schema) - //Store DataFrame Data into Table + // Store DataFrame Data into Table RDFTRIPLE.registerTempTable("SPO") - //Select Query on DataFrame + // Select Query on DataFrame val dfr = sqlContext.sql("SELECT * FROM SPO") dfr.show() dfr } - //ok --- Used for DF Triples + // ok --- Used for DF Triples def RDD_TO_DFR_TRIX(rdd: RDD[String], sqlContext: org.apache.spark.sql.SQLContext): DataFrame = { - //Create an Encoded Schema in a String Format: + // Create an Encoded Schema in a String Format: val schemaString = "Subject Predicate Object" - //Generate schema: + // Generate schema: val schema = StructType(schemaString.split(" ").map(fieldName ⇒ StructField(fieldName, StringType, true))) - //Apply Transformation for Reading Data from Text File + // Apply Transformation for Reading Data from Text File val rowRDD = rdd.map(_.split("><")).map(e ⇒ Row(e(0), e(1), e(2))) - //Apply RowRDD in Row Data based on Schema: + // Apply RowRDD in Row Data based on Schema: val RDFTRIPLE = sqlContext.createDataFrame(rowRDD, schema) - //Store DataFrame Data into Table + // Store DataFrame Data into Table RDFTRIPLE.registerTempTable("SPO") - //Select Query on DataFrame + // Select Query on DataFrame val dfr = sqlContext.sql("SELECT * FROM SPO") dfr.show() dfr } - //ok --- Used for DF Triples + // ok --- Used for DF Triples def RDD_TO_DFR_JTriple(rdd: RDD[String], sqlContext: org.apache.spark.sql.SQLContext): DataFrame = { - //Create an Encoded Schema in a String Format: + // Create an Encoded Schema in a String Format: val schemaString = "Subject Predicate Object" - //Generate schema: + // Generate schema: val schema = StructType(schemaString.split(" ").map(fieldName ⇒ StructField(fieldName, StringType, true))) - //Apply Transformation for Reading Data from Text File + // Apply Transformation for Reading Data from Text File val rowRDD = rdd.map(_.split(",")).map(e ⇒ Row(e(0), e(1), e(2))) - //Apply RowRDD in Row Data based on Schema: + // Apply RowRDD in Row Data based on Schema: val RDFTRIPLE = sqlContext.createDataFrame(rowRDD, schema) - //Store DataFrame Data into Table + // Store DataFrame Data into Table RDFTRIPLE.registerTempTable("SPO") - //Select Query on DataFrame + // Select Query on DataFrame val dfr = sqlContext.sql("SELECT * FROM SPO") dfr.show() dfr } + def RoundDouble(va: Double): Double = { val rounded: Double = Math.round(va * 10000).toDouble / 10000 rounded - } def stringToInt(str: String): Integer = { @@ -139,7 +139,5 @@ class FacilitiesClass extends Serializable { } tem.trim() - } - -} \ No newline at end of file +} diff --git 
a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ItemFeatures.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ItemFeatures.scala index b4fc8c1..2992634 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ItemFeatures.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ItemFeatures.scala @@ -1,10 +1,10 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection -import java.util.regex.{ Pattern, Matcher } +import java.util.regex.{ Matcher, Pattern } class ItemFeatures extends Serializable { - //1. + // 1. def Get_NumberOfLabels(str: String): Double = { // from Label Tag @@ -15,11 +15,9 @@ class ItemFeatures extends Serializable { while (matcher.find()) { count += 1; count - 1 } count - - count } - //2. + // 2. def Get_NumberOfDescription(str: String): Double = { // from description tag @@ -30,11 +28,9 @@ class ItemFeatures extends Serializable { while (matcher.find()) { count += 1; count - 1 } count - - count } - //3. + // 3. def Get_NumberOfAliases(str: String): Double = { // from Aliases Tag @@ -45,11 +41,9 @@ class ItemFeatures extends Serializable { while (matcher.find()) { count += 1; count - 1 } count - - count } - //4. + // 4. def Get_NumberOfClaim(str: String): Double = { // from claim tag @@ -60,10 +54,8 @@ class ItemFeatures extends Serializable { while (matcher.find()) { count += 1; count - 1 } count - - count } - //5. + // 5. def Get_NumberOfSiteLinks(str: String): Double = { // from Sitelink tag @@ -74,10 +66,8 @@ class ItemFeatures extends Serializable { while (matcher.find()) { count += 1; count - 1 } count - - count } - //6. + // 6. def Get_NumberOfstatements(str: String): Double = { // from claims tag @@ -88,10 +78,8 @@ class ItemFeatures extends Serializable { while (matcher.find()) { count += 1; count - 1 } count - - count } - //7. + // 7. def Get_NumberOfReferences(str: String): Double = { @@ -107,7 +95,7 @@ class ItemFeatures extends Serializable { count } - //8. + // 8. def Get_NumberOfQualifier(str: String): Double = { // from claims tag @@ -118,10 +106,8 @@ class ItemFeatures extends Serializable { while (matcher.find()) { count += 1; count - 1 } count - - count } - //9. + // 9. def Get_NumberOfQualifier_Order(str: String): Double = { // from claims tag val input: String = str @@ -131,12 +117,9 @@ class ItemFeatures extends Serializable { while (matcher.find()) { count += 1; count - 1 } count - - count } - //10. + // 10. 
def Get_NumberOfBadges(str: String): Double = { - // from Sitelink tag val input: String = str val pattern: Pattern = Pattern.compile(""""badges"""" + ":") @@ -145,8 +128,6 @@ class ItemFeatures extends Serializable { while (matcher.find()) { count += 1; count - 1 } count - - count } -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/Main.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/Main.scala index 02f0bdd..5fa21d8 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/Main.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/Main.scala @@ -1,8 +1,7 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection -import org.apache.spark.{ SparkConf, SparkContext } +import org.apache.spark.{ RangePartitioner, SparkConf, SparkContext } import org.apache.spark.sql._ -import org.apache.spark.{ SparkContext, RangePartitioner } object Main { @@ -19,7 +18,7 @@ object Main { if (num == "1") { Start.Start_RDF_Parser_Appraoch(sc) - } // Distributed Standard Parser and Vandalism Detection : + } // Distributed Standard Parser and Vandalism Detection: else if (num == "2") { val Training_Data = Start.Training_Start_StandardXMLParser_VD(sc) @@ -27,22 +26,21 @@ object Main { val OBJClassifiers = new Classifiers() - //1.Random Forest Classifer: + // 1.Random Forest Classifer: val RandomForestClassifer_Values = OBJClassifiers.RandomForestClassifer(Training_Data, Testing_Data, sc) - //2.DecisionTreeClassifier + // 2.DecisionTreeClassifier val DecisionTreeClassifier_values = OBJClassifiers.DecisionTreeClassifier(Training_Data, Testing_Data, sc) // 3.LogisticRegrision val LogisticRegrision_values = OBJClassifiers.LogisticRegrision(Training_Data, Testing_Data, sc) - //4.GradientBoostedTree + // 4.GradientBoostedTree val GradientBoostedTree_values = OBJClassifiers.GradientBoostedTree(Training_Data, Testing_Data, sc) - //5.MultilayerPerceptronClassifier + // 5.MultilayerPerceptronClassifier val MultilayerPerceptronClassifier_values = OBJClassifiers.MultilayerPerceptronClassifier(Training_Data, Testing_Data, sc) - println(RandomForestClassifer_Values) println(DecisionTreeClassifier_values) println(LogisticRegrision_values) @@ -52,4 +50,4 @@ object Main { } } -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseJTriple.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseJTriple.scala index 122e297..395e53b 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseJTriple.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseJTriple.scala @@ -1,24 +1,25 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection -import org.apache.spark.SparkContext + +import java.io.ByteArrayInputStream +import java.util.ArrayList +import java.util.regex.Pattern + import org.apache.hadoop.mapred.JobConf -import org.apache.spark.rdd.RDD import org.apache.jena.graph.Triple import org.apache.jena.rdf.model.ModelFactory -import java.util.ArrayList -import java.util.regex.Pattern -import java.io.ByteArrayInputStream - -class ParseJTriple extends Serializable{ - - - def Start_JTriple_Parser(jobConf_Record: JobConf, sc: SparkContext): RDD[String] = { +import org.apache.spark.SparkContext +import 
org.apache.spark.rdd.RDD + +class ParseJTriple extends Serializable { + + def Start_JTriple_Parser(jobConf_Record: JobConf, sc: SparkContext): RDD[String] = { jobConf_Record.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader") jobConf_Record.set("stream.recordreader.begin", """"s":""") // start Tag jobConf_Record.set("stream.recordreader.end", "}") // End Tag org.apache.hadoop.mapred.FileInputFormat.addInputPaths(jobConf_Record, "hdfs://localhost:9000/mydata/xxx.json") // input path from Hadoop - //------------JTriple Record + // ------------JTriple Record // read data and save in RDD as block- JTriple Record val JTriple_Dataset_Record = sc.hadoopRDD(jobConf_Record, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) // println("HelloRecords" + " " + JTriple_Dataset_Record.count) @@ -29,14 +30,10 @@ class ParseJTriple extends Serializable{ val RevisioninOneString = JTriple_Dataset_Record_AsstringBlock.map(line => New_abendRevision(line)).distinct().cache() RevisioninOneString } - def New_abendRevision(str: String): String = { + def New_abendRevision(str: String): String = { val s1 = str.replaceAll("[\r\n]+", " "); - val s2 = s1.replaceAll("[.\\s]","").trim() - + val s2 = s1.replaceAll("[.\\s]", "").trim() s2 } - - - -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseNormalXML.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseNormalXML.scala index 5b70361..cea1e38 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseNormalXML.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseNormalXML.scala @@ -1,19 +1,20 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection -import org.apache.spark.SparkContext -import org.apache.hadoop.mapred.JobConf -import org.apache.spark.rdd.RDD import java.math.BigInteger +import java.net.InetAddress import java.util.ArrayList -import org.apache.commons.lang3.ArrayUtils import java.util.regex.{ Matcher, Pattern } -import java.net.InetAddress + +import org.apache.commons.lang3.ArrayUtils +import org.apache.hadoop.mapred.JobConf +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD class ParseNormalXML extends Serializable { def Training_DB_NormalXML_Parser_Input1(sc: SparkContext): RDD[String] = { - //Streaming records:==================================================================Input Files + // Streaming records:==================================================================Input Files val jobConf = new JobConf() jobConf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader") jobConf.set("stream.recordreader.begin", "") // start Tag @@ -21,10 +22,10 @@ class ParseNormalXML extends Serializable { org.apache.hadoop.mapred.FileInputFormat.addInputPaths(jobConf, "hdfs://localhost:9000/mydata/sample.xml") // input path from Hadoop // read data and save in RDD as block - val wikiData = sc.hadoopRDD(jobConf, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) //.distinct() + val wikiData = sc.hadoopRDD(jobConf, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) // .distinct() println(wikiData.count) val RevisionTagewikidata = 
wikiData.map { case (x, y) => (x.toString()) } - //println(RevisionTagewikidata.count) + // println(RevisionTagewikidata.count) // ABend the revision in one line string val RevisioninOneString = RevisionTagewikidata.map(line => New_abendRevision(line)).cache() @@ -37,7 +38,7 @@ class ParseNormalXML extends Serializable { } def Training_DB_NormalXML_Parser_Input2(sc: SparkContext): RDD[String] = { - //Streaming records:==================================================================Input Files + // Streaming records:==================================================================Input Files val jobConf = new JobConf() jobConf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader") jobConf.set("stream.recordreader.begin", "") // start Tag @@ -45,10 +46,10 @@ class ParseNormalXML extends Serializable { org.apache.hadoop.mapred.FileInputFormat.addInputPaths(jobConf, "hdfs://localhost:9000/mydata/2.xml") // input path from Hadoop // read data and save in RDD as block - val wikiData = sc.hadoopRDD(jobConf, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) //.distinct() + val wikiData = sc.hadoopRDD(jobConf, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) // .distinct() println(wikiData.count) val RevisionTagewikidata = wikiData.map { case (x, y) => (x.toString()) } - //println(RevisionTagewikidata.count) + // println(RevisionTagewikidata.count) // ABend the revision in one line string val RevisioninOneString = RevisionTagewikidata.map(line => New_abendRevision(line)).cache() @@ -61,7 +62,7 @@ class ParseNormalXML extends Serializable { } def Training_DB_NormalXML_Parser_Input3(sc: SparkContext): RDD[String] = { - //Streaming records:==================================================================Input Files + // Streaming records:==================================================================Input Files val jobConf = new JobConf() jobConf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader") jobConf.set("stream.recordreader.begin", "") // start Tag @@ -69,10 +70,10 @@ class ParseNormalXML extends Serializable { org.apache.hadoop.mapred.FileInputFormat.addInputPaths(jobConf, "hdfs://localhost:9000/mydata/3.xml") // input path from Hadoop // read data and save in RDD as block - val wikiData = sc.hadoopRDD(jobConf, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) //.distinct() + val wikiData = sc.hadoopRDD(jobConf, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) // .distinct() println(wikiData.count) val RevisionTagewikidata = wikiData.map { case (x, y) => (x.toString()) } - //println(RevisionTagewikidata.count) + // println(RevisionTagewikidata.count) // ABend the revision in one line string val RevisioninOneString = RevisionTagewikidata.map(line => New_abendRevision(line)).cache() // println("TotalCount" + " " + RevisioninOneString.count) @@ -83,9 +84,9 @@ class ParseNormalXML extends Serializable { } - def Testing_DB_NormalXML_Parser(sc: SparkContext): RDD[String] = { + def Testing_DB_NormalXML_Parser(sc: SparkContext): RDD[String] = { - //Streaming records:==================================================================Input Files + // Streaming 
records:==================================================================Input Files val jobConf = new JobConf() jobConf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader") jobConf.set("stream.recordreader.begin", "") // start Tag @@ -93,10 +94,10 @@ class ParseNormalXML extends Serializable { org.apache.hadoop.mapred.FileInputFormat.addInputPaths(jobConf, "hdfs://localhost:9000/mydata/3.xml") // input path from Hadoop // read data and save in RDD as block - val wikiData = sc.hadoopRDD(jobConf, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) //.distinct() + val wikiData = sc.hadoopRDD(jobConf, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) // .distinct() println(wikiData.count) val RevisionTagewikidata = wikiData.map { case (x, y) => (x.toString()) } - //println(RevisionTagewikidata.count) + // println(RevisionTagewikidata.count) // ABend the revision in one line string val RevisioninOneString = RevisionTagewikidata.map(line => New_abendRevision(line)).cache() // println("TotalCount" + " " + RevisioninOneString.count) @@ -107,11 +108,6 @@ class ParseNormalXML extends Serializable { } - - - - - // make the revision as one string def New_abendRevision(str: String): String = { @@ -125,13 +121,13 @@ class ParseNormalXML extends Serializable { // Ok: used on the Top def New_Build_Revision_map(obj: String): String = { var Store_Record_String = "" - //Json Revision : + // Json Revision : val JsonStr = Get_Json_Revision(obj) val Standered_JsonStr = Standared_Get_Json_Revision(obj) // for full string Jason with all formating for parsing by spark val Json_Standered = Standered_JsonStr.get(0).toString() // for full string Jason with all formating for parsing by spark val Json = JsonStr.get(0).toString() - //0.Id Revision + // 0.Id Revision val IdRevision = Get_ID_Revision(obj) if (IdRevision != "") { val ID = IdRevision.toString().trim() @@ -141,7 +137,7 @@ class ParseNormalXML extends Serializable { // else { // Store_Record_String = "0" // } - //1. Item Title : + // 1. Item Title : val ItemTitle: ArrayList[String] = Get_Item_Title_FromJson(Json) if (ItemTitle.size() > 0) { val groupItemTilte = ItemTitle.get(0).toString() @@ -164,8 +160,8 @@ class ParseNormalXML extends Serializable { } } - //=============Start:======= extract information from the json string - //2.Comments : + // =============Start:======= extract information from the json string + // 2.Comments : val commentarray = Get_Comment(obj) val comment = commentarray.get(0) if (comment.nonEmpty) { @@ -174,7 +170,7 @@ class ParseNormalXML extends Serializable { Store_Record_String = Store_Record_String.trim() + "NNLL" + "NA" } - //3.Parent ID : + // 3.Parent ID : val ParentIDStr = Get_ParentID(obj) if (ParentIDStr.nonEmpty) { @@ -185,7 +181,7 @@ class ParseNormalXML extends Serializable { Store_Record_String = Store_Record_String + "NNLL" + "0" } - //4.Timestamp: + // 4.Timestamp: val TimeStamparray = Get_TIMEStamp(obj) val TimeSta = TimeStamparray.get(0) if (TimeSta.nonEmpty) { @@ -194,41 +190,41 @@ class ParseNormalXML extends Serializable { Store_Record_String = Store_Record_String + "NNLL" + "NA" } - //5. Contributor Data( IP ): + // 5. 
Contributor Data( IP ): val Contributstr = Get_Contributor_IP(obj) - //val ContributorSta = Contributorarray.get(0) + // val ContributorSta = Contributorarray.get(0) if (Contributstr != "0") { Store_Record_String = Store_Record_String + "NNLL" + Contributstr.trim() } else { Store_Record_String = Store_Record_String + "NNLL" + "0" } - //6. Contributor ID : + // 6. Contributor ID : val Contributor_IDStr = Get_Contributor_ID(obj) - //val Contributor_IDSta = Contributor_IDarray.get(0) + // val Contributor_IDSta = Contributor_IDarray.get(0) if (Contributor_IDStr != "0") { Store_Record_String = Store_Record_String + "NNLL" + Contributor_IDStr.trim() } else { Store_Record_String = Store_Record_String + "NNLL" + "0" } - //7. Contributor Name : + // 7. Contributor Name : val Contributor_NameStr = Get_Contributor_Name(obj) - //val Contributor_IDSta = Contributor_IDarray.get(0) + // val Contributor_IDSta = Contributor_IDarray.get(0) if (Contributor_NameStr != "NA") { Store_Record_String = Store_Record_String + "NNLL" + Contributor_NameStr.trim() } else { Store_Record_String = Store_Record_String + "NNLL" + "NA" } - //8. Full Json Tag for Parsing: + // 8. Full Json Tag for Parsing: if (Json_Standered.nonEmpty) { Store_Record_String = Store_Record_String + "NNLL" + Json_Standered.trim() } else { Store_Record_String = Store_Record_String + "NNLL" + "NA" } - //9. Model : + // 9. Model : val modelstr = Get_Model(obj) if (modelstr.nonEmpty) { @@ -236,14 +232,14 @@ class ParseNormalXML extends Serializable { } else { Store_Record_String = Store_Record_String + "NNLL" + "NA" } - //10.Format: + // 10.Format: val Formatstr = Get_Format(obj) if (Formatstr.nonEmpty) { Store_Record_String = Store_Record_String + "NNLL" + Formatstr.trim() } else { Store_Record_String = Store_Record_String + "NNLL" + "NA" } - //11.SHA1 : + // 11.SHA1 : val SHAstr = Get_SHA1(obj) if (SHAstr.nonEmpty) { Store_Record_String = Store_Record_String + "NNLL" + SHAstr.trim() @@ -290,8 +286,8 @@ class ParseNormalXML extends Serializable { } - //********************** - // if (str.contains("")){ + // ********************** + // if (str.contains("")) { // // val inputID: CharSequence = str // val pattStr_id: String = "[0-9]+" @@ -306,7 +302,7 @@ class ParseNormalXML extends Serializable { // } // } // - // else if (str.contains("")){ + // else if (str.contains("")) { // // val inputID: CharSequence = str // val pattStr_id: String = "[0-9]+" @@ -327,7 +323,7 @@ class ParseNormalXML extends Serializable { tem } - //Extract TimeStampe value from Tag: + // Extract TimeStampe value from Tag: def Get_TIMEStamp(str: String): ArrayList[String] = { val TimeStamp: ArrayList[String] = new ArrayList[String]() @@ -382,7 +378,7 @@ class ParseNormalXML extends Serializable { } - //extract Item Title from Json string + // extract Item Title from Json string def Get_Item_Title_FromJson(str: String): ArrayList[String] = { val Item_Title_FromJason: ArrayList[String] = new ArrayList[String]() @@ -634,5 +630,4 @@ class ParseNormalXML extends Serializable { } temp } - -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseRDFXML.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseRDFXML.scala index 3f83897..2add40c 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseRDFXML.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseRDFXML.scala @@ -1,13 
+1,14 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection -import org.apache.spark.SparkContext +import java.io.ByteArrayInputStream +import java.util.ArrayList +import java.util.regex.Pattern + import org.apache.hadoop.mapred.JobConf -import org.apache.spark.rdd.RDD import org.apache.jena.graph.Triple import org.apache.jena.rdf.model.ModelFactory -import java.util.ArrayList -import java.util.regex.Pattern -import java.io.ByteArrayInputStream +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD class ParseRDFXML extends Serializable { @@ -24,7 +25,7 @@ class ParseRDFXML extends Serializable { org.apache.hadoop.mapred.FileInputFormat.addInputPaths(jobConf_Record, "hdfs://localhost:9000/mydata/Germany.rdf") // input path from Hadoop org.apache.hadoop.mapred.FileInputFormat.addInputPaths(jobConf_Prefixes, "hdfs://localhost:9000/mydata/Germany.rdf") // input path from Hadoop - //------------ RDF XML Record + // ------------ RDF XML Record // read data and save in RDD as block- RDFXML Record val RDFXML_Dataset_Record = sc.hadoopRDD(jobConf_Record, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) // println("HelloRecords" + " " + RDFXML_Dataset_Record.count) @@ -34,14 +35,14 @@ class ParseRDFXML extends Serializable { println("HelloRecords" + " " + RDFXML_Dataset_Record_AsstringBlock.count) // RDFXML_Dataset_Record_AsstringBlock.foreach(println) - //-------------RDF XML Prefixes + // -------------RDF XML Prefixes // read data and save in RDD as block- RDFXML Prefixes val RDFXML_Dataset_Prefixes = sc.hadoopRDD(jobConf_Prefixes, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) println("HelloPrefixes" + " " + RDFXML_Dataset_Prefixes.count) // RDFXML_Dataset_Prefixes.foreach(println) // Convert the block- RDFXML Prefixes to String DataType var RDFXML_Dataset_AsstringPrefixes_WithoutDist = RDFXML_Dataset_Prefixes.map { case (x, y) => (x.toString()) } - val RDFXML_Dataset_AsstringPrefixes=RDFXML_Dataset_AsstringPrefixes_WithoutDist.distinct() + val RDFXML_Dataset_AsstringPrefixes = RDFXML_Dataset_AsstringPrefixes_WithoutDist.distinct() println("HelloPrefixes" + " " + RDFXML_Dataset_AsstringPrefixes.count) // RDFXML_Dataset_AsstringPrefixes.foreach(println) val pref = RDFXML_Dataset_AsstringPrefixes.reduce((a, b) => a + "\n" + b) @@ -88,5 +89,4 @@ class ParseRDFXML extends Serializable { val str = Arraylistval.get(0).toString() str } - -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseTRIX.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseTRIX.scala index 3bd8364..f3a4201 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseTRIX.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/ParseTRIX.scala @@ -1,12 +1,14 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection -import org.apache.spark.SparkContext + +import java.io.ByteArrayInputStream +import java.util.ArrayList +import java.util.regex.Pattern + import org.apache.hadoop.mapred.JobConf -import org.apache.spark.rdd.RDD import org.apache.jena.graph.Triple import org.apache.jena.rdf.model.ModelFactory -import java.util.ArrayList -import java.util.regex.Pattern -import java.io.ByteArrayInputStream +import org.apache.spark.SparkContext 
+import org.apache.spark.rdd.RDD class ParseTRIX extends Serializable { @@ -18,7 +20,7 @@ class ParseTRIX extends Serializable { org.apache.hadoop.mapred.FileInputFormat.addInputPaths(jobConf_Record, "hdfs://localhost:9000/mydata/xx.trix") // input path from Hadoop - //------------TRIX Record + // ------------TRIX Record // read data and save in RDD as block- TRIX Record val TRIX_Dataset_Record = sc.hadoopRDD(jobConf_Record, classOf[org.apache.hadoop.streaming.StreamInputFormat], classOf[org.apache.hadoop.io.Text], classOf[org.apache.hadoop.io.Text]) // println("HelloRecords" + " " + TRIX_Dataset_Record.count) @@ -43,11 +45,9 @@ class ParseTRIX extends Serializable { s4 } - // This function for TRIX case. def arrayListTOstring(Arraylistval: ArrayList[Triple]): String = { val str = Arraylistval.get(0).toString() str } - -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/RevisionFeatures.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/RevisionFeatures.scala index 7dc3c19..ccbd2b4 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/RevisionFeatures.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/RevisionFeatures.scala @@ -1,4 +1,5 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection + import java.util.regex.{ Matcher, Pattern } class RevisionFeatures extends Serializable { @@ -53,7 +54,7 @@ class RevisionFeatures extends Serializable { } - // if (result_isNonLatin==true){ // is matched + // if (result_isNonLatin==true) { // is matched // // Final_Result=false // @@ -123,4 +124,4 @@ class RevisionFeatures extends Serializable { } -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/SentencesFeatures.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/SentencesFeatures.scala index 5490ec1..62c0432 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/SentencesFeatures.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/SentencesFeatures.scala @@ -13,7 +13,7 @@ class SentencesFeatures extends Serializable { } - //1.comment tail Lenght Action subaction param+ tail + // 1. Comment tail length: action + subaction + param + tail def CommentTailLenght(Full_Comment_Str: String): Integer = { val parsedCommment_OBJ = new CommentProcessor() val commentTail_Str = parsedCommment_OBJ.Extract_CommentTail(Full_Comment_Str) @@ -23,9 +23,9 @@ class SentencesFeatures extends Serializable { } // similarity between the comment ( suffix of the comment = Tail ) where the comment is normal comment /* .........*/ or /* ......... // e.g This comment includes wb...sitelink - //1-we have to be sure the comment is normal comment take the form /* ........./* - //2-Next step: we check the Action part if it includes a sitelink word or not. - //3-we compare the suffix in this case to site link with pay attention to the same language. + // 1 - we have to make sure the comment is a normal comment of the form /* ......... */ + // 2 - Next step: we check whether the Action part includes a sitelink word or not. + // 3 - we compare the suffix to the sitelink, paying attention to the same language. // we check the type of Normal comment if it contains Aliases .
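The three-step check described in the comments above can be read as the following minimal Scala sketch. It is not part of this commit: the object name, the explicit language parameters, and the Levenshtein threshold are illustrative assumptions, and the actual logic lives in extract_CommentAliases_LanguageType and the related SentencesFeatures methods.

import org.apache.commons.lang3.StringUtils

object CommentTailSimilaritySketch {
  // Mirrors the numbered comments above:
  // 1 - the caller has already verified that the comment is a normal /* ... */ comment,
  // 2 - the action part must mention a sitelink,
  // 3 - tail and sitelink title are only compared when the languages match.
  def tailMatchesSitelink(actionPart: String, commentTail: String, sitelinkTitle: String,
                          actionLang: String, sitelinkLang: String): Boolean = {
    val isSitelinkAction = actionPart.contains("sitelink")
    val sameLanguage = actionLang.equalsIgnoreCase(sitelinkLang)
    // Levenshtein distance is one simple similarity choice here (an assumption, not the committed code).
    val distance = StringUtils.getLevenshteinDistance(commentTail.trim, sitelinkTitle.trim)
    isSitelinkAction && sameLanguage && distance <= 2
  }
}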
def extract_CommentAliases_LanguageType(Full_Comment_Str: String): String = { @@ -185,5 +185,4 @@ class SentencesFeatures extends Serializable { langeType.trim() } - -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/StatementFeatures.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/StatementFeatures.scala index 33b1b5a..31d1158 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/StatementFeatures.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/StatementFeatures.scala @@ -42,5 +42,4 @@ class StatementFeatures extends Serializable { } result } - -} \ No newline at end of file +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/VandalismDetection.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/VandalismDetection.scala index 065adb1..2a9f380 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/VandalismDetection.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/VandalismDetection.scala @@ -1,31 +1,29 @@ package net.sansa_stack.ml.spark.outliers.vandalismdetection -import org.apache.spark.{ SparkContext, RangePartitioner } -import org.apache.spark.sql._ -import org.apache.spark.sql.expressions.Window -import org.apache.hadoop.mapred.JobConf import java.util.Scanner -import org.json.JSONObject + import org.apache.commons.lang3.StringUtils -import org.apache.spark.sql.functions.{ concat, lit } +import org.apache.hadoop.mapred.JobConf +import org.apache.spark.{ RangePartitioner, SparkContext } import org.apache.spark.ml.feature.{ Word2Vec, Word2VecModel } -import org.apache.spark.ml.Pipeline import org.apache.spark.ml.feature.VectorAssembler import org.apache.spark.ml.linalg.Vector +import org.apache.spark.ml.Pipeline +import org.apache.spark.sql._ +import org.apache.spark.sql.expressions.Window +import org.apache.spark.sql.functions.{ concat, lit } +import org.json.JSONObject class VandalismDetection extends Serializable { - - - // Function 1 : Distributed RDF Parser Approach def Start_RDF_Parser_Appraoch(sc: SparkContext): Unit = { - + val sqlContext = new org.apache.spark.sql.SQLContext(sc) import sqlContext.implicits._ import org.apache.spark.sql.functions._ // for UDF import org.apache.spark.sql.types._ - + println("*********************************************************************") println("Distributed RDF Parser Model") println("Please Enter 1 for JTriple and 2 for TRIX process and 3 for RDFXML:") @@ -41,12 +39,11 @@ class VandalismDetection extends Serializable { val DRF_Builder_JTripleOBJ = new FacilitiesClass() val RDD_JTriple = JTriple_Parser_OBJ.Start_JTriple_Parser(jobConf, sc) RDD_JTriple.foreach(println) - //----------------------------DF for RDF TRIX ------------------------------------------ + // ----------------------------DF for JTriple ------------------------------------------ // Create SQLContext Object: val sqlContext = new org.apache.spark.sql.SQLContext(sc) val DFR_JTriple = DRF_Builder_JTripleOBJ.RDD_TO_DFR_JTriple(RDD_JTriple, sqlContext) DFR_JTriple.show() - } else if (num == "2") { @@ -57,12 +54,11 @@ class VandalismDetection extends Serializable { val DRF_Builder_RDFTRIX_OBJ = new FacilitiesClass() val RDD_TRIX = TRIX_Parser_OBJ.Start_TriX_Parser(jobConf, sc)
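The jobConf handed to parsers such as Start_TriX_Parser is what tells Hadoop's StreamInputFormat where one record begins and ends. A minimal sketch of such a configuration follows; it is not part of this commit, and the <triple> begin/end tags are assumptions for illustration (the real tags depend on the TriX layout being split):

import org.apache.hadoop.mapred.{ FileInputFormat, JobConf }

object TrixJobConfSketch {
  // Builds a JobConf for StreamInputFormat so that each <triple>...</triple>
  // block in the input file becomes one record of the resulting hadoopRDD.
  def build(): JobConf = {
    val conf = new JobConf()
    conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader")
    conf.set("stream.recordreader.begin", "<triple>") // record start tag (assumed)
    conf.set("stream.recordreader.end", "</triple>") // record end tag (assumed)
    FileInputFormat.addInputPaths(conf, "hdfs://localhost:9000/mydata/xx.trix")
    conf
  }
}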
RDD_TRIX.foreach(println) - //----------------------------DF for RDF TRIX ------------------------------------------ + // ----------------------------DF for RDF TRIX ------------------------------------------ // Create SQLContext Object: val sqlContext = new org.apache.spark.sql.SQLContext(sc) val DFR_TRIX = DRF_Builder_RDFTRIX_OBJ.RDD_TO_DFR_TRIX(RDD_TRIX, sqlContext) DFR_TRIX.show() - } else if (num == "3") { println("RDF XML .........!!!!!!") @@ -83,1908 +79,1236 @@ class VandalismDetection extends Serializable { DFR_RDF_XML.show() } - - sc.stop() + + sc.stop() } - - //*********************************************************************************************************************************************** - // Function 2:Training XML and Vandalism Detection + + // ********************************************************************************* + // Function 2:Training XML and Vandalism Detection def Training_Start_StandardXMLParser_VD(sc: SparkContext): DataFrame = { val sqlContext = new org.apache.spark.sql.SQLContext(sc) import sqlContext.implicits._ import org.apache.spark.sql.functions._ // for UDF import org.apache.spark.sql.types._ - // Streaming records: - val jobConf = new JobConf() - val NormalXML_Parser_OBJ = new ParseNormalXML() - val RDD_OBJ = new ParseNormalXML() - - val Training_RDD_All_Record1 = RDD_OBJ.Training_DB_NormalXML_Parser_Input1(sc) - val Training_RDD_All_Record2 = RDD_OBJ.Training_DB_NormalXML_Parser_Input2(sc) - val Training_RDD_All_Record3 = RDD_OBJ.Training_DB_NormalXML_Parser_Input3(sc) - //RDD_All_Record1.foreach(println) - //RDD_All_Record2.foreach(println) - // RDD_All_Record3.foreach(println) - - val Training_RDD_All_Record = Training_RDD_All_Record1.union(Training_RDD_All_Record2).union(Training_RDD_All_Record3).distinct().cache() - - //println(RDD_All_Record.count()) - println(Training_RDD_All_Record.count()) - - // ======= Json part : - //Json RDD : Each record has its Revision iD: - val JsonRDD = Training_RDD_All_Record.map(_.split("NNLL")).map(v => replacing_with_Quoto(v(0), v(8))).cache() - //JsonRDD.foreach(println) - //println(JsonRDD.count()) - - // Data set - val Ds_Json = sqlContext.jsonRDD(JsonRDD).select("key", "id", "labels", "descriptions", "aliases", "claims", "sitelinks").cache() - //Ds_Json.show() - // println(Ds_Json.count()) - - // ======= Tags part : // Contributor IP here is in Decimal format not IP format and It is converted in ParseNormalXml stage - val TagsRDD = Training_RDD_All_Record.map(_.split("NNLL")).map(x => (x(0), x(1), x(2), x(3), x(4), x(5), x(6), x(7), x(8), x(9), x(10), x(11))).cache() - val DF_Tags = TagsRDD.toDF("Rid", "Itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "model", "format", "sha").cache() - // DF_Tags.show() - // println(DF_Tags.count()) - - //======== Join Json part with Tag Part:============================ - //Joining to have full data - val DF_First_DF_Result_Join_Tags_and_Json = DF_Tags.as("T1").join(Ds_Json.as("T2"), $"T1.Rid" === $"T2.key", "leftouter").select("Rid", "itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "labels", "descriptions", "aliases", "claims", "sitelinks", "model", "format", "sha") //.orderBy("Rid", "Itemid") - DF_First_DF_Result_Join_Tags_and_Json.registerTempTable("Data1") - val dfr_DATA_JsonTages1 = sqlContext.sql("select * from Data1 order by itemid ,Rid ").cache() - - val colNames = Seq("Rid2", "itemid2", "comment2", "pid2", "time2", "contributorIP2", 
"contributorID2", "contributorName2", "JsonText2", "labels2", "descriptions2", "aliases2", "claims2", "sitelinks2", "model2", "format2", "sha2") - val DF_Second = DF_First_DF_Result_Join_Tags_and_Json.toDF(colNames: _*) //.distinct() - DF_Second.registerTempTable("Data2") - - //===================================================================Parent // Previous Revision============================================================================================================== - //val DF_Joined = result1.as("df1").join(result2.as("df2"), col("itemid") === col("itemid2") && col("index1") === col("index2") + 1, "leftouter").select("Rid", "itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "labels", "descriptions", "aliases", "claims", "sitelinks", "model", "format", "sha", "Rid2", "itemid2", "comment2", "pid2", "time2", "contributorIP2", "contributorID2", "contributorName2", "JsonText2", "labels2", "descriptions2", "aliases2", "claims2", "sitelinks2", "model2", "format2", "sha2") - //.select("itemid", "Rid","pid","time","itemid2","Rid2","pid2","time2") - - //Joining based on Parent Id to get the previous cases: ParentID - val DF_Joined = DF_First_DF_Result_Join_Tags_and_Json.as("df1").join(DF_Second.as("df2"), $"df1.pid" === $"df2.Rid2", "leftouter").distinct() - - val RDD_After_JoinDF = DF_Joined.rdd.distinct() - val x = RDD_After_JoinDF.map(row => (row(0).toString().toInt, row)).cache() - val part = new RangePartitioner(4, x) - val partitioned = x.partitionBy(part).persist() // persist is important for this case and obligatory. - //partitioned.foreach(println) - // - // //=====================================================All Features Based on Categories of Features Data Type :================================================================================== - // - val Result_all_Features = partitioned.map { case (x, y) => (x.toString() + "," + All_Features(y).toString()) } // we convert the Pair RDD to String one LineRDD to be able to make DF based on "," - //Result_all_Features.foreach(println) - // println("nayef" + Result_all_Features.count()) - - // Conver the RDD of All Features to DataFrame: - - val schema = StructType( - - //0 - StructField("Rid", IntegerType, false) :: - - // Character Features : - /* 1*/ StructField("C1uppercaseratio", DoubleType, false) :: /*2 */ StructField("C2lowercaseratio", DoubleType, false) :: /*3*/ StructField("C3alphanumericratio", DoubleType, false) :: - /*4*/ StructField("C4asciiratio", DoubleType, false) :: /*5*/ StructField("C5bracketratio", DoubleType, false) :: /*6*/ StructField("C6digitalratio", DoubleType, false) :: - /*7*/ StructField("C7latinratio", DoubleType, false) :: /*8*/ StructField("C8whitespaceratio", DoubleType, false) :: /* 9*/ StructField("C9puncratio", DoubleType, false) :: - /*10*/ StructField("C10longcharacterseq", DoubleType, false) :: /*11*/ StructField("C11arabicratio", DoubleType, false) :: /*12*/ StructField("C12bengaliratio", DoubleType, false) :: - /*13 */ StructField("C13brahmiratio", DoubleType, false) :: /*14*/ StructField("C14cyrilinratio", DoubleType, false) :: /*15*/ StructField("C15hanratio", DoubleType, false) :: - /*16*/ StructField("c16malysiaratio", DoubleType, false) :: /*17*/ StructField("C17tamiratio", DoubleType, false) :: /*18*/ StructField("C18telugratio", DoubleType, false) :: - /*19 */ StructField("C19symbolratio", DoubleType, false) :: /*20 */ StructField("C20alpharatio", DoubleType, false) :: /*21*/ StructField("C21visibleratio", DoubleType, false) :: 
- /*22*/ StructField("C22printableratio", DoubleType, false) :: /*23*/ StructField("C23blankratio", DoubleType, false) :: /*24 */ StructField("C24controlratio", DoubleType, false) :: - /* 25 */ StructField("C25hexaratio", DoubleType, false) :: - - //word Features: - /*26*/ StructField("W1languagewordratio", DoubleType, false) :: /*27 Boolean */ StructField("W2Iscontainlanguageword", DoubleType, false) :: /*28*/ StructField("W3lowercaseratio", DoubleType, false) :: - /*29 Integer */ StructField("W4longestword", IntegerType, false) :: /*30 Boolean */ StructField("W5IscontainURL", DoubleType, false) :: /*31*/ StructField("W6badwordratio", DoubleType, false) :: - /*32*/ StructField("W7uppercaseratio", DoubleType, false) :: /*33*/ StructField("W8banwordratio", DoubleType, false) :: /*34 Boolean */ StructField("W9FemalFirstName", DoubleType, false) :: - /*35 Boolean */ StructField("W10MaleFirstName", DoubleType, false) :: /*36 Boolean */ StructField("W11IscontainBadword", DoubleType, false) :: /*37 Boolean*/ StructField("W12IsContainBanword", DoubleType, false) :: - /*38 integer */ StructField("W13NumberSharewords", DoubleType, false) :: /*39 Integer */ StructField("W14NumberSharewordswithoutStopwords", DoubleType, false) :: - /*40*/ StructField("W15PortionQid", DoubleType, false) :: /*41*/ StructField("W16PortionLnags", DoubleType, false) :: /*42*/ StructField("W17PortionLinks", DoubleType, false) :: - - // - // // Sentences Features: - /*43*/ StructField("S1CommentTailLength", DoubleType, false) :: /*44*/ StructField("S2SimikaritySitelinkandLabel", DoubleType, false) :: /*45*/ StructField("S3SimilarityLabelandSitelink", DoubleType, false) :: /*46*/ StructField("S4SimilarityCommentComment", DoubleType, false) :: - // - // // Statements Features : - /*47*/ StructField("SS1Property", StringType, false) :: /*48*/ StructField("SS2DataValue", StringType, false) :: /*49*/ StructField("SS3ItemValue", StringType, false) :: - // - // - // //User Features : - /*50 Boolean*/ StructField("U1IsPrivileged", DoubleType, false) :: /*51 Boolean*/ StructField("U2IsBotUser", DoubleType, false) :: /*52 Boolean*/ StructField("U3IsBotuserWithFlaguser", DoubleType, false) :: - /*53 Boolean*/ StructField("U4IsProperty", DoubleType, false) :: /*54 Boolean*/ StructField("U5IsTranslator", DoubleType, false) :: /*55 Boolean*/ StructField("U6IsRegister", DoubleType, false) :: - /*56*/ StructField("U7IPValue", DoubleType, false) :: /*57*/ StructField("U8UserID", IntegerType, false) :: /*58*/ StructField("U9HasBirthDate", DoubleType, false) :: /*59*/ StructField("U10HasDeathDate", DoubleType, false) :: - - //Items Features : - - /*60*/ StructField("I1NumberLabels", DoubleType, false) :: /*61*/ StructField("I2NumberDescription", DoubleType, false) :: /*62*/ StructField("I3NumberAliases", DoubleType, false) :: /*63*/ StructField("I4NumberClaims", DoubleType, false) :: - /*64*/ StructField("I5NumberSitelinks", DoubleType, false) :: /*65*/ StructField("I6NumberStatement", DoubleType, false) :: /*66*/ StructField("I7NumberReferences", DoubleType, false) :: /*67*/ StructField("I8NumberQualifier", DoubleType, false) :: - /*68*/ StructField("I9NumberQualifierOrder", DoubleType, false) :: /*69*/ StructField("I10NumberBadges", DoubleType, false) :: /*70*/ StructField("I11ItemTitle", StringType, false) :: - - // Revision Features: - /*71*/ StructField("R1languageRevision", StringType, false) :: /*72*/ StructField("R2RevisionLanguageLocal", StringType, false) :: /*73*/ StructField("R3IslatainLanguage", DoubleType, false) :: - /*74*/ 
StructField("R4JsonLength", DoubleType, false) :: /*75*/ StructField("R5RevisionAction", StringType, false) :: /*76*/ StructField("R6PrevReviAction", StringType, false) :: - /*77*/ StructField("R7RevisionAccountChange", DoubleType, false) :: /*78*/ StructField("R8ParRevision", StringType, false) :: /*79*/ StructField("R9RevisionTime", StringType, false) :: - /*80*/ StructField("R10RevisionSize", DoubleType, false) :: /*81*/ StructField("R11ContentType", StringType, false) :: /*82*/ StructField("R12BytesIncrease", DoubleType, false) :: - /*83*/ StructField("R13TimeSinceLastRevi", DoubleType, false) :: /*84*/ StructField("R14CommentLength", DoubleType, false) :: /*85*/ StructField("R15RevisionSubaction", StringType, false) :: - /*86*/ StructField("R16PrevReviSubaction", StringType, false) :: - - Nil) - - val rowRDD = Result_all_Features.map(line => line.split(",")).map(e ⇒ Row(e(0).toInt // character feature column - , e(1).toDouble, e(2).toDouble, e(3).toDouble, e(4).toDouble, e(5).toDouble, e(6).toDouble, e(7).toDouble, e(8).toDouble, e(9).toDouble, RoundDouble(e(10).toDouble), - e(11).toDouble, e(12).toDouble, e(13).toDouble, e(14).toDouble, e(15).toDouble, e(16).toDouble, e(17).toDouble, e(18).toDouble, e(19).toDouble, e(20).toDouble, e(21).toDouble, e(22).toDouble, e(23).toDouble, e(24).toDouble, e(25).toDouble //Word Feature column - , e(26).toDouble, e(27).toDouble, e(28).toDouble, e(29).toDouble.toInt, e(30).toDouble, e(31).toDouble, e(32).toDouble, e(33).toDouble, e(34).toDouble, e(35).toDouble, e(36).toDouble, e(37).toDouble, RoundDouble(e(38).toDouble), RoundDouble(e(39).toDouble), e(40).toDouble, e(41).toDouble, e(42).toDouble // Sentences Features column: - , RoundDouble(e(43).toDouble), e(44).toDouble, e(45).toDouble, e(46).toDouble //Statement Features Column: - , e(47), e(48), e(49) // User Features Column: - , e(50).toDouble, e(51).toDouble, e(52).toDouble, e(53).toDouble, e(54).toDouble, e(55).toDouble, e(56).toDouble, e(57).toDouble.toInt, e(58).toDouble, e(59).toDouble //Item Features column: - , e(60).toDouble, e(61).toDouble, e(62).toDouble, e(63).toDouble, e(64).toDouble, e(65).toDouble, e(66).toDouble, e(67).toDouble, e(68).toDouble, e(69).toDouble, "Q" + e(70).toDouble.toInt.toString() //Revision Features Column: - , e(71), e(72), e(73).toDouble, e(74).toDouble, e(75), e(76), e(77).toDouble, e(78), e(79), e(80).toDouble, e(81), e(82).toDouble, e(83).toDouble, e(84).toDouble, e(85), e(86))) - - //a.User Frequency: - //number of revisions a user has contributed - //val resu= DF_Tags.groupBy("contributorID").agg(count("Rid")) - DF_Tags.registerTempTable("TagesTable") - val ContributorFreq_for_Each_Revision_DF = sqlContext.sql("select contributorID as CIDUSER1, count(Rid) as NumberofRevisionsUserContributed from TagesTable where contributorID !='0' group by contributorID ") //.drop("CIDUSER1") - //ContributorFreq_for_Each_Revision_DF.show() - - //b.Cumulated : Number of a unique Item a user has contributed. - val CumulatedNumberof_uniqueItemsForUser_DF = sqlContext.sql("select contributorID as CIDUSER2, COUNT(DISTINCT itemid) as NumberofUniqueItemsUseredit from TagesTable where contributorID !='0' group by contributorID") //.drop("CIDUSER2") - //CumulatedNumberof_uniqueItemsForUser_DF.show() - - //1.Item Frequency: - // number of revisions an Item has - val ItemFrequ_DF = sqlContext.sql("select itemid, count(Rid) as NumberRevisionItemHas from TagesTable group by itemid") - // ItemFrequ_DF.show() - - //2. 
Cumulate number of unique users have edited the Item : Did not consider the users IP. Contributor is an IP or Name. we consider name - val CumulatedNumberof_UniqueUserForItem_DF = sqlContext.sql("select itemid, COUNT(DISTINCT contributorID) as NumberUniqUserEditItem from TagesTable where contributorID !='0' group by itemid") - //CumulatedNumberof_UniqueUserForItem_DF.show() - - //3. freq each Item : - val Fre_Item_DF = sqlContext.sql("select itemid, COUNT(itemid) as FreqItem from TagesTable group by itemid") - // Fre_Item_DF.show() - - //***************************************************************************************************************************************** - // This is Main DataFrame: - val BeforeJoin_All_Features = sqlContext.createDataFrame(rowRDD, schema) - //BeforeJoin_All_Features.show() - - //********************************** User feature Join - - // Join1 for add The first User Feature : number of revisions a user has contributed - val AfterJoinUser1_All_Features = BeforeJoin_All_Features.as("T1").join(ContributorFreq_for_Each_Revision_DF.as("T2"), $"T1.U8UserID" === $"T2.CIDUSER1", "leftouter").drop("CIDUSER1") - //AfterJoinUser1_All_Features.show() - - // Join2 for add The second User Feature - val AfterJoinUser2_All_Features = AfterJoinUser1_All_Features.as("T1").join(CumulatedNumberof_uniqueItemsForUser_DF.as("T2"), $"T1.U8UserID" === $"T2.CIDUSER2", "leftouter").drop("CIDUSER2") - //AfterJoinUser2_All_Features.show() - - //********************************** Item Feature Join - // Join3 for add The First Item Feature :number of revisions an Item has - val AfterJoinItem3_All_Features = AfterJoinUser2_All_Features.as("T1").join(ItemFrequ_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid") - // AfterJoinItem3_All_Features.show() - - // Join4 for add The Second Item Feature - val AfterJoinItem4_All_Features = AfterJoinItem3_All_Features.as("T1").join(CumulatedNumberof_UniqueUserForItem_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid") - // AfterJoinItem4_All_Features.show() - - // Join5 for add The Third Item Feature - val AfterJoinItem5_All_Features = AfterJoinItem4_All_Features.as("T1").join(Fre_Item_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid") - //2 AfterJoinItem5_All_Features.show() - - //******************************** - - //*Geografical information Feature from Meta File - //REVISION_ID|REVISION_SESSION_ID|USER_COUNTRY_CODE|USER_CONTINENT_CODE|USER_TIME_ZONE|USER_REGION_CODE|USER_CITY_NAME|USER_COUNTY_NAME|REVISION_TAGS - val df_GeoInf = sqlContext.read - .format("com.databricks.spark.csv") - .option("header", "true") // Use first line of all files as header - .option("inferSchema", "true") // Automatically infer data types - .load("hdfs://localhost:9000/mydata/Meta.csv").select("REVISION_ID", "REVISION_SESSION_ID", "USER_COUNTRY_CODE", "USER_CONTINENT_CODE", "USER_TIME_ZONE", "USER_REGION_CODE", "USER_CITY_NAME", "USER_COUNTY_NAME", "REVISION_TAGS") - // df_GeoInf.show() - - val df_Truth = sqlContext.read - .format("com.databricks.spark.csv") - .option("header", "true") // Use first line of all files as header - .option("inferSchema", "true") // Automatically infer data types - .load("hdfs://localhost:9000/mydata/truth.csv").select("REVISION_ID", "ROLLBACK_REVERTED", "UNDO_RESTORE_REVERTED") - // df_GeoInf.show() - - val AfterJoinGeoInfo_All_Features = AfterJoinItem5_All_Features.as("T1").join(df_GeoInf.as("T2"), $"T1.Rid" === $"T2.REVISION_ID", 
"leftouter").drop("REVISION_ID").cache() - // AfterJoinGeoInfo_All_Features.show() - - val Final_All_Features = AfterJoinGeoInfo_All_Features.as("T1").join(df_Truth.as("T2"), $"T1.Rid" === $"T2.REVISION_ID", "leftouter").drop("REVISION_ID").cache() - //Final_All_Features.show() - - // Pre- process Data ============================================================================================================================================================ - - // For String Column, We fill the Null values by "NA": - - var Fill_Missing_Final_All_Features = Final_All_Features.na.fill("NA", Seq("USER_COUNTRY_CODE", "USER_CONTINENT_CODE", "USER_TIME_ZONE", "USER_REGION_CODE", "USER_CITY_NAME", "USER_COUNTY_NAME", "REVISION_TAGS")).cache() - - // For Integer Frequency Column, We fill the Null values by 0: - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.na.fill(0, Seq("FreqItem", "NumberUniqUserEditItem", "NumberRevisionItemHas", "NumberofUniqueItemsUseredit", "NumberofRevisionsUserContributed", "REVISION_SESSION_ID")).cache() - //Fill_Missing_Final_All_Features.show() - - val BoolToDoubleUDF = udf { (BoolAsString: String) => if (BoolAsString == "T") 1.0 else 0.0 } - val IntegerToDouble = udf { (IntegerRevisionSessionID: Integer) => IntegerRevisionSessionID.toDouble } - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalROLLBACK_REVERTED", BoolToDoubleUDF(col("ROLLBACK_REVERTED"))) - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalUNDO_RESTORE_REVERTED", BoolToDoubleUDF(col("UNDO_RESTORE_REVERTED"))) - - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalREVISION_SESSION_ID", IntegerToDouble(col("REVISION_SESSION_ID"))) - - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberofRevisionsUserContributed", IntegerToDouble(col("NumberofRevisionsUserContributed"))) - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberofUniqueItemsUseredit", IntegerToDouble(col("NumberofUniqueItemsUseredit"))) - - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberRevisionItemHas", IntegerToDouble(col("NumberRevisionItemHas"))) - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberUniqUserEditItem", IntegerToDouble(col("NumberUniqUserEditItem"))) - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalFreqItem", IntegerToDouble(col("FreqItem"))) - - //===========================================================================Caharacter Features : Double , Integer Features ==================================================================================== - //Double Ratio: For Ratio Double column, Fill -1 value by Median:Character Features + Ratio of Word Features : - var Samples = Fill_Missing_Final_All_Features.sample(false, 0.001).cache() //.where($"S2SimikaritySitelinkandLabel">0.0 || $"S3SimilarityLabelandSitelink">0.0 || $"S4SimilarityCommentComment">0.0) - Samples.registerTempTable("df") - - val Query = "select " + - "percentile_approx(C1uppercaseratio, 0.5) as meadian1" + "," + "percentile_approx(C2lowercaseratio, 0.5) as median2" + " ," + - "percentile_approx(C3alphanumericratio, 0.5) as median3" + "," + "percentile_approx(C4asciiratio, 0.5) as median4" + "," + - "percentile_approx(C5bracketratio, 0.5) as median5" + "," + "percentile_approx(C6digitalratio, 0.5) as median6" + "," + - "percentile_approx(C7latinratio, 
0.5) as median7" + "," + "percentile_approx(C8whitespaceratio, 0.5) as median8" + "," + - "percentile_approx(C9puncratio, 0.5) as median9" + "," + "percentile_approx(C11arabicratio, 0.5) as median11" + "," + - "percentile_approx(C12bengaliratio, 0.5) as median12" + "," + "percentile_approx(C13brahmiratio, 0.5) as median13" + "," + - "percentile_approx(C14cyrilinratio, 0.5) as median14" + "," + "percentile_approx(C15hanratio, 0.5) as median15" + "," + - "percentile_approx(c16malysiaratio, 0.5) as median16" + "," + - "percentile_approx(C17tamiratio, 0.5) as median17" + "," + "percentile_approx(C18telugratio, 0.5) as median18" + "," + - "percentile_approx(C19symbolratio, 0.5) as median19" + "," + "percentile_approx(C20alpharatio, 0.5) as median20" + "," + - "percentile_approx(C21visibleratio, 0.5) as median21" + "," + "percentile_approx(C22printableratio, 0.5) as median22" + "," + - "percentile_approx(C23blankratio, 0.5) as median23" + "," + "percentile_approx(C24controlratio, 0.5) as median24" + "," + - "percentile_approx(C25hexaratio, 0.5) as median25" ++ "," + "percentile_approx(W1languagewordratio, 0.5) as median26" + "," + - "percentile_approx(W3lowercaseratio, 0.5) as median27" + "," + "percentile_approx(W6badwordratio, 0.5) as median28" + "," + - "percentile_approx(W7uppercaseratio, 0.5) as median27" + "," + "percentile_approx(W8banwordratio, 0.5) as median27" + " from df" - - val medianValues = sqlContext.sql(Query).rdd - val Median = medianValues.first() - - // Median : - // Character Ratio Features: UDF - val lkpUDF1 = udf { (i: Double) => if (i == 0) Median(0).toString().toDouble else i } - val lkpUDF2 = udf { (i: Double) => if (i == 0) Median(1).toString().toDouble else i } - val lkpUDF3 = udf { (i: Double) => if (i == 0) Median(2).toString().toDouble else i } - val lkpUDF4 = udf { (i: Double) => if (i == 0) Median(3).toString().toDouble else i } - val lkpUDF5 = udf { (i: Double) => if (i == 0) Median(4).toString().toDouble else i } - val lkpUDF6 = udf { (i: Double) => if (i == 0) Median(5).toString().toDouble else i } - val lkpUDF7 = udf { (i: Double) => if (i == 0) Median(6).toString().toDouble else i } - val lkpUDF8 = udf { (i: Double) => if (i == 0) Median(7).toString().toDouble else i } - val lkpUDF9 = udf { (i: Double) => if (i == 0) Median(8).toString().toDouble else i } - - val lkpUDF11 = udf { (i: Double) => if (i == 0) Median(9).toString().toDouble else i } - val lkpUDF12 = udf { (i: Double) => if (i == 0) Median(10).toString().toDouble else i } - val lkpUDF13 = udf { (i: Double) => if (i == 0) Median(11).toString().toDouble else i } - val lkpUDF14 = udf { (i: Double) => if (i == 0) Median(12).toString().toDouble else i } - val lkpUDF15 = udf { (i: Double) => if (i == 0) Median(13).toString().toDouble else i } - val lkpUDF16 = udf { (i: Double) => if (i == 0) Median(14).toString().toDouble else i } - val lkpUDF17 = udf { (i: Double) => if (i == 0) Median(15).toString().toDouble else i } - val lkpUDF18 = udf { (i: Double) => if (i == 0) Median(16).toString().toDouble else i } - val lkpUDF19 = udf { (i: Double) => if (i == 0) Median(17).toString().toDouble else i } - val lkpUDF20 = udf { (i: Double) => if (i == 0) Median(18).toString().toDouble else i } - val lkpUDF21 = udf { (i: Double) => if (i == 0) Median(19).toString().toDouble else i } - val lkpUDF22 = udf { (i: Double) => if (i == 0) Median(20).toString().toDouble else i } - val lkpUDF23 = udf { (i: Double) => if (i == 0) Median(21).toString().toDouble else i } - val lkpUDF24 = udf { (i: Double) => if (i == 0) 
Median(22).toString().toDouble else i } - val lkpUDF25 = udf { (i: Double) => if (i == 0) Median(23).toString().toDouble else i } - - val df1 = Fill_Missing_Final_All_Features.withColumn("FinalC1uppercaseratio", lkpUDF1(col("C1uppercaseratio"))) //.drop("C1uppercaseratio").cache() - val df2 = df1.withColumn("FinalC2lowercaseratio", lkpUDF2(col("C2lowercaseratio"))) //.drop("C2lowercaseratio").cache() - //df1.unpersist() - val df3 = df2.withColumn("FinalC3alphanumericratio", lkpUDF3(col("C3alphanumericratio"))) //.drop("C3alphanumericratio").cache() - //df2.unpersist() - val df4 = df3.withColumn("FinalC4asciiratio", lkpUDF4(col("C4asciiratio"))) //.drop("C4asciiratio").cache() - //df3.unpersist() - val df5 = df4.withColumn("FinalC5bracketratio", lkpUDF5(col("C5bracketratio"))) //.drop("C5bracketratio").cache() - //df4.unpersist() - val df6 = df5.withColumn("FinalC6digitalratio", lkpUDF6(col("C6digitalratio"))) //.drop("C6digitalratio").cache() - //df5.unpersist() - val df7 = df6.withColumn("FinalC7latinratio", lkpUDF7(col("C7latinratio"))) //.drop("C7latinratio").cache() - //df6.unpersist() - val df8 = df7.withColumn("FinalC8whitespaceratio", lkpUDF8(col("C8whitespaceratio"))) //.drop("C8whitespaceratio").cache() - //df7.unpersist() - val df9 = df8.withColumn("FinalC9puncratio", lkpUDF9(col("C9puncratio"))) //.drop("C9puncratio").cache() - - // Mean : - // character integer values : - val Mean_C10longcharacterseq = Samples.agg(mean("C10longcharacterseq")).head() - val C10_Mean = Mean_C10longcharacterseq.getDouble(0) - val lkpUDFC10 = udf { (i: Double) => if (i == 0) C10_Mean else i } - val df10 = df9.withColumn("FinalC10longcharacterseq", lkpUDFC10(col("C10longcharacterseq"))) - - //Median - val df11 = df10.withColumn("FinalC11arabicratio", lkpUDF11(col("C11arabicratio"))) //.drop("C11arabicratio").cache() - // df9.unpersist() - val df12 = df11.withColumn("FinalC12bengaliratio", lkpUDF12(col("C12bengaliratio"))) //.drop("C12bengaliratio").cache() - //df11.unpersist() - val df13 = df12.withColumn("FinalC13brahmiratio", lkpUDF13(col("C13brahmiratio"))) //.drop("C13brahmiratio").cache() - // df12.unpersist() - val df14 = df13.withColumn("FinalC14cyrilinratio", lkpUDF14(col("C14cyrilinratio"))) //.drop("C14cyrilinratio").cache() - // df13.unpersist() - val df15 = df14.withColumn("FinalC15hanratio", lkpUDF15(col("C15hanratio"))) //.drop("C15hanratio").cache() - // df14.unpersist() - val df16 = df15.withColumn("Finalc16malysiaratio", lkpUDF16(col("c16malysiaratio"))) //.drop("c16malysiaratio").cache() - //df15.unpersist() - val df17 = df16.withColumn("FinalC17tamiratio", lkpUDF17(col("C17tamiratio"))) //.drop("C17tamiratio").cache() - //df16.unpersist() - val df18 = df17.withColumn("FinalC18telugratio", lkpUDF18(col("C18telugratio"))) //.drop("C18telugratio").cache() - //df17.unpersist() - val df19 = df18.withColumn("FinalC19symbolratio", lkpUDF19(col("C19symbolratio"))) //.drop("C19symbolratio").cache() - //df18.unpersist() - val df20 = df19.withColumn("FinalC20alpharatio", lkpUDF20(col("C20alpharatio"))) //.drop("C20alpharatio").cache() - // df19.unpersist() - val df21 = df20.withColumn("FinalC21visibleratio", lkpUDF21(col("C21visibleratio"))) //.drop("C21visibleratio").cache() - // df20.unpersist() - val df22 = df21.withColumn("FinalC22printableratio", lkpUDF22(col("C22printableratio"))) //.drop("C22printableratio").cache() - //df21.unpersist() - val df23 = df22.withColumn("FinalC23blankratio", lkpUDF23(col("C23blankratio"))) //.drop("C23blankratio").cache() - // df22.unpersist() - val df24 = 
df23.withColumn("FinalC24controlratio", lkpUDF24(col("C24controlratio"))) //.drop("C24controlratio").cache() - //df23.unpersist() - val df25 = df24.withColumn("FinalC25hexaratio", lkpUDF25(col("C25hexaratio"))) //.drop("C25hexaratio").cache() - - //************************************************End Character Features **************************************************************************************** - - //************************************************Start Word Features **************************************************************************************** - - // Word Ratio Features : UDF - val lkpUDFW1 = udf { (i: Double) => if (i == 0) Median(24).toString().toDouble else i } - val lkpUDFW3 = udf { (i: Double) => if (i == 0) Median(25).toString().toDouble else i } - val lkpUDFW6 = udf { (i: Double) => if (i == 0) Median(26).toString().toDouble else i } - val lkpUDFW7 = udf { (i: Double) => if (i == 0) Median(27).toString().toDouble else i } - val lkpUDFW8 = udf { (i: Double) => if (i == 0) Median(28).toString().toDouble else i } - - //1. - val df26 = df25.withColumn("FinalW1languagewordratio", lkpUDFW1(col("W1languagewordratio"))) //.drop("W1languagewordratio").cache() - - //2.Boolean(Double) IsContainLanguageWord - - //3. - val df27 = df26.withColumn("FinalW3lowercaseratio", lkpUDFW3(col("W3lowercaseratio"))) //.drop("W3lowercaseratio").cache() - // df26.unpersist() - - //4. Integer " Mean: - val Mean_W4longestword = Samples.agg(mean("W4longestword")).head() - val W4_Mean = Mean_W4longestword.getDouble(0) - val lkpUDFW4 = udf { (i: Double) => if (i == 0) W4_Mean else i } - val df28 = df27.withColumn("FinalW4longestword", lkpUDFW4(col("W4longestword"))) - - //5. Boolean (Double ) W5IscontainURL - //6. - val df29 = df28.withColumn("FinalW6badwordratio", lkpUDFW6(col("W6badwordratio"))) //.drop("W6badwordratio").cache() - - //7. - val df30 = df29.withColumn("FinalW7uppercaseratio", lkpUDFW7(col("W7uppercaseratio"))) //.drop("W7uppercaseratio").cache() - - //8. - val df31 = df30.withColumn("FinalW8banwordratio", lkpUDFW8(col("W8banwordratio"))) //.drop("W8banwordratio").cache() - - //9.FemalFirst Boolean(Double) - //10.Male First Boolean(Double) - //11.ContainBadWord Boolean(Double) - //12ContainBanWord Boolean(Double) - - //13. Integer(Double): - val Mean_W13W13NumberSharewords = Samples.agg(mean("W13NumberSharewords")).head() - val W13_Mean = Mean_W13W13NumberSharewords.getDouble(0) - val lkpUDFW13 = udf { (i: Double) => if (i == 0) W13_Mean else i } - val df32 = df31.withColumn("FinalW13NumberSharewords", lkpUDFW13(col("W13NumberSharewords"))) - - //14. Integer (Double): - val Mean_W14NumberSharewordswithoutStopwords = Samples.agg(mean("W14NumberSharewordswithoutStopwords")).head() - val W14_Mean = Mean_W14NumberSharewordswithoutStopwords.getDouble(0) - val lkpUDFW14 = udf { (i: Double) => if (i == 0) W14_Mean else i } - val df33 = df32.withColumn("FinalW14NumberSharewordswithoutStopwords", lkpUDFW14(col("W14NumberSharewordswithoutStopwords"))) - - // 15. Double (Not ratio): - val Mean_W15PortionQid = Samples.agg(mean("W15PortionQid")).head() - val W15_Mean = Mean_W15PortionQid.getDouble(0) - val lkpUDFW15 = udf { (i: Double) => if (i == 0) W15_Mean else i } - val df34 = df33.withColumn("FinalW15PortionQid", lkpUDFW15(col("W15PortionQid"))) - - //16. 
Double(Not Ratio): - val Mean_W16PortionLnags = Samples.agg(mean("W16PortionLnags")).head() - val W16_Mean = Mean_W16PortionLnags.getDouble(0) - val lkpUDFW16 = udf { (i: Double) => if (i == 0) W16_Mean else i } - val df35 = df34.withColumn("FinalW16PortionLnags", lkpUDFW16(col("W16PortionLnags"))) - - //17.Double(Not ratio): - val Mean_W17PortionLinks = Samples.agg(mean("W17PortionLinks")).head() - val W17_Mean = Mean_W17PortionLinks.getDouble(0) - val lkpUDFW17 = udf { (i: Double) => if (i == 0) W17_Mean else i } - val df36 = df35.withColumn("FinalW17PortionLinks", lkpUDFW17(col("W17PortionLinks"))) - - //************************************************End Word Features **************************************************************************************** - - //************************************************Start Sentences Features **************************************************************************************** - // 1. Integer(Double) - val Mean_S1CommentTailLength = Samples.agg(mean("S1CommentTailLength")).head() - val S1_Mean = RoundDouble(Mean_S1CommentTailLength.getDouble(0)) - val lkpUDFS1 = udf { (i: Double) => if (i == 0) S1_Mean else i } - val df37 = df36.withColumn("FinalS1CommentTailLength", lkpUDFS1(col("S1CommentTailLength"))) - - //2. Double but Not ratio values : - val Mean_S2SimikaritySitelinkandLabel = Samples.agg(mean("S2SimikaritySitelinkandLabel")).head() - val S2_Mean = RoundDouble(Mean_S2SimikaritySitelinkandLabel.getDouble(0)) - val lkpUDFS2 = udf { (i: Double) => if (i == 0) S2_Mean else i } - val df39 = df37.withColumn("FinalS2SimikaritySitelinkandLabel", lkpUDFS2(col("S2SimikaritySitelinkandLabel"))) - - //3. Double but Not ratio values : - val Mean_S3SimilarityLabelandSitelink = Samples.agg(mean("S3SimilarityLabelandSitelink")).head() - val S3_Mean = RoundDouble(Mean_S3SimilarityLabelandSitelink.getDouble(0)) - val lkpUDFS3 = udf { (i: Double) => if (i == 0.0) S3_Mean else i } - val df40 = df39.withColumn("FinalS3SimilarityLabelandSitelink", lkpUDFS3(col("S3SimilarityLabelandSitelink"))) - - //4. Double but Not ratio values : - val Mean_S4SimilarityCommentComment = Samples.agg(mean("S4SimilarityCommentComment")).head() - val S4_Mean = RoundDouble(Mean_S4SimilarityCommentComment.getDouble(0)) - val lkpUDFS4 = udf { (i: Double) => if (i == 0.0) S4_Mean else i } - val df41 = df40.withColumn("FinalS4SimilarityCommentComment", lkpUDFS4(col("S4SimilarityCommentComment"))) - - //df41.show() - //************************************************End Sentences Features **************************************************************************************** - //*********************************************** Start Statement Features **************************************************************************************** - //1. String - //2. String - //3. String - //************************************************End Statement Features **************************************************************************************** - //*********************************************** Start User Features **************************************************************************************** - - //1.Boolean(Double) - //2.Boolean(Double) - //3.Boolean(Double) - //4.Boolean(Double) - //5.Boolean(Double) - //6.Boolean(Double) - //7. (Double) IP No need to fill Missing Data - //8. 
(Double) ID No need to fill Missing Data - //9.Boolean(Double) - //10.Boolean(Double) - - //*********************************************** End User Features **************************************************************************************** - //*********************************************** Start Item Features **************************************************************************************** - //1. Integer (Double) No need to fill missing values - //2. Integer (Double) No need to fill missing values - //3. Integer (Double) No need to fill missing values - //4. Integer (Double) No need to fill missing values - //5. Integer (Double) No need to fill missing values - //6. Integer (Double) No need to fill missing values - //7. Integer (Double) No need to fill missing values - //8. Integer (Double) No need to fill missing values - //9. Integer (Double) No need to fill missing values - //10. Integer (Double) No need to fill missing values - //11. String - //*********************************************** End Item Features **************************************************************************************** - //*********************************************** Start Revision Features **************************************************************************************** - //1.String - //2.String - //3.Boolean (Double) - //4.Integer(Double) - //5.String - //6.String - //7. Boolean(Double) - //8. String - //9.String - //10. Integer (Double) - //11.String - //12. integer(Double) - //13. Long(Double) - //14. integer (Double) - //15.String - //16.String - //*********************************************** End Revision Features **************************************************************************************** - //*********************************************** Meta Data , Truth Data and Frequnces **************************************************************************************** - //Meta - // 1.Revision Session :Integer (Converted to Double) - //2. 
User Country Code - //3.User Continent Code - //4.User Time Size - //5.User Region Code - //6.User-city Name - //7.User Country Name - //8.RevisionTags - + // Streaming records: + val jobConf = new JobConf() + val NormalXML_Parser_OBJ = new ParseNormalXML() + val RDD_OBJ = new ParseNormalXML() + + val Training_RDD_All_Record1 = RDD_OBJ.Training_DB_NormalXML_Parser_Input1(sc) + val Training_RDD_All_Record2 = RDD_OBJ.Training_DB_NormalXML_Parser_Input2(sc) + val Training_RDD_All_Record3 = RDD_OBJ.Training_DB_NormalXML_Parser_Input3(sc) + // RDD_All_Record1.foreach(println) + // RDD_All_Record2.foreach(println) + // RDD_All_Record3.foreach(println) + + val Training_RDD_All_Record = Training_RDD_All_Record1.union(Training_RDD_All_Record2).union(Training_RDD_All_Record3).distinct().cache() + + // println(RDD_All_Record.count()) + println(Training_RDD_All_Record.count()) + + // ======= Json part : + // Json RDD : Each record has its Revision ID: + val JsonRDD = Training_RDD_All_Record.map(_.split("NNLL")).map(v => replacing_with_Quoto(v(0), v(8))).cache() + // JsonRDD.foreach(println) + // println(JsonRDD.count()) + + // Data set + val Ds_Json = sqlContext.jsonRDD(JsonRDD).select("key", "id", "labels", "descriptions", "aliases", "claims", "sitelinks").cache() + // Ds_Json.show() + // println(Ds_Json.count()) + + // ======= Tags part : // Contributor IP here is in Decimal format not IP format and It is converted in ParseNormalXml stage + val TagsRDD = Training_RDD_All_Record.map(_.split("NNLL")).map(x => (x(0), x(1), x(2), x(3), x(4), x(5), x(6), x(7), x(8), x(9), x(10), x(11))).cache() + val DF_Tags = TagsRDD.toDF("Rid", "Itemid", "comment", "pid", "time", "contributorIP", + "contributorID", "contributorName", "JsonText", "model", "format", "sha").cache() + // DF_Tags.show() + // println(DF_Tags.count()) + + // ======== Join Json part with Tag Part:============================ + // Joining to have full data + val DF_First_DF_Result_Join_Tags_and_Json = DF_Tags.as("T1").join(Ds_Json.as("T2"), $"T1.Rid" === $"T2.key", "leftouter") + .select("Rid", "itemid", "comment", "pid", "time", "contributorIP", + "contributorID", "contributorName", "JsonText", "labels", "descriptions", + "aliases", "claims", "sitelinks", "model", "format", "sha") // .orderBy("Rid", "Itemid") + DF_First_DF_Result_Join_Tags_and_Json.registerTempTable("Data1") + val dfr_DATA_JsonTages1 = sqlContext.sql("select * from Data1 order by itemid ,Rid ").cache() + + val colNames = Seq("Rid2", "itemid2", "comment2", "pid2", "time2", "contributorIP2", + "contributorID2", "contributorName2", "JsonText2", "labels2", "descriptions2", + "aliases2", "claims2", "sitelinks2", "model2", "format2", "sha2") + val DF_Second = DF_First_DF_Result_Join_Tags_and_Json.toDF(colNames: _*) // .distinct() + DF_Second.registerTempTable("Data2") + + // ===================================================================Parent // Previous Revision============================================================================================================== + // val DF_Joined = result1.as("df1").join(result2.as("df2"), col("itemid") === col("itemid2") && col("index1") === col("index2") + 1, "leftouter").select("Rid", "itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "labels", "descriptions", "aliases", "claims", "sitelinks", "model", "format", "sha", "Rid2", "itemid2", "comment2", "pid2", "time2", "contributorIP2", "contributorID2", "contributorName2", "JsonText2", "labels2", "descriptions2", "claims2", "sitelinks2", "model2", "format2", "sha2") + // .select("itemid", "Rid","pid","time","itemid2","Rid2","pid2","time2") + + // Joining based on Parent Id to get the previous cases: ParentID + val DF_Joined = DF_First_DF_Result_Join_Tags_and_Json.as("df1").join(DF_Second.as("df2"), $"df1.pid" === $"df2.Rid2", "leftouter").distinct() + + val RDD_After_JoinDF = DF_Joined.rdd.distinct() + val x = RDD_After_JoinDF.map(row => (row(0).toString().toInt, row)).cache() + val part = new RangePartitioner(4, x) + val partitioned = x.partitionBy(part).persist() // persist is important for this case and obligatory. + // partitioned.foreach(println) + // + // //=====================================================All Features Based on Categories of Features Data Type :================================================================================== + // + val Result_all_Features = partitioned.map { case (x, y) => (x.toString() + "," + All_Features(y).toString()) } // we convert the Pair RDD to a one-line String RDD to be able to make a DF based on "," + // Result_all_Features.foreach(println) + // println("nayef" + Result_all_Features.count()) + + // Convert the RDD of All Features to DataFrame: + + val schema = StructType( + + // 0 + StructField("Rid", IntegerType, false) :: + + // Character Features : + /* 1 */ StructField("C1uppercaseratio", DoubleType, false) :: /* 2 */ StructField("C2lowercaseratio", DoubleType, false) :: /*3*/ StructField("C3alphanumericratio", DoubleType, false) :: + /* 4 */ StructField("C4asciiratio", DoubleType, false) :: /* 5 */ StructField("C5bracketratio", DoubleType, false) :: /*6*/ StructField("C6digitalratio", DoubleType, false) :: + /* 7 */ StructField("C7latinratio", DoubleType, false) :: /* 8 */ StructField("C8whitespaceratio", DoubleType, false) :: /* 9*/ StructField("C9puncratio", DoubleType, false) :: + /* 10 */ StructField("C10longcharacterseq", DoubleType, false) :: /* 11 */ StructField("C11arabicratio", DoubleType, false) :: /*12*/ StructField("C12bengaliratio", DoubleType, false) :: + /* 13 */ StructField("C13brahmiratio", DoubleType, false) :: /* 14 */ StructField("C14cyrilinratio", DoubleType, false) :: /*15*/ StructField("C15hanratio", DoubleType, false) :: + /* 16 */ StructField("c16malysiaratio", DoubleType, false) :: /* 17 */ StructField("C17tamiratio", DoubleType, false) :: /*18*/ StructField("C18telugratio", DoubleType, false) :: + /* 19 */ StructField("C19symbolratio", DoubleType, false) :: /* 20 */ StructField("C20alpharatio", DoubleType, false) :: /*21*/ StructField("C21visibleratio", DoubleType, false) :: + /* 22 */ StructField("C22printableratio", DoubleType, false) :: /* 23 */ StructField("C23blankratio", DoubleType, false) :: /*24 */ StructField("C24controlratio", DoubleType, false) :: + /* 25 */ StructField("C25hexaratio", DoubleType, false) :: + + // word Features: + /* 26 */ StructField("W1languagewordratio", DoubleType, false) :: /* 27 Boolean */ StructField("W2Iscontainlanguageword", DoubleType, false) :: /*28*/ StructField("W3lowercaseratio", DoubleType, false) :: + /* 29 Integer */ StructField("W4longestword", IntegerType, false) :: /* 30 Boolean */ StructField("W5IscontainURL", DoubleType, false) :: /*31*/ StructField("W6badwordratio", DoubleType, false) :: + /* 32 */ StructField("W7uppercaseratio", DoubleType, false) :: /* 33 */ StructField("W8banwordratio", DoubleType, false) :: /*34 Boolean */ StructField("W9FemalFirstName", DoubleType, false) :: + /* 35 Boolean */ StructField("W10MaleFirstName", DoubleType, false) :: /* 36 Boolean */ 
StructField("W11IscontainBadword", DoubleType, false) :: /*37 Boolean*/ StructField("W12IsContainBanword", DoubleType, false) :: + /* 38 integer */ StructField("W13NumberSharewords", DoubleType, false) :: /* 39 Integer */ StructField("W14NumberSharewordswithoutStopwords", DoubleType, false) :: + /* 40 */ StructField("W15PortionQid", DoubleType, false) :: /* 41 */ StructField("W16PortionLnags", DoubleType, false) :: /*42*/ StructField("W17PortionLinks", DoubleType, false) :: + + // + // // Sentences Features: + /* 43 */ StructField("S1CommentTailLength", DoubleType, false) :: /* 44 */ StructField("S2SimikaritySitelinkandLabel", DoubleType, false) :: /*45*/ StructField("S3SimilarityLabelandSitelink", DoubleType, false) :: /*46*/ StructField("S4SimilarityCommentComment", DoubleType, false) :: + // + // // Statements Features : + /* 47 */ StructField("SS1Property", StringType, false) :: /* 48 */ StructField("SS2DataValue", StringType, false) :: /*49*/ StructField("SS3ItemValue", StringType, false) :: + // + // + // // User Features : + /* 50 Boolean */ StructField("U1IsPrivileged", DoubleType, false) :: /*51 Boolean*/ StructField("U2IsBotUser", DoubleType, false) :: /*52 Boolean*/ StructField("U3IsBotuserWithFlaguser", DoubleType, false) :: + /* 53 Boolean */ StructField("U4IsProperty", DoubleType, false) :: /*54 Boolean*/ StructField("U5IsTranslator", DoubleType, false) :: /*55 Boolean*/ StructField("U6IsRegister", DoubleType, false) :: + /* 56 */ StructField("U7IPValue", DoubleType, false) :: /* 57 */ StructField("U8UserID", IntegerType, false) :: /*58*/ StructField("U9HasBirthDate", DoubleType, false) :: /*59*/ StructField("U10HasDeathDate", DoubleType, false) :: + + // Items Features : + + /* 60 */ StructField("I1NumberLabels", DoubleType, false) :: /* 61 */ StructField("I2NumberDescription", DoubleType, false) :: /*62*/ StructField("I3NumberAliases", DoubleType, false) :: /*63*/ StructField("I4NumberClaims", DoubleType, false) :: + /* 64 */ StructField("I5NumberSitelinks", DoubleType, false) :: /* 65 */ StructField("I6NumberStatement", DoubleType, false) :: /*66*/ StructField("I7NumberReferences", DoubleType, false) :: /*67*/ StructField("I8NumberQualifier", DoubleType, false) :: + /* 68 */ StructField("I9NumberQualifierOrder", DoubleType, false) :: /* 69 */ StructField("I10NumberBadges", DoubleType, false) :: /*70*/ StructField("I11ItemTitle", StringType, false) :: + + // Revision Features: + /* 71 */ StructField("R1languageRevision", StringType, false) :: /* 72 */ StructField("R2RevisionLanguageLocal", StringType, false) :: /*73*/ StructField("R3IslatainLanguage", DoubleType, false) :: + /* 74 */ StructField("R4JsonLength", DoubleType, false) :: /* 75 */ StructField("R5RevisionAction", StringType, false) :: /*76*/ StructField("R6PrevReviAction", StringType, false) :: + /* 77 */ StructField("R7RevisionAccountChange", DoubleType, false) :: /* 78 */ StructField("R8ParRevision", StringType, false) :: /*79*/ StructField("R9RevisionTime", StringType, false) :: + /* 80 */ StructField("R10RevisionSize", DoubleType, false) :: /* 81 */ StructField("R11ContentType", StringType, false) :: /*82*/ StructField("R12BytesIncrease", DoubleType, false) :: + /* 83 */ StructField("R13TimeSinceLastRevi", DoubleType, false) :: /* 84 */ StructField("R14CommentLength", DoubleType, false) :: /*85*/ StructField("R15RevisionSubaction", StringType, false) :: + /* 86 */ StructField("R16PrevReviSubaction", StringType, false) :: + + Nil) + + val rowRDD = Result_all_Features.map(line => line.split(",")).map(e ⇒ 
Row(e(0).toInt // character feature column + , e(1).toDouble, e(2).toDouble, e(3).toDouble, e(4).toDouble, e(5).toDouble, e(6).toDouble, e(7).toDouble, e(8).toDouble, e(9).toDouble, RoundDouble(e(10).toDouble), + e(11).toDouble, e(12).toDouble, e(13).toDouble, e(14).toDouble, e(15).toDouble, e(16).toDouble, e(17).toDouble, e(18).toDouble, e(19).toDouble, e(20).toDouble, e(21).toDouble, e(22).toDouble, e(23).toDouble, e(24).toDouble, e(25).toDouble // Word Feature column + , e(26).toDouble, e(27).toDouble, e(28).toDouble, e(29).toDouble.toInt, e(30).toDouble, e(31).toDouble, e(32).toDouble, e(33).toDouble, e(34).toDouble, e(35).toDouble, e(36).toDouble, e(37).toDouble, RoundDouble(e(38).toDouble), RoundDouble(e(39).toDouble), e(40).toDouble, e(41).toDouble, e(42).toDouble // Sentences Features column: + , RoundDouble(e(43).toDouble), e(44).toDouble, e(45).toDouble, e(46).toDouble // Statement Features Column: + , e(47), e(48), e(49) // User Features Column: + , e(50).toDouble, e(51).toDouble, e(52).toDouble, e(53).toDouble, e(54).toDouble, e(55).toDouble, e(56).toDouble, e(57).toDouble.toInt, e(58).toDouble, e(59).toDouble // Item Features column: + , e(60).toDouble, e(61).toDouble, e(62).toDouble, e(63).toDouble, e(64).toDouble, e(65).toDouble, e(66).toDouble, e(67).toDouble, e(68).toDouble, e(69).toDouble, "Q" + e(70).toDouble.toInt.toString() // Revision Features Column: + , e(71), e(72), e(73).toDouble, e(74).toDouble, e(75), e(76), e(77).toDouble, e(78), e(79), e(80).toDouble, e(81), e(82).toDouble, e(83).toDouble, e(84).toDouble, e(85), e(86))) + + // a.User Frequency: + // number of revisions a user has contributed + // val resu= DF_Tags.groupBy("contributorID").agg(count("Rid")) + DF_Tags.registerTempTable("TagesTable") + val ContributorFreq_for_Each_Revision_DF = sqlContext.sql("select contributorID as CIDUSER1, count(Rid) as NumberofRevisionsUserContributed from TagesTable where contributorID !='0' group by contributorID ") //.drop("CIDUSER1") + // ContributorFreq_for_Each_Revision_DF.show() + + // b.Cumulated : Number of a unique Item a user has contributed. + val CumulatedNumberof_uniqueItemsForUser_DF = sqlContext.sql("select contributorID as CIDUSER2, COUNT(DISTINCT itemid) as NumberofUniqueItemsUseredit from TagesTable where contributorID !='0' group by contributorID") //.drop("CIDUSER2") + // CumulatedNumberof_uniqueItemsForUser_DF.show() + + // 1.Item Frequency: + // number of revisions an Item has + val ItemFrequ_DF = sqlContext.sql("select itemid, count(Rid) as NumberRevisionItemHas from TagesTable group by itemid") + // ItemFrequ_DF.show() + + // 2. Cumulate number of unique users have edited the Item : Did not consider the users IP. Contributor is an IP or Name. we consider name + val CumulatedNumberof_UniqueUserForItem_DF = sqlContext.sql("select itemid, COUNT(DISTINCT contributorID) as NumberUniqUserEditItem from TagesTable where contributorID !='0' group by itemid") + // CumulatedNumberof_UniqueUserForItem_DF.show() + + // 3. 
+ val Fre_Item_DF = sqlContext.sql("select itemid, COUNT(itemid) as FreqItem from TagesTable group by itemid") + // Fre_Item_DF.show() + + // ***************************************************************************************************************************************** + // This is the main DataFrame: + val BeforeJoin_All_Features = sqlContext.createDataFrame(rowRDD, schema) + // BeforeJoin_All_Features.show() + + // ********************************** User feature Join + + // Join1 adds the first user feature: number of revisions a user has contributed + val AfterJoinUser1_All_Features = BeforeJoin_All_Features.as("T1").join(ContributorFreq_for_Each_Revision_DF.as("T2"), $"T1.U8UserID" === $"T2.CIDUSER1", "leftouter").drop("CIDUSER1") + // AfterJoinUser1_All_Features.show() + + // Join2 adds the second user feature + val AfterJoinUser2_All_Features = AfterJoinUser1_All_Features.as("T1").join(CumulatedNumberof_uniqueItemsForUser_DF.as("T2"), $"T1.U8UserID" === $"T2.CIDUSER2", "leftouter").drop("CIDUSER2") + // AfterJoinUser2_All_Features.show() + + // ********************************** Item Feature Join + // Join3 adds the first item feature: number of revisions an item has + val AfterJoinItem3_All_Features = AfterJoinUser2_All_Features.as("T1").join(ItemFrequ_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid") + // AfterJoinItem3_All_Features.show() + + // Join4 adds the second item feature + val AfterJoinItem4_All_Features = AfterJoinItem3_All_Features.as("T1").join(CumulatedNumberof_UniqueUserForItem_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid") + // AfterJoinItem4_All_Features.show() + + // Join5 adds the third item feature + val AfterJoinItem5_All_Features = AfterJoinItem4_All_Features.as("T1").join(Fre_Item_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid") + // AfterJoinItem5_All_Features.show() + + // ******************************** + + // Geographical information features from the Meta file + // REVISION_ID|REVISION_SESSION_ID|USER_COUNTRY_CODE|USER_CONTINENT_CODE|USER_TIME_ZONE|USER_REGION_CODE|USER_CITY_NAME|USER_COUNTY_NAME|REVISION_TAGS + val df_GeoInf = sqlContext.read + .format("com.databricks.spark.csv") + .option("header", "true") // Use first line of all files as header + .option("inferSchema", "true") // Automatically infer data types + .load("hdfs://localhost:9000/mydata/Meta.csv").select("REVISION_ID", "REVISION_SESSION_ID", "USER_COUNTRY_CODE", "USER_CONTINENT_CODE", "USER_TIME_ZONE", "USER_REGION_CODE", "USER_CITY_NAME", "USER_COUNTY_NAME", "REVISION_TAGS") + // df_GeoInf.show() + + val df_Truth = sqlContext.read + .format("com.databricks.spark.csv") + .option("header", "true") // Use first line of all files as header + .option("inferSchema", "true") // Automatically infer data types + .load("hdfs://localhost:9000/mydata/truth.csv").select("REVISION_ID", "ROLLBACK_REVERTED", "UNDO_RESTORE_REVERTED") + // df_Truth.show() + + val AfterJoinGeoInfo_All_Features = AfterJoinItem5_All_Features.as("T1").join(df_GeoInf.as("T2"), $"T1.Rid" === $"T2.REVISION_ID", "leftouter").drop("REVISION_ID").cache() + // AfterJoinGeoInfo_All_Features.show() + + val Final_All_Features = AfterJoinGeoInfo_All_Features.as("T1").join(df_Truth.as("T2"), $"T1.Rid" === $"T2.REVISION_ID", "leftouter").drop("REVISION_ID").cache() + // Final_All_Features.show() + + // Pre-process data 
============================================================================================================================================================ + + // For string columns, we fill the null values with "NA": + + var Fill_Missing_Final_All_Features = Final_All_Features.na.fill("NA", Seq("USER_COUNTRY_CODE", "USER_CONTINENT_CODE", "USER_TIME_ZONE", "USER_REGION_CODE", "USER_CITY_NAME", "USER_COUNTY_NAME", "REVISION_TAGS")).cache() + + // For integer frequency columns, we fill the null values with 0: + Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.na.fill(0, Seq("FreqItem", "NumberUniqUserEditItem", "NumberRevisionItemHas", "NumberofUniqueItemsUseredit", "NumberofRevisionsUserContributed", "REVISION_SESSION_ID")).cache() + // Fill_Missing_Final_All_Features.show() + + val BoolToDoubleUDF = udf { (BoolAsString: String) => if (BoolAsString == "T") 1.0 else 0.0 } + val IntegerToDouble = udf { (IntegerRevisionSessionID: Integer) => IntegerRevisionSessionID.toDouble } + Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalROLLBACK_REVERTED", BoolToDoubleUDF(col("ROLLBACK_REVERTED"))) + Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalUNDO_RESTORE_REVERTED", BoolToDoubleUDF(col("UNDO_RESTORE_REVERTED"))) + + Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalREVISION_SESSION_ID", IntegerToDouble(col("REVISION_SESSION_ID"))) + + Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberofRevisionsUserContributed", IntegerToDouble(col("NumberofRevisionsUserContributed"))) + Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberofUniqueItemsUseredit", IntegerToDouble(col("NumberofUniqueItemsUseredit"))) + + Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberRevisionItemHas", IntegerToDouble(col("NumberRevisionItemHas"))) + Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberUniqUserEditItem", IntegerToDouble(col("NumberUniqUserEditItem"))) + Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalFreqItem", IntegerToDouble(col("FreqItem"))) + + // =========================================================================== Character Features: Double and Integer Features ==================================================================================== + // Ratio (Double) columns: fill zero values with the sample median (the UDFs below test for 0): character features plus the ratio word features. + var Samples = Fill_Missing_Final_All_Features.sample(false, 0.001).cache() //.where($"S2SimikaritySitelinkandLabel">0.0 || $"S3SimilarityLabelandSitelink">0.0 || $"S4SimilarityCommentComment">0.0) + Samples.registerTempTable("df") + + val Query = "select " + + "percentile_approx(C1uppercaseratio, 0.5) as median1" + "," + "percentile_approx(C2lowercaseratio, 0.5) as median2" + "," + + "percentile_approx(C3alphanumericratio, 0.5) as median3" + "," + "percentile_approx(C4asciiratio, 0.5) as median4" + "," + + "percentile_approx(C5bracketratio, 0.5) as median5" + "," + "percentile_approx(C6digitalratio, 0.5) as median6" + "," + + "percentile_approx(C7latinratio, 0.5) as median7" + "," + "percentile_approx(C8whitespaceratio, 0.5) as median8" + "," + + "percentile_approx(C9puncratio, 0.5) as median9" + "," + "percentile_approx(C11arabicratio, 0.5) as median11" + "," + + "percentile_approx(C12bengaliratio, 0.5) as median12" + "," + 
"percentile_approx(C13brahmiratio, 0.5) as median13" + "," + + "percentile_approx(C14cyrilinratio, 0.5) as median14" + "," + "percentile_approx(C15hanratio, 0.5) as median15" + "," + + "percentile_approx(c16malysiaratio, 0.5) as median16" + "," + + "percentile_approx(C17tamiratio, 0.5) as median17" + "," + "percentile_approx(C18telugratio, 0.5) as median18" + "," + + "percentile_approx(C19symbolratio, 0.5) as median19" + "," + "percentile_approx(C20alpharatio, 0.5) as median20" + "," + + "percentile_approx(C21visibleratio, 0.5) as median21" + "," + "percentile_approx(C22printableratio, 0.5) as median22" + "," + + "percentile_approx(C23blankratio, 0.5) as median23" + "," + "percentile_approx(C24controlratio, 0.5) as median24" + "," + + "percentile_approx(C25hexaratio, 0.5) as median25" ++ "," + "percentile_approx(W1languagewordratio, 0.5) as median26" + "," + + "percentile_approx(W3lowercaseratio, 0.5) as median27" + "," + "percentile_approx(W6badwordratio, 0.5) as median28" + "," + + "percentile_approx(W7uppercaseratio, 0.5) as median27" + "," + "percentile_approx(W8banwordratio, 0.5) as median27" + " from df" + + val medianValues = sqlContext.sql(Query).rdd + val Median = medianValues.first() + + // Median : + // Character Ratio Features: UDF + val lkpUDF1 = udf { (i: Double) => if (i == 0) Median(0).toString().toDouble else i } + val lkpUDF2 = udf { (i: Double) => if (i == 0) Median(1).toString().toDouble else i } + val lkpUDF3 = udf { (i: Double) => if (i == 0) Median(2).toString().toDouble else i } + val lkpUDF4 = udf { (i: Double) => if (i == 0) Median(3).toString().toDouble else i } + val lkpUDF5 = udf { (i: Double) => if (i == 0) Median(4).toString().toDouble else i } + val lkpUDF6 = udf { (i: Double) => if (i == 0) Median(5).toString().toDouble else i } + val lkpUDF7 = udf { (i: Double) => if (i == 0) Median(6).toString().toDouble else i } + val lkpUDF8 = udf { (i: Double) => if (i == 0) Median(7).toString().toDouble else i } + val lkpUDF9 = udf { (i: Double) => if (i == 0) Median(8).toString().toDouble else i } + + val lkpUDF11 = udf { (i: Double) => if (i == 0) Median(9).toString().toDouble else i } + val lkpUDF12 = udf { (i: Double) => if (i == 0) Median(10).toString().toDouble else i } + val lkpUDF13 = udf { (i: Double) => if (i == 0) Median(11).toString().toDouble else i } + val lkpUDF14 = udf { (i: Double) => if (i == 0) Median(12).toString().toDouble else i } + val lkpUDF15 = udf { (i: Double) => if (i == 0) Median(13).toString().toDouble else i } + val lkpUDF16 = udf { (i: Double) => if (i == 0) Median(14).toString().toDouble else i } + val lkpUDF17 = udf { (i: Double) => if (i == 0) Median(15).toString().toDouble else i } + val lkpUDF18 = udf { (i: Double) => if (i == 0) Median(16).toString().toDouble else i } + val lkpUDF19 = udf { (i: Double) => if (i == 0) Median(17).toString().toDouble else i } + val lkpUDF20 = udf { (i: Double) => if (i == 0) Median(18).toString().toDouble else i } + val lkpUDF21 = udf { (i: Double) => if (i == 0) Median(19).toString().toDouble else i } + val lkpUDF22 = udf { (i: Double) => if (i == 0) Median(20).toString().toDouble else i } + val lkpUDF23 = udf { (i: Double) => if (i == 0) Median(21).toString().toDouble else i } + val lkpUDF24 = udf { (i: Double) => if (i == 0) Median(22).toString().toDouble else i } + val lkpUDF25 = udf { (i: Double) => if (i == 0) Median(23).toString().toDouble else i } + + val df1 = Fill_Missing_Final_All_Features.withColumn("FinalC1uppercaseratio", lkpUDF1(col("C1uppercaseratio"))) 
//.drop("C1uppercaseratio").cache() + val df2 = df1.withColumn("FinalC2lowercaseratio", lkpUDF2(col("C2lowercaseratio"))) //.drop("C2lowercaseratio").cache() + // df1.unpersist() + val df3 = df2.withColumn("FinalC3alphanumericratio", lkpUDF3(col("C3alphanumericratio"))) //.drop("C3alphanumericratio").cache() + // df2.unpersist() + val df4 = df3.withColumn("FinalC4asciiratio", lkpUDF4(col("C4asciiratio"))) //.drop("C4asciiratio").cache() + // df3.unpersist() + val df5 = df4.withColumn("FinalC5bracketratio", lkpUDF5(col("C5bracketratio"))) //.drop("C5bracketratio").cache() + // df4.unpersist() + val df6 = df5.withColumn("FinalC6digitalratio", lkpUDF6(col("C6digitalratio"))) //.drop("C6digitalratio").cache() + // df5.unpersist() + val df7 = df6.withColumn("FinalC7latinratio", lkpUDF7(col("C7latinratio"))) //.drop("C7latinratio").cache() + // df6.unpersist() + val df8 = df7.withColumn("FinalC8whitespaceratio", lkpUDF8(col("C8whitespaceratio"))) //.drop("C8whitespaceratio").cache() + // df7.unpersist() + val df9 = df8.withColumn("FinalC9puncratio", lkpUDF9(col("C9puncratio"))) //.drop("C9puncratio").cache() + + // Mean : + // character integer values : + val Mean_C10longcharacterseq = Samples.agg(mean("C10longcharacterseq")).head() + val C10_Mean = Mean_C10longcharacterseq.getDouble(0) + val lkpUDFC10 = udf { (i: Double) => if (i == 0) C10_Mean else i } + val df10 = df9.withColumn("FinalC10longcharacterseq", lkpUDFC10(col("C10longcharacterseq"))) + + // Median + val df11 = df10.withColumn("FinalC11arabicratio", lkpUDF11(col("C11arabicratio"))) //.drop("C11arabicratio").cache() + // df9.unpersist() + val df12 = df11.withColumn("FinalC12bengaliratio", lkpUDF12(col("C12bengaliratio"))) //.drop("C12bengaliratio").cache() + // df11.unpersist() + val df13 = df12.withColumn("FinalC13brahmiratio", lkpUDF13(col("C13brahmiratio"))) //.drop("C13brahmiratio").cache() + // df12.unpersist() + val df14 = df13.withColumn("FinalC14cyrilinratio", lkpUDF14(col("C14cyrilinratio"))) //.drop("C14cyrilinratio").cache() + // df13.unpersist() + val df15 = df14.withColumn("FinalC15hanratio", lkpUDF15(col("C15hanratio"))) //.drop("C15hanratio").cache() + // df14.unpersist() + val df16 = df15.withColumn("Finalc16malysiaratio", lkpUDF16(col("c16malysiaratio"))) //.drop("c16malysiaratio").cache() + // df15.unpersist() + val df17 = df16.withColumn("FinalC17tamiratio", lkpUDF17(col("C17tamiratio"))) //.drop("C17tamiratio").cache() + // df16.unpersist() + val df18 = df17.withColumn("FinalC18telugratio", lkpUDF18(col("C18telugratio"))) //.drop("C18telugratio").cache() + // df17.unpersist() + val df19 = df18.withColumn("FinalC19symbolratio", lkpUDF19(col("C19symbolratio"))) //.drop("C19symbolratio").cache() + // df18.unpersist() + val df20 = df19.withColumn("FinalC20alpharatio", lkpUDF20(col("C20alpharatio"))) //.drop("C20alpharatio").cache() + // df19.unpersist() + val df21 = df20.withColumn("FinalC21visibleratio", lkpUDF21(col("C21visibleratio"))) //.drop("C21visibleratio").cache() + // df20.unpersist() + val df22 = df21.withColumn("FinalC22printableratio", lkpUDF22(col("C22printableratio"))) //.drop("C22printableratio").cache() + // df21.unpersist() + val df23 = df22.withColumn("FinalC23blankratio", lkpUDF23(col("C23blankratio"))) //.drop("C23blankratio").cache() + // df22.unpersist() + val df24 = df23.withColumn("FinalC24controlratio", lkpUDF24(col("C24controlratio"))) //.drop("C24controlratio").cache() + // df23.unpersist() + val df25 = df24.withColumn("FinalC25hexaratio", lkpUDF25(col("C25hexaratio"))) 
//.drop("C25hexaratio").cache() + + // ************************************************End Character Features **************************************************************************************** + + // ************************************************Start Word Features **************************************************************************************** + + // Word Ratio Features : UDF + val lkpUDFW1 = udf { (i: Double) => if (i == 0) Median(24).toString().toDouble else i } + val lkpUDFW3 = udf { (i: Double) => if (i == 0) Median(25).toString().toDouble else i } + val lkpUDFW6 = udf { (i: Double) => if (i == 0) Median(26).toString().toDouble else i } + val lkpUDFW7 = udf { (i: Double) => if (i == 0) Median(27).toString().toDouble else i } + val lkpUDFW8 = udf { (i: Double) => if (i == 0) Median(28).toString().toDouble else i } + + // 1. + val df26 = df25.withColumn("FinalW1languagewordratio", lkpUDFW1(col("W1languagewordratio"))) //.drop("W1languagewordratio").cache() + + // 2.Boolean(Double) IsContainLanguageWord + + // 3. + val df27 = df26.withColumn("FinalW3lowercaseratio", lkpUDFW3(col("W3lowercaseratio"))) //.drop("W3lowercaseratio").cache() + // df26.unpersist() + + // 4. Integer " Mean: + val Mean_W4longestword = Samples.agg(mean("W4longestword")).head() + val W4_Mean = Mean_W4longestword.getDouble(0) + val lkpUDFW4 = udf { (i: Double) => if (i == 0) W4_Mean else i } + val df28 = df27.withColumn("FinalW4longestword", lkpUDFW4(col("W4longestword"))) + + // 5. Boolean (Double ) W5IscontainURL + // 6. + val df29 = df28.withColumn("FinalW6badwordratio", lkpUDFW6(col("W6badwordratio"))) //.drop("W6badwordratio").cache() + + // 7. + val df30 = df29.withColumn("FinalW7uppercaseratio", lkpUDFW7(col("W7uppercaseratio"))) //.drop("W7uppercaseratio").cache() + + // 8. + val df31 = df30.withColumn("FinalW8banwordratio", lkpUDFW8(col("W8banwordratio"))) //.drop("W8banwordratio").cache() + + // 9.FemalFirst Boolean(Double) + // 10.Male First Boolean(Double) + // 11.ContainBadWord Boolean(Double) + // 12ContainBanWord Boolean(Double) + + // 13. Integer(Double): + val Mean_W13W13NumberSharewords = Samples.agg(mean("W13NumberSharewords")).head() + val W13_Mean = Mean_W13W13NumberSharewords.getDouble(0) + val lkpUDFW13 = udf { (i: Double) => if (i == 0) W13_Mean else i } + val df32 = df31.withColumn("FinalW13NumberSharewords", lkpUDFW13(col("W13NumberSharewords"))) + + // 14. Integer (Double): + val Mean_W14NumberSharewordswithoutStopwords = Samples.agg(mean("W14NumberSharewordswithoutStopwords")).head() + val W14_Mean = Mean_W14NumberSharewordswithoutStopwords.getDouble(0) + val lkpUDFW14 = udf { (i: Double) => if (i == 0) W14_Mean else i } + val df33 = df32.withColumn("FinalW14NumberSharewordswithoutStopwords", lkpUDFW14(col("W14NumberSharewordswithoutStopwords"))) + + // 15. Double (Not ratio): + val Mean_W15PortionQid = Samples.agg(mean("W15PortionQid")).head() + val W15_Mean = Mean_W15PortionQid.getDouble(0) + val lkpUDFW15 = udf { (i: Double) => if (i == 0) W15_Mean else i } + val df34 = df33.withColumn("FinalW15PortionQid", lkpUDFW15(col("W15PortionQid"))) + + // 16. 
Double(Not Ratio): + val Mean_W16PortionLnags = Samples.agg(mean("W16PortionLnags")).head() + val W16_Mean = Mean_W16PortionLnags.getDouble(0) + val lkpUDFW16 = udf { (i: Double) => if (i == 0) W16_Mean else i } + val df35 = df34.withColumn("FinalW16PortionLnags", lkpUDFW16(col("W16PortionLnags"))) + + // 17.Double(Not ratio): + val Mean_W17PortionLinks = Samples.agg(mean("W17PortionLinks")).head() + val W17_Mean = Mean_W17PortionLinks.getDouble(0) + val lkpUDFW17 = udf { (i: Double) => if (i == 0) W17_Mean else i } + val df36 = df35.withColumn("FinalW17PortionLinks", lkpUDFW17(col("W17PortionLinks"))) + + // ************************************************End Word Features **************************************************************************************** + + // ************************************************Start Sentences Features **************************************************************************************** + // 1. Integer(Double) + val Mean_S1CommentTailLength = Samples.agg(mean("S1CommentTailLength")).head() + val S1_Mean = RoundDouble(Mean_S1CommentTailLength.getDouble(0)) + val lkpUDFS1 = udf { (i: Double) => if (i == 0) S1_Mean else i } + val df37 = df36.withColumn("FinalS1CommentTailLength", lkpUDFS1(col("S1CommentTailLength"))) + + // 2. Double but Not ratio values : + val Mean_S2SimikaritySitelinkandLabel = Samples.agg(mean("S2SimikaritySitelinkandLabel")).head() + val S2_Mean = RoundDouble(Mean_S2SimikaritySitelinkandLabel.getDouble(0)) + val lkpUDFS2 = udf { (i: Double) => if (i == 0) S2_Mean else i } + val df39 = df37.withColumn("FinalS2SimikaritySitelinkandLabel", lkpUDFS2(col("S2SimikaritySitelinkandLabel"))) + + // 3. Double but Not ratio values : + val Mean_S3SimilarityLabelandSitelink = Samples.agg(mean("S3SimilarityLabelandSitelink")).head() + val S3_Mean = RoundDouble(Mean_S3SimilarityLabelandSitelink.getDouble(0)) + val lkpUDFS3 = udf { (i: Double) => if (i == 0.0) S3_Mean else i } + val df40 = df39.withColumn("FinalS3SimilarityLabelandSitelink", lkpUDFS3(col("S3SimilarityLabelandSitelink"))) + + // 4. Double but Not ratio values : + val Mean_S4SimilarityCommentComment = Samples.agg(mean("S4SimilarityCommentComment")).head() + val S4_Mean = RoundDouble(Mean_S4SimilarityCommentComment.getDouble(0)) + val lkpUDFS4 = udf { (i: Double) => if (i == 0.0) S4_Mean else i } + val df41 = df40.withColumn("FinalS4SimilarityCommentComment", lkpUDFS4(col("S4SimilarityCommentComment"))) + + // df41.show() + // ************************************************End Sentences Features **************************************************************************************** + // *********************************************** Start Statement Features **************************************************************************************** + // 1. String + // 2. String + // 3. String + // ************************************************End Statement Features **************************************************************************************** + // *********************************************** Start User Features **************************************************************************************** + + // 1.Boolean(Double) + // 2.Boolean(Double) + // 3.Boolean(Double) + // 4.Boolean(Double) + // 5.Boolean(Double) + // 6.Boolean(Double) + // 7. (Double) IP No need to fill Missing Data + // 8. 
(Double) ID No need to fill Missing Data + // 9. Boolean (Double) + // 10. Boolean (Double) + + // *********************************************** End User Features **************************************************************************************** + // *********************************************** Start Item Features **************************************************************************************** + // 1. Integer (Double) No need to fill missing values + // 2. Integer (Double) No need to fill missing values + // 3. Integer (Double) No need to fill missing values + // 4. Integer (Double) No need to fill missing values + // 5. Integer (Double) No need to fill missing values + // 6. Integer (Double) No need to fill missing values + // 7. Integer (Double) No need to fill missing values + // 8. Integer (Double) No need to fill missing values + // 9. Integer (Double) No need to fill missing values + // 10. Integer (Double) No need to fill missing values + // 11. String + // *********************************************** End Item Features **************************************************************************************** + // *********************************************** Start Revision Features **************************************************************************************** + // 1. String + // 2. String + // 3. Boolean (Double) + // 4. Integer (Double) + // 5. String + // 6. String + // 7. Boolean (Double) + // 8. String + // 9. String + // 10. Integer (Double) + // 11. String + // 12. Integer (Double) + // 13. Long (Double) + // 14. Integer (Double) + // 15. String + // 16. String + // *********************************************** End Revision Features **************************************************************************************** + // *********************************************** Meta Data, Truth Data and Frequencies **************************************************************************************** + // Meta + // 1. Revision Session: Integer (converted to Double) + // 2. 
User Country Code + // 3. User Continent Code + // 4. User Time Zone + // 5. User Region Code + // 6. User City Name + // 7. User County Name + // 8. RevisionTags + + // Truth: + // 1. Undo + + // Freq: + + // 5 features + + // Roll Boolean: Boolean (Double) + // Undo Boolean: Boolean (Double) + + // *********************************************** End Meta Data, Truth Data and Frequencies **************************************************************************************** + + // =========================================================================== String Features ==================================================================================== + + val df42 = df41.withColumn( + // statement String features: + "StringFeatures", concat($"SS1Property", lit(";"), $"SS2DataValue", lit(";"), $"SS3ItemValue", lit(";"), $"I11ItemTitle", + // Revision String Features: + lit(";"), $"R1languageRevision", + lit(";"), $"R2RevisionLanguageLocal", + lit(";"), $"R5RevisionAction", + lit(";"), $"R6PrevReviAction", + lit(";"), $"R8ParRevision", + lit(";"), $"R9RevisionTime", + lit(";"), $"R11ContentType", + lit(";"), $"R15RevisionSubaction", + lit(";"), $"R16PrevReviSubaction", + + lit(";"), $"USER_COUNTRY_CODE", + lit(";"), $"USER_CONTINENT_CODE", + lit(";"), $"USER_TIME_ZONE", + lit(";"), $"USER_REGION_CODE", + lit(";"), $"USER_CITY_NAME", + lit(";"), $"USER_COUNTY_NAME", + lit(";"), $"REVISION_TAGS")) + + val toArray = udf((record: String) => record.split(";").map(_.toString())) + val test1 = df42.withColumn("StringFeatures", toArray(col("StringFeatures"))) + // test1.show() + // test1.printSchema() + + val word2Vec = new Word2Vec().setInputCol("StringFeatures").setOutputCol("result").setVectorSize(20).setMinCount(0) + val model = word2Vec.fit(test1) + val result = model.transform(test1) //.rdd + + // result.show() + + val Todense = udf((b: Vector) => b.toDense) + val test_new2 = result.withColumn("result", Todense(col("result"))) + + val assembler = new VectorAssembler().setInputCols(Array( + "result", + + // character + "FinalC1uppercaseratio", "FinalC2lowercaseratio", "FinalC3alphanumericratio", "FinalC4asciiratio", "FinalC5bracketratio", "FinalC6digitalratio", + "FinalC7latinratio", "FinalC8whitespaceratio", "FinalC9puncratio", "FinalC10longcharacterseq", "FinalC11arabicratio", "FinalC12bengaliratio", + "FinalC13brahmiratio", "FinalC14cyrilinratio", "FinalC15hanratio", "Finalc16malysiaratio", "FinalC17tamiratio", "FinalC18telugratio", + "FinalC19symbolratio", "FinalC20alpharatio", "FinalC21visibleratio", "FinalC22printableratio", "FinalC23blankratio", "FinalC24controlratio", "FinalC25hexaratio", + + // Words + "FinalW1languagewordratio", "W2Iscontainlanguageword", "FinalW3lowercaseratio", "FinalW4longestword", "W5IscontainURL", "FinalW6badwordratio", + "FinalW7uppercaseratio", "FinalW8banwordratio", "W9FemalFirstName", "W10MaleFirstName", "W11IscontainBadword", "W12IsContainBanword", + "FinalW13NumberSharewords", "FinalW14NumberSharewordswithoutStopwords", "FinalW15PortionQid", "FinalW16PortionLnags", "FinalW17PortionLinks", + + // Sentences : + "FinalS1CommentTailLength", "FinalS2SimikaritySitelinkandLabel", "FinalS3SimilarityLabelandSitelink", "FinalS4SimilarityCommentComment", + + // User : + "U1IsPrivileged", "U2IsBotUser", "U3IsBotuserWithFlaguser", "U4IsProperty", "U5IsTranslator", "U6IsRegister", "U7IPValue", "U8UserID", + "U9HasBirthDate", "U10HasDeathDate", + + // Item: + + "I1NumberLabels", "I2NumberDescription", "I3NumberAliases", "I4NumberClaims", "I5NumberSitelinks", "I6NumberStatement", + 
"I7NumberReferences", "I8NumberQualifier", "I9NumberQualifierOrder", "I10NumberBadges", + + // Revision: + "R3IslatainLanguage", "R4JsonLength", "R7RevisionAccountChange", "R10RevisionSize", "R12BytesIncrease", + "R13TimeSinceLastRevi", "R14CommentLength", + + // Meta , truth , Freq + // meta : + "FinalREVISION_SESSION_ID", // Truth: - //1.Undo - - // Freq : - - //1.5 features - - // Roll Boolean :Boolean (Double) - // Undo :Boolean (Double) - - //*********************************************** End Revision Features **************************************************************************************** - - //===========================================================================String Features==================================================================================== - - val df42 = df41.withColumn( - //statement String features: - "StringFeatures", concat($"SS1Property", lit(";"), $"SS2DataValue", lit(";"), $"SS3ItemValue", lit(";"), $"I11ItemTitle", - //Revision String Features: - lit(";"), $"R1languageRevision", - lit(";"), $"R2RevisionLanguageLocal", - lit(";"), $"R5RevisionAction", - lit(";"), $"R6PrevReviAction", - lit(";"), $"R8ParRevision", - lit(";"), $"R9RevisionTime", - lit(";"), $"R11ContentType", - lit(";"), $"R15RevisionSubaction", - lit(";"), $"R16PrevReviSubaction", - - lit(";"), $"USER_COUNTRY_CODE", - lit(";"), $"USER_CONTINENT_CODE", - lit(";"), $"USER_TIME_ZONE", - lit(";"), $"USER_REGION_CODE", - lit(";"), $"USER_CITY_NAME", - lit(";"), $"USER_COUNTY_NAME", - lit(";"), $"REVISION_TAGS")) - - val toArray = udf((record: String) => record.split(";").map(_.toString())) - val test1 = df42.withColumn("StringFeatures", toArray(col("StringFeatures"))) - // test1.show() - // test1.printSchema() - - val word2Vec = new Word2Vec().setInputCol("StringFeatures").setOutputCol("result").setVectorSize(20).setMinCount(0) - val model = word2Vec.fit(test1) - val result = model.transform(test1) //.rdd - - // result.show() - - val Todense = udf((b: Vector) => b.toDense) - val test_new2 = result.withColumn("result", Todense(col("result"))) - - val assembler = new VectorAssembler().setInputCols(Array( - "result", - - // character - "FinalC1uppercaseratio", "FinalC2lowercaseratio", "FinalC3alphanumericratio", "FinalC4asciiratio", "FinalC5bracketratio", "FinalC6digitalratio", - "FinalC7latinratio", "FinalC8whitespaceratio", "FinalC9puncratio", "FinalC10longcharacterseq", "FinalC11arabicratio", "FinalC12bengaliratio", - "FinalC13brahmiratio", "FinalC14cyrilinratio", "FinalC15hanratio", "Finalc16malysiaratio", "FinalC17tamiratio", "FinalC18telugratio", - "FinalC19symbolratio", "FinalC20alpharatio", "FinalC21visibleratio", "FinalC22printableratio", "FinalC23blankratio", "FinalC24controlratio", "FinalC25hexaratio", - - // Words - "FinalW1languagewordratio", "W2Iscontainlanguageword", "FinalW3lowercaseratio", "FinalW4longestword", "W5IscontainURL", "FinalW6badwordratio", - "FinalW7uppercaseratio", "FinalW8banwordratio", "W9FemalFirstName", "W10MaleFirstName", "W11IscontainBadword", "W12IsContainBanword", - "FinalW13NumberSharewords", "FinalW14NumberSharewordswithoutStopwords", "FinalW15PortionQid", "FinalW16PortionLnags", "FinalW17PortionLinks", - - //Sentences : - "FinalS1CommentTailLength", "FinalS2SimikaritySitelinkandLabel", "FinalS3SimilarityLabelandSitelink", "FinalS4SimilarityCommentComment", + "FinalUNDO_RESTORE_REVERTED", - // User : - "U1IsPrivileged", "U2IsBotUser", "U3IsBotuserWithFlaguser", "U4IsProperty", "U5IsTranslator", "U6IsRegister", "U7IPValue", "U8UserID", - 
"U9HasBirthDate", "U10HasDeathDate", + // Freq: + "FinalNumberofRevisionsUserContributed", + "FinalNumberofUniqueItemsUseredit", "FinalNumberRevisionItemHas", "FinalNumberUniqUserEditItem", "FinalFreqItem")).setOutputCol("features") + val Training_Data = assembler.transform(test_new2) - //Item: - - "I1NumberLabels", "I2NumberDescription", "I3NumberAliases", "I4NumberClaims", "I5NumberSitelinks", "I6NumberStatement", - "I7NumberReferences", "I8NumberQualifier", "I9NumberQualifierOrder", "I10NumberBadges", - - //Revision: - "R3IslatainLanguage", "R4JsonLength", "R7RevisionAccountChange", "R10RevisionSize", "R12BytesIncrease", - "R13TimeSinceLastRevi", "R14CommentLength", - - // Meta , truth , Freq - // meta : - "FinalREVISION_SESSION_ID", - // Truth: - "FinalUNDO_RESTORE_REVERTED", - - //Freq: - "FinalNumberofRevisionsUserContributed", - "FinalNumberofUniqueItemsUseredit", "FinalNumberRevisionItemHas", "FinalNumberUniqUserEditItem", "FinalFreqItem")).setOutputCol("features") - val Training_Data = assembler.transform(test_new2) - - // Prepare the data for classification: + // Prepare the data for classification: // NewData.registerTempTable("DB") // val Training_Data = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED from DB") - //val Data = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED as label from DB") // for logistic regrision + // val Data = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED as label from DB") // for logistic regrision - //Data.show() + // Data.show() - //val TestClassifiers = new Classifiers() -// - // TestClassifiers.RandomForestClassifer(Data, sqlContext) -// // TestClassifiers.DecisionTreeClassifier(Data, sqlContext) -// // TestClassifiers.LogisticRegrision(Data, sqlContext) -// // TestClassifiers.GradientBoostedTree(Data, sqlContext) -// // TestClassifiers.MultilayerPerceptronClassifier(Data, sqlContext) + // val TestClassifiers = new Classifiers() + // + // TestClassifiers.RandomForestClassifer(Data, sqlContext) + // // TestClassifiers.DecisionTreeClassifier(Data, sqlContext) + // // TestClassifiers.LogisticRegrision(Data, sqlContext) + // // TestClassifiers.GradientBoostedTree(Data, sqlContext) + // // TestClassifiers.MultilayerPerceptronClassifier(Data, sqlContext) Training_Data - - + } - - //*********************************************************************************************************************************************** - // Function 3:Testing XML and Vandalism Detection + // *********************************************************************************************************************************************** + // Function 3:Testing XML and Vandalism Detection def Testing_Start_StandardXMLParser_VD(sc: SparkContext): DataFrame = { val sqlContext = new org.apache.spark.sql.SQLContext(sc) import sqlContext.implicits._ import org.apache.spark.sql.functions._ // for UDF import org.apache.spark.sql.types._ - // Streaming records: - val jobConf = new JobConf() - val NormalXML_Parser_OBJ = new ParseNormalXML() - val RDD_OBJ = new ParseNormalXML() - - val Testing_RDD_All_Record = RDD_OBJ.Testing_DB_NormalXML_Parser(sc).cache() - - - // ======= Json part : - //Json RDD : Each record has its Revision iD: - val JsonRDD = Testing_RDD_All_Record.map(_.split("NNLL")).map(v => replacing_with_Quoto(v(0), v(8))).cache() - //JsonRDD.foreach(println) - //println(JsonRDD.count()) - - // Data set - val Ds_Json = sqlContext.jsonRDD(JsonRDD).select("key", "id", "labels", "descriptions", "aliases", "claims", "sitelinks").cache() 
- //Ds_Json.show() - // println(Ds_Json.count()) - - // ======= Tags part : // Contributor IP here is in Decimal format not IP format and It is converted in ParseNormalXml stage - val TagsRDD = Testing_RDD_All_Record.map(_.split("NNLL")).map(x => (x(0), x(1), x(2), x(3), x(4), x(5), x(6), x(7), x(8), x(9), x(10), x(11))).cache() - val DF_Tags = TagsRDD.toDF("Rid", "Itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "model", "format", "sha").cache() - // DF_Tags.show() - // println(DF_Tags.count()) - - //======== Join Json part with Tag Part:============================ - //Joining to have full data - val DF_First_DF_Result_Join_Tags_and_Json = DF_Tags.as("T1").join(Ds_Json.as("T2"), $"T1.Rid" === $"T2.key", "leftouter").select("Rid", "itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "labels", "descriptions", "aliases", "claims", "sitelinks", "model", "format", "sha") //.orderBy("Rid", "Itemid") - DF_First_DF_Result_Join_Tags_and_Json.registerTempTable("Data1") - val dfr_DATA_JsonTages1 = sqlContext.sql("select * from Data1 order by itemid ,Rid ").cache() - - val colNames = Seq("Rid2", "itemid2", "comment2", "pid2", "time2", "contributorIP2", "contributorID2", "contributorName2", "JsonText2", "labels2", "descriptions2", "aliases2", "claims2", "sitelinks2", "model2", "format2", "sha2") - val DF_Second = DF_First_DF_Result_Join_Tags_and_Json.toDF(colNames: _*) //.distinct() - DF_Second.registerTempTable("Data2") - - //===================================================================Parent // Previous Revision============================================================================================================== - //val DF_Joined = result1.as("df1").join(result2.as("df2"), col("itemid") === col("itemid2") && col("index1") === col("index2") + 1, "leftouter").select("Rid", "itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "labels", "descriptions", "aliases", "claims", "sitelinks", "model", "format", "sha", "Rid2", "itemid2", "comment2", "pid2", "time2", "contributorIP2", "contributorID2", "contributorName2", "JsonText2", "labels2", "descriptions2", "aliases2", "claims2", "sitelinks2", "model2", "format2", "sha2") - //.select("itemid", "Rid","pid","time","itemid2","Rid2","pid2","time2") - - //Joining based on Parent Id to get the previous cases: ParentID - val DF_Joined = DF_First_DF_Result_Join_Tags_and_Json.as("df1").join(DF_Second.as("df2"), $"df1.pid" === $"df2.Rid2", "leftouter").distinct() - - val RDD_After_JoinDF = DF_Joined.rdd.distinct() - val x = RDD_After_JoinDF.map(row => (row(0).toString().toInt, row)).cache() - val part = new RangePartitioner(4, x) - val partitioned = x.partitionBy(part).persist() // persist is important for this case and obligatory. 
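The pairing and partitioning above (key each row by its revision id, then `partitionBy` with a `RangePartitioner`) is what keeps the later per-partition work balanced, and the `persist` matters because `RangePartitioner` samples the keyed RDD to choose split points, so without caching every downstream action would replay the join lineage. A self-contained sketch of the same pattern on hypothetical data (names here are illustrative, not from the pipeline):

  import org.apache.spark.{ RangePartitioner, SparkConf, SparkContext }

  object RangePartitionSketch {
    def main(args: Array[String]): Unit = {
      val sc = new SparkContext(new SparkConf().setAppName("RangePartitionSketch").setMaster("local[2]"))
      // Pair RDD keyed by an integer revision id, as in the pipeline above.
      val pairs = sc.parallelize(Seq(7 -> "revA", 1 -> "revB", 5 -> "revC", 3 -> "revD"))
      // RangePartitioner samples 'pairs' to pick split points, so keys land range-ordered across partitions.
      val part = new RangePartitioner(2, pairs)
      val partitioned = pairs.partitionBy(part).persist() // partitionBy shuffles; persist avoids recomputing it
      partitioned.foreachPartition(it => println(it.mkString(", ")))
      sc.stop()
    }
  }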
- //partitioned.foreach(println) - // - // //=====================================================All Features Based on Categories of Features Data Type :================================================================================== - // - val Result_all_Features = partitioned.map { case (x, y) => (x.toString() + "," + All_Features(y).toString()) } // we convert the Pair RDD to String one LineRDD to be able to make DF based on "," - //Result_all_Features.foreach(println) - // println("nayef" + Result_all_Features.count()) - - // Conver the RDD of All Features to DataFrame: - - val schema = StructType( - - //0 - StructField("Rid", IntegerType, false) :: - - // Character Features : - /* 1*/ StructField("C1uppercaseratio", DoubleType, false) :: /*2 */ StructField("C2lowercaseratio", DoubleType, false) :: /*3*/ StructField("C3alphanumericratio", DoubleType, false) :: - /*4*/ StructField("C4asciiratio", DoubleType, false) :: /*5*/ StructField("C5bracketratio", DoubleType, false) :: /*6*/ StructField("C6digitalratio", DoubleType, false) :: - /*7*/ StructField("C7latinratio", DoubleType, false) :: /*8*/ StructField("C8whitespaceratio", DoubleType, false) :: /* 9*/ StructField("C9puncratio", DoubleType, false) :: - /*10*/ StructField("C10longcharacterseq", DoubleType, false) :: /*11*/ StructField("C11arabicratio", DoubleType, false) :: /*12*/ StructField("C12bengaliratio", DoubleType, false) :: - /*13 */ StructField("C13brahmiratio", DoubleType, false) :: /*14*/ StructField("C14cyrilinratio", DoubleType, false) :: /*15*/ StructField("C15hanratio", DoubleType, false) :: - /*16*/ StructField("c16malysiaratio", DoubleType, false) :: /*17*/ StructField("C17tamiratio", DoubleType, false) :: /*18*/ StructField("C18telugratio", DoubleType, false) :: - /*19 */ StructField("C19symbolratio", DoubleType, false) :: /*20 */ StructField("C20alpharatio", DoubleType, false) :: /*21*/ StructField("C21visibleratio", DoubleType, false) :: - /*22*/ StructField("C22printableratio", DoubleType, false) :: /*23*/ StructField("C23blankratio", DoubleType, false) :: /*24 */ StructField("C24controlratio", DoubleType, false) :: - /* 25 */ StructField("C25hexaratio", DoubleType, false) :: - - //word Features: - /*26*/ StructField("W1languagewordratio", DoubleType, false) :: /*27 Boolean */ StructField("W2Iscontainlanguageword", DoubleType, false) :: /*28*/ StructField("W3lowercaseratio", DoubleType, false) :: - /*29 Integer */ StructField("W4longestword", IntegerType, false) :: /*30 Boolean */ StructField("W5IscontainURL", DoubleType, false) :: /*31*/ StructField("W6badwordratio", DoubleType, false) :: - /*32*/ StructField("W7uppercaseratio", DoubleType, false) :: /*33*/ StructField("W8banwordratio", DoubleType, false) :: /*34 Boolean */ StructField("W9FemalFirstName", DoubleType, false) :: - /*35 Boolean */ StructField("W10MaleFirstName", DoubleType, false) :: /*36 Boolean */ StructField("W11IscontainBadword", DoubleType, false) :: /*37 Boolean*/ StructField("W12IsContainBanword", DoubleType, false) :: - /*38 integer */ StructField("W13NumberSharewords", DoubleType, false) :: /*39 Integer */ StructField("W14NumberSharewordswithoutStopwords", DoubleType, false) :: - /*40*/ StructField("W15PortionQid", DoubleType, false) :: /*41*/ StructField("W16PortionLnags", DoubleType, false) :: /*42*/ StructField("W17PortionLinks", DoubleType, false) :: - - // - // // Sentences Features: - /*43*/ StructField("S1CommentTailLength", DoubleType, false) :: /*44*/ StructField("S2SimikaritySitelinkandLabel", DoubleType, false) :: /*45*/ 
StructField("S3SimilarityLabelandSitelink", DoubleType, false) :: /*46*/ StructField("S4SimilarityCommentComment", DoubleType, false) :: - // - // // Statements Features : - /*47*/ StructField("SS1Property", StringType, false) :: /*48*/ StructField("SS2DataValue", StringType, false) :: /*49*/ StructField("SS3ItemValue", StringType, false) :: - // - // - // //User Features : - /*50 Boolean*/ StructField("U1IsPrivileged", DoubleType, false) :: /*51 Boolean*/ StructField("U2IsBotUser", DoubleType, false) :: /*52 Boolean*/ StructField("U3IsBotuserWithFlaguser", DoubleType, false) :: - /*53 Boolean*/ StructField("U4IsProperty", DoubleType, false) :: /*54 Boolean*/ StructField("U5IsTranslator", DoubleType, false) :: /*55 Boolean*/ StructField("U6IsRegister", DoubleType, false) :: - /*56*/ StructField("U7IPValue", DoubleType, false) :: /*57*/ StructField("U8UserID", IntegerType, false) :: /*58*/ StructField("U9HasBirthDate", DoubleType, false) :: /*59*/ StructField("U10HasDeathDate", DoubleType, false) :: - - //Items Features : - - /*60*/ StructField("I1NumberLabels", DoubleType, false) :: /*61*/ StructField("I2NumberDescription", DoubleType, false) :: /*62*/ StructField("I3NumberAliases", DoubleType, false) :: /*63*/ StructField("I4NumberClaims", DoubleType, false) :: - /*64*/ StructField("I5NumberSitelinks", DoubleType, false) :: /*65*/ StructField("I6NumberStatement", DoubleType, false) :: /*66*/ StructField("I7NumberReferences", DoubleType, false) :: /*67*/ StructField("I8NumberQualifier", DoubleType, false) :: - /*68*/ StructField("I9NumberQualifierOrder", DoubleType, false) :: /*69*/ StructField("I10NumberBadges", DoubleType, false) :: /*70*/ StructField("I11ItemTitle", StringType, false) :: - - // Revision Features: - /*71*/ StructField("R1languageRevision", StringType, false) :: /*72*/ StructField("R2RevisionLanguageLocal", StringType, false) :: /*73*/ StructField("R3IslatainLanguage", DoubleType, false) :: - /*74*/ StructField("R4JsonLength", DoubleType, false) :: /*75*/ StructField("R5RevisionAction", StringType, false) :: /*76*/ StructField("R6PrevReviAction", StringType, false) :: - /*77*/ StructField("R7RevisionAccountChange", DoubleType, false) :: /*78*/ StructField("R8ParRevision", StringType, false) :: /*79*/ StructField("R9RevisionTime", StringType, false) :: - /*80*/ StructField("R10RevisionSize", DoubleType, false) :: /*81*/ StructField("R11ContentType", StringType, false) :: /*82*/ StructField("R12BytesIncrease", DoubleType, false) :: - /*83*/ StructField("R13TimeSinceLastRevi", DoubleType, false) :: /*84*/ StructField("R14CommentLength", DoubleType, false) :: /*85*/ StructField("R15RevisionSubaction", StringType, false) :: - /*86*/ StructField("R16PrevReviSubaction", StringType, false) :: - - Nil) - - val rowRDD = Result_all_Features.map(line => line.split(",")).map(e ⇒ Row(e(0).toInt // character feature column - , e(1).toDouble, e(2).toDouble, e(3).toDouble, e(4).toDouble, e(5).toDouble, e(6).toDouble, e(7).toDouble, e(8).toDouble, e(9).toDouble, RoundDouble(e(10).toDouble), - e(11).toDouble, e(12).toDouble, e(13).toDouble, e(14).toDouble, e(15).toDouble, e(16).toDouble, e(17).toDouble, e(18).toDouble, e(19).toDouble, e(20).toDouble, e(21).toDouble, e(22).toDouble, e(23).toDouble, e(24).toDouble, e(25).toDouble //Word Feature column - , e(26).toDouble, e(27).toDouble, e(28).toDouble, e(29).toDouble.toInt, e(30).toDouble, e(31).toDouble, e(32).toDouble, e(33).toDouble, e(34).toDouble, e(35).toDouble, e(36).toDouble, e(37).toDouble, RoundDouble(e(38).toDouble), 
RoundDouble(e(39).toDouble), e(40).toDouble, e(41).toDouble, e(42).toDouble // Sentences Features column: - , RoundDouble(e(43).toDouble), e(44).toDouble, e(45).toDouble, e(46).toDouble //Statement Features Column: - , e(47), e(48), e(49) // User Features Column: - , e(50).toDouble, e(51).toDouble, e(52).toDouble, e(53).toDouble, e(54).toDouble, e(55).toDouble, e(56).toDouble, e(57).toDouble.toInt, e(58).toDouble, e(59).toDouble //Item Features column: - , e(60).toDouble, e(61).toDouble, e(62).toDouble, e(63).toDouble, e(64).toDouble, e(65).toDouble, e(66).toDouble, e(67).toDouble, e(68).toDouble, e(69).toDouble, "Q" + e(70).toDouble.toInt.toString() //Revision Features Column: - , e(71), e(72), e(73).toDouble, e(74).toDouble, e(75), e(76), e(77).toDouble, e(78), e(79), e(80).toDouble, e(81), e(82).toDouble, e(83).toDouble, e(84).toDouble, e(85), e(86))) - - //a.User Frequency: - //number of revisions a user has contributed - //val resu= DF_Tags.groupBy("contributorID").agg(count("Rid")) - DF_Tags.registerTempTable("TagesTable") - val ContributorFreq_for_Each_Revision_DF = sqlContext.sql("select contributorID as CIDUSER1, count(Rid) as NumberofRevisionsUserContributed from TagesTable where contributorID !='0' group by contributorID ") //.drop("CIDUSER1") - //ContributorFreq_for_Each_Revision_DF.show() - - //b.Cumulated : Number of a unique Item a user has contributed. - val CumulatedNumberof_uniqueItemsForUser_DF = sqlContext.sql("select contributorID as CIDUSER2, COUNT(DISTINCT itemid) as NumberofUniqueItemsUseredit from TagesTable where contributorID !='0' group by contributorID") //.drop("CIDUSER2") - //CumulatedNumberof_uniqueItemsForUser_DF.show() - - //1.Item Frequency: - // number of revisions an Item has - val ItemFrequ_DF = sqlContext.sql("select itemid, count(Rid) as NumberRevisionItemHas from TagesTable group by itemid") - // ItemFrequ_DF.show() - - //2. Cumulate number of unique users have edited the Item : Did not consider the users IP. Contributor is an IP or Name. we consider name - val CumulatedNumberof_UniqueUserForItem_DF = sqlContext.sql("select itemid, COUNT(DISTINCT contributorID) as NumberUniqUserEditItem from TagesTable where contributorID !='0' group by itemid") - //CumulatedNumberof_UniqueUserForItem_DF.show() - - //3. 
freq each Item : - val Fre_Item_DF = sqlContext.sql("select itemid, COUNT(itemid) as FreqItem from TagesTable group by itemid") - // Fre_Item_DF.show() - - //***************************************************************************************************************************************** - // This is Main DataFrame: - val BeforeJoin_All_Features = sqlContext.createDataFrame(rowRDD, schema) - //BeforeJoin_All_Features.show() - - //********************************** User feature Join - - // Join1 for add The first User Feature : number of revisions a user has contributed - val AfterJoinUser1_All_Features = BeforeJoin_All_Features.as("T1").join(ContributorFreq_for_Each_Revision_DF.as("T2"), $"T1.U8UserID" === $"T2.CIDUSER1", "leftouter").drop("CIDUSER1") - //AfterJoinUser1_All_Features.show() - - // Join2 for add The second User Feature - val AfterJoinUser2_All_Features = AfterJoinUser1_All_Features.as("T1").join(CumulatedNumberof_uniqueItemsForUser_DF.as("T2"), $"T1.U8UserID" === $"T2.CIDUSER2", "leftouter").drop("CIDUSER2") - //AfterJoinUser2_All_Features.show() - - //********************************** Item Feature Join - // Join3 for add The First Item Feature :number of revisions an Item has - val AfterJoinItem3_All_Features = AfterJoinUser2_All_Features.as("T1").join(ItemFrequ_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid") - // AfterJoinItem3_All_Features.show() - - // Join4 for add The Second Item Feature - val AfterJoinItem4_All_Features = AfterJoinItem3_All_Features.as("T1").join(CumulatedNumberof_UniqueUserForItem_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid") - // AfterJoinItem4_All_Features.show() - - // Join5 for add The Third Item Feature - val AfterJoinItem5_All_Features = AfterJoinItem4_All_Features.as("T1").join(Fre_Item_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid") - //2 AfterJoinItem5_All_Features.show() - - //******************************** - - //*Geografical information Feature from Meta File - //REVISION_ID|REVISION_SESSION_ID|USER_COUNTRY_CODE|USER_CONTINENT_CODE|USER_TIME_ZONE|USER_REGION_CODE|USER_CITY_NAME|USER_COUNTY_NAME|REVISION_TAGS - val df_GeoInf = sqlContext.read - .format("com.databricks.spark.csv") - .option("header", "true") // Use first line of all files as header - .option("inferSchema", "true") // Automatically infer data types - .load("hdfs://localhost:9000/mydata/Meta.csv").select("REVISION_ID", "REVISION_SESSION_ID", "USER_COUNTRY_CODE", "USER_CONTINENT_CODE", "USER_TIME_ZONE", "USER_REGION_CODE", "USER_CITY_NAME", "USER_COUNTY_NAME", "REVISION_TAGS") - // df_GeoInf.show() - - val df_Truth = sqlContext.read - .format("com.databricks.spark.csv") - .option("header", "true") // Use first line of all files as header - .option("inferSchema", "true") // Automatically infer data types - .load("hdfs://localhost:9000/mydata/truth.csv").select("REVISION_ID", "ROLLBACK_REVERTED", "UNDO_RESTORE_REVERTED") - // df_GeoInf.show() - - val AfterJoinGeoInfo_All_Features = AfterJoinItem5_All_Features.as("T1").join(df_GeoInf.as("T2"), $"T1.Rid" === $"T2.REVISION_ID", "leftouter").drop("REVISION_ID").cache() - // AfterJoinGeoInfo_All_Features.show() - - val Final_All_Features = AfterJoinGeoInfo_All_Features.as("T1").join(df_Truth.as("T2"), $"T1.Rid" === $"T2.REVISION_ID", "leftouter").drop("REVISION_ID").cache() - //Final_All_Features.show() - - // Pre- process Data 
============================================================================================================================================================ - - // For String Column, We fill the Null values by "NA": - - var Fill_Missing_Final_All_Features = Final_All_Features.na.fill("NA", Seq("USER_COUNTRY_CODE", "USER_CONTINENT_CODE", "USER_TIME_ZONE", "USER_REGION_CODE", "USER_CITY_NAME", "USER_COUNTY_NAME", "REVISION_TAGS")).cache() - - // For Integer Frequency Column, We fill the Null values by 0: - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.na.fill(0, Seq("FreqItem", "NumberUniqUserEditItem", "NumberRevisionItemHas", "NumberofUniqueItemsUseredit", "NumberofRevisionsUserContributed", "REVISION_SESSION_ID")).cache() - //Fill_Missing_Final_All_Features.show() - - val BoolToDoubleUDF = udf { (BoolAsString: String) => if (BoolAsString == "T") 1.0 else 0.0 } - val IntegerToDouble = udf { (IntegerRevisionSessionID: Integer) => IntegerRevisionSessionID.toDouble } - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalROLLBACK_REVERTED", BoolToDoubleUDF(col("ROLLBACK_REVERTED"))) - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalUNDO_RESTORE_REVERTED", BoolToDoubleUDF(col("UNDO_RESTORE_REVERTED"))) - - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalREVISION_SESSION_ID", IntegerToDouble(col("REVISION_SESSION_ID"))) - - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberofRevisionsUserContributed", IntegerToDouble(col("NumberofRevisionsUserContributed"))) - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberofUniqueItemsUseredit", IntegerToDouble(col("NumberofUniqueItemsUseredit"))) - - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberRevisionItemHas", IntegerToDouble(col("NumberRevisionItemHas"))) - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberUniqUserEditItem", IntegerToDouble(col("NumberUniqUserEditItem"))) - Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalFreqItem", IntegerToDouble(col("FreqItem"))) - - //===========================================================================Caharacter Features : Double , Integer Features ==================================================================================== - //Double Ratio: For Ratio Double column, Fill -1 value by Median:Character Features + Ratio of Word Features : - var Samples = Fill_Missing_Final_All_Features.sample(false, 0.001).cache() //.where($"S2SimikaritySitelinkandLabel">0.0 || $"S3SimilarityLabelandSitelink">0.0 || $"S4SimilarityCommentComment">0.0) - Samples.registerTempTable("df") - - val Query = "select " + - "percentile_approx(C1uppercaseratio, 0.5) as meadian1" + "," + "percentile_approx(C2lowercaseratio, 0.5) as median2" + " ," + - "percentile_approx(C3alphanumericratio, 0.5) as median3" + "," + "percentile_approx(C4asciiratio, 0.5) as median4" + "," + - "percentile_approx(C5bracketratio, 0.5) as median5" + "," + "percentile_approx(C6digitalratio, 0.5) as median6" + "," + - "percentile_approx(C7latinratio, 0.5) as median7" + "," + "percentile_approx(C8whitespaceratio, 0.5) as median8" + "," + - "percentile_approx(C9puncratio, 0.5) as median9" + "," + "percentile_approx(C11arabicratio, 0.5) as median11" + "," + - "percentile_approx(C12bengaliratio, 0.5) as median12" + "," + 
"percentile_approx(C13brahmiratio, 0.5) as median13" + "," + - "percentile_approx(C14cyrilinratio, 0.5) as median14" + "," + "percentile_approx(C15hanratio, 0.5) as median15" + "," + - "percentile_approx(c16malysiaratio, 0.5) as median16" + "," + - "percentile_approx(C17tamiratio, 0.5) as median17" + "," + "percentile_approx(C18telugratio, 0.5) as median18" + "," + - "percentile_approx(C19symbolratio, 0.5) as median19" + "," + "percentile_approx(C20alpharatio, 0.5) as median20" + "," + - "percentile_approx(C21visibleratio, 0.5) as median21" + "," + "percentile_approx(C22printableratio, 0.5) as median22" + "," + - "percentile_approx(C23blankratio, 0.5) as median23" + "," + "percentile_approx(C24controlratio, 0.5) as median24" + "," + - "percentile_approx(C25hexaratio, 0.5) as median25" ++ "," + "percentile_approx(W1languagewordratio, 0.5) as median26" + "," + - "percentile_approx(W3lowercaseratio, 0.5) as median27" + "," + "percentile_approx(W6badwordratio, 0.5) as median28" + "," + - "percentile_approx(W7uppercaseratio, 0.5) as median27" + "," + "percentile_approx(W8banwordratio, 0.5) as median27" + " from df" - - val medianValues = sqlContext.sql(Query).rdd - val Median = medianValues.first() - - // Median : - // Character Ratio Features: UDF - val lkpUDF1 = udf { (i: Double) => if (i == 0) Median(0).toString().toDouble else i } - val lkpUDF2 = udf { (i: Double) => if (i == 0) Median(1).toString().toDouble else i } - val lkpUDF3 = udf { (i: Double) => if (i == 0) Median(2).toString().toDouble else i } - val lkpUDF4 = udf { (i: Double) => if (i == 0) Median(3).toString().toDouble else i } - val lkpUDF5 = udf { (i: Double) => if (i == 0) Median(4).toString().toDouble else i } - val lkpUDF6 = udf { (i: Double) => if (i == 0) Median(5).toString().toDouble else i } - val lkpUDF7 = udf { (i: Double) => if (i == 0) Median(6).toString().toDouble else i } - val lkpUDF8 = udf { (i: Double) => if (i == 0) Median(7).toString().toDouble else i } - val lkpUDF9 = udf { (i: Double) => if (i == 0) Median(8).toString().toDouble else i } - - val lkpUDF11 = udf { (i: Double) => if (i == 0) Median(9).toString().toDouble else i } - val lkpUDF12 = udf { (i: Double) => if (i == 0) Median(10).toString().toDouble else i } - val lkpUDF13 = udf { (i: Double) => if (i == 0) Median(11).toString().toDouble else i } - val lkpUDF14 = udf { (i: Double) => if (i == 0) Median(12).toString().toDouble else i } - val lkpUDF15 = udf { (i: Double) => if (i == 0) Median(13).toString().toDouble else i } - val lkpUDF16 = udf { (i: Double) => if (i == 0) Median(14).toString().toDouble else i } - val lkpUDF17 = udf { (i: Double) => if (i == 0) Median(15).toString().toDouble else i } - val lkpUDF18 = udf { (i: Double) => if (i == 0) Median(16).toString().toDouble else i } - val lkpUDF19 = udf { (i: Double) => if (i == 0) Median(17).toString().toDouble else i } - val lkpUDF20 = udf { (i: Double) => if (i == 0) Median(18).toString().toDouble else i } - val lkpUDF21 = udf { (i: Double) => if (i == 0) Median(19).toString().toDouble else i } - val lkpUDF22 = udf { (i: Double) => if (i == 0) Median(20).toString().toDouble else i } - val lkpUDF23 = udf { (i: Double) => if (i == 0) Median(21).toString().toDouble else i } - val lkpUDF24 = udf { (i: Double) => if (i == 0) Median(22).toString().toDouble else i } - val lkpUDF25 = udf { (i: Double) => if (i == 0) Median(23).toString().toDouble else i } - - val df1 = Fill_Missing_Final_All_Features.withColumn("FinalC1uppercaseratio", lkpUDF1(col("C1uppercaseratio"))) 
//.drop("C1uppercaseratio").cache() - val df2 = df1.withColumn("FinalC2lowercaseratio", lkpUDF2(col("C2lowercaseratio"))) //.drop("C2lowercaseratio").cache() - //df1.unpersist() - val df3 = df2.withColumn("FinalC3alphanumericratio", lkpUDF3(col("C3alphanumericratio"))) //.drop("C3alphanumericratio").cache() - //df2.unpersist() - val df4 = df3.withColumn("FinalC4asciiratio", lkpUDF4(col("C4asciiratio"))) //.drop("C4asciiratio").cache() - //df3.unpersist() - val df5 = df4.withColumn("FinalC5bracketratio", lkpUDF5(col("C5bracketratio"))) //.drop("C5bracketratio").cache() - //df4.unpersist() - val df6 = df5.withColumn("FinalC6digitalratio", lkpUDF6(col("C6digitalratio"))) //.drop("C6digitalratio").cache() - //df5.unpersist() - val df7 = df6.withColumn("FinalC7latinratio", lkpUDF7(col("C7latinratio"))) //.drop("C7latinratio").cache() - //df6.unpersist() - val df8 = df7.withColumn("FinalC8whitespaceratio", lkpUDF8(col("C8whitespaceratio"))) //.drop("C8whitespaceratio").cache() - //df7.unpersist() - val df9 = df8.withColumn("FinalC9puncratio", lkpUDF9(col("C9puncratio"))) //.drop("C9puncratio").cache() - - // Mean : - // character integer values : - val Mean_C10longcharacterseq = Samples.agg(mean("C10longcharacterseq")).head() - val C10_Mean = Mean_C10longcharacterseq.getDouble(0) - val lkpUDFC10 = udf { (i: Double) => if (i == 0) C10_Mean else i } - val df10 = df9.withColumn("FinalC10longcharacterseq", lkpUDFC10(col("C10longcharacterseq"))) - - //Median - val df11 = df10.withColumn("FinalC11arabicratio", lkpUDF11(col("C11arabicratio"))) //.drop("C11arabicratio").cache() - // df9.unpersist() - val df12 = df11.withColumn("FinalC12bengaliratio", lkpUDF12(col("C12bengaliratio"))) //.drop("C12bengaliratio").cache() - //df11.unpersist() - val df13 = df12.withColumn("FinalC13brahmiratio", lkpUDF13(col("C13brahmiratio"))) //.drop("C13brahmiratio").cache() - // df12.unpersist() - val df14 = df13.withColumn("FinalC14cyrilinratio", lkpUDF14(col("C14cyrilinratio"))) //.drop("C14cyrilinratio").cache() - // df13.unpersist() - val df15 = df14.withColumn("FinalC15hanratio", lkpUDF15(col("C15hanratio"))) //.drop("C15hanratio").cache() - // df14.unpersist() - val df16 = df15.withColumn("Finalc16malysiaratio", lkpUDF16(col("c16malysiaratio"))) //.drop("c16malysiaratio").cache() - //df15.unpersist() - val df17 = df16.withColumn("FinalC17tamiratio", lkpUDF17(col("C17tamiratio"))) //.drop("C17tamiratio").cache() - //df16.unpersist() - val df18 = df17.withColumn("FinalC18telugratio", lkpUDF18(col("C18telugratio"))) //.drop("C18telugratio").cache() - //df17.unpersist() - val df19 = df18.withColumn("FinalC19symbolratio", lkpUDF19(col("C19symbolratio"))) //.drop("C19symbolratio").cache() - //df18.unpersist() - val df20 = df19.withColumn("FinalC20alpharatio", lkpUDF20(col("C20alpharatio"))) //.drop("C20alpharatio").cache() - // df19.unpersist() - val df21 = df20.withColumn("FinalC21visibleratio", lkpUDF21(col("C21visibleratio"))) //.drop("C21visibleratio").cache() - // df20.unpersist() - val df22 = df21.withColumn("FinalC22printableratio", lkpUDF22(col("C22printableratio"))) //.drop("C22printableratio").cache() - //df21.unpersist() - val df23 = df22.withColumn("FinalC23blankratio", lkpUDF23(col("C23blankratio"))) //.drop("C23blankratio").cache() - // df22.unpersist() - val df24 = df23.withColumn("FinalC24controlratio", lkpUDF24(col("C24controlratio"))) //.drop("C24controlratio").cache() - //df23.unpersist() - val df25 = df24.withColumn("FinalC25hexaratio", lkpUDF25(col("C25hexaratio"))) //.drop("C25hexaratio").cache() - - 
//************************************************End Character Features **************************************************************************************** - - //************************************************Start Word Features **************************************************************************************** - - // Word Ratio Features : UDF - val lkpUDFW1 = udf { (i: Double) => if (i == 0) Median(24).toString().toDouble else i } - val lkpUDFW3 = udf { (i: Double) => if (i == 0) Median(25).toString().toDouble else i } - val lkpUDFW6 = udf { (i: Double) => if (i == 0) Median(26).toString().toDouble else i } - val lkpUDFW7 = udf { (i: Double) => if (i == 0) Median(27).toString().toDouble else i } - val lkpUDFW8 = udf { (i: Double) => if (i == 0) Median(28).toString().toDouble else i } - - //1. - val df26 = df25.withColumn("FinalW1languagewordratio", lkpUDFW1(col("W1languagewordratio"))) //.drop("W1languagewordratio").cache() - - //2.Boolean(Double) IsContainLanguageWord - - //3. - val df27 = df26.withColumn("FinalW3lowercaseratio", lkpUDFW3(col("W3lowercaseratio"))) //.drop("W3lowercaseratio").cache() - // df26.unpersist() - - //4. Integer " Mean: - val Mean_W4longestword = Samples.agg(mean("W4longestword")).head() - val W4_Mean = Mean_W4longestword.getDouble(0) - val lkpUDFW4 = udf { (i: Double) => if (i == 0) W4_Mean else i } - val df28 = df27.withColumn("FinalW4longestword", lkpUDFW4(col("W4longestword"))) - - //5. Boolean (Double ) W5IscontainURL - //6. - val df29 = df28.withColumn("FinalW6badwordratio", lkpUDFW6(col("W6badwordratio"))) //.drop("W6badwordratio").cache() - - //7. - val df30 = df29.withColumn("FinalW7uppercaseratio", lkpUDFW7(col("W7uppercaseratio"))) //.drop("W7uppercaseratio").cache() - - //8. - val df31 = df30.withColumn("FinalW8banwordratio", lkpUDFW8(col("W8banwordratio"))) //.drop("W8banwordratio").cache() - - //9.FemalFirst Boolean(Double) - //10.Male First Boolean(Double) - //11.ContainBadWord Boolean(Double) - //12ContainBanWord Boolean(Double) - - //13. Integer(Double): - val Mean_W13W13NumberSharewords = Samples.agg(mean("W13NumberSharewords")).head() - val W13_Mean = Mean_W13W13NumberSharewords.getDouble(0) - val lkpUDFW13 = udf { (i: Double) => if (i == 0) W13_Mean else i } - val df32 = df31.withColumn("FinalW13NumberSharewords", lkpUDFW13(col("W13NumberSharewords"))) - - //14. Integer (Double): - val Mean_W14NumberSharewordswithoutStopwords = Samples.agg(mean("W14NumberSharewordswithoutStopwords")).head() - val W14_Mean = Mean_W14NumberSharewordswithoutStopwords.getDouble(0) - val lkpUDFW14 = udf { (i: Double) => if (i == 0) W14_Mean else i } - val df33 = df32.withColumn("FinalW14NumberSharewordswithoutStopwords", lkpUDFW14(col("W14NumberSharewordswithoutStopwords"))) - - // 15. Double (Not ratio): - val Mean_W15PortionQid = Samples.agg(mean("W15PortionQid")).head() - val W15_Mean = Mean_W15PortionQid.getDouble(0) - val lkpUDFW15 = udf { (i: Double) => if (i == 0) W15_Mean else i } - val df34 = df33.withColumn("FinalW15PortionQid", lkpUDFW15(col("W15PortionQid"))) - - //16. 
Double(Not Ratio): - val Mean_W16PortionLnags = Samples.agg(mean("W16PortionLnags")).head() - val W16_Mean = Mean_W16PortionLnags.getDouble(0) - val lkpUDFW16 = udf { (i: Double) => if (i == 0) W16_Mean else i } - val df35 = df34.withColumn("FinalW16PortionLnags", lkpUDFW16(col("W16PortionLnags"))) - - //17.Double(Not ratio): - val Mean_W17PortionLinks = Samples.agg(mean("W17PortionLinks")).head() - val W17_Mean = Mean_W17PortionLinks.getDouble(0) - val lkpUDFW17 = udf { (i: Double) => if (i == 0) W17_Mean else i } - val df36 = df35.withColumn("FinalW17PortionLinks", lkpUDFW17(col("W17PortionLinks"))) - - //************************************************End Word Features **************************************************************************************** - - //************************************************Start Sentences Features **************************************************************************************** - // 1. Integer(Double) - val Mean_S1CommentTailLength = Samples.agg(mean("S1CommentTailLength")).head() - val S1_Mean = RoundDouble(Mean_S1CommentTailLength.getDouble(0)) - val lkpUDFS1 = udf { (i: Double) => if (i == 0) S1_Mean else i } - val df37 = df36.withColumn("FinalS1CommentTailLength", lkpUDFS1(col("S1CommentTailLength"))) - - //2. Double but Not ratio values : - val Mean_S2SimikaritySitelinkandLabel = Samples.agg(mean("S2SimikaritySitelinkandLabel")).head() - val S2_Mean = RoundDouble(Mean_S2SimikaritySitelinkandLabel.getDouble(0)) - val lkpUDFS2 = udf { (i: Double) => if (i == 0) S2_Mean else i } - val df39 = df37.withColumn("FinalS2SimikaritySitelinkandLabel", lkpUDFS2(col("S2SimikaritySitelinkandLabel"))) - - //3. Double but Not ratio values : - val Mean_S3SimilarityLabelandSitelink = Samples.agg(mean("S3SimilarityLabelandSitelink")).head() - val S3_Mean = RoundDouble(Mean_S3SimilarityLabelandSitelink.getDouble(0)) - val lkpUDFS3 = udf { (i: Double) => if (i == 0.0) S3_Mean else i } - val df40 = df39.withColumn("FinalS3SimilarityLabelandSitelink", lkpUDFS3(col("S3SimilarityLabelandSitelink"))) - - //4. Double but Not ratio values : - val Mean_S4SimilarityCommentComment = Samples.agg(mean("S4SimilarityCommentComment")).head() - val S4_Mean = RoundDouble(Mean_S4SimilarityCommentComment.getDouble(0)) - val lkpUDFS4 = udf { (i: Double) => if (i == 0.0) S4_Mean else i } - val df41 = df40.withColumn("FinalS4SimilarityCommentComment", lkpUDFS4(col("S4SimilarityCommentComment"))) - - //df41.show() - //************************************************End Sentences Features **************************************************************************************** - //*********************************************** Start Statement Features **************************************************************************************** - //1. String - //2. String - //3. String - //************************************************End Statement Features **************************************************************************************** - //*********************************************** Start User Features **************************************************************************************** - - //1.Boolean(Double) - //2.Boolean(Double) - //3.Boolean(Double) - //4.Boolean(Double) - //5.Boolean(Double) - //6.Boolean(Double) - //7. (Double) IP No need to fill Missing Data - //8. 
(Double) ID No need to fill Missing Data - //9.Boolean(Double) - //10.Boolean(Double) - - //*********************************************** End User Features **************************************************************************************** - //*********************************************** Start Item Features **************************************************************************************** - //1. Integer (Double) No need to fill missing values - //2. Integer (Double) No need to fill missing values - //3. Integer (Double) No need to fill missing values - //4. Integer (Double) No need to fill missing values - //5. Integer (Double) No need to fill missing values - //6. Integer (Double) No need to fill missing values - //7. Integer (Double) No need to fill missing values - //8. Integer (Double) No need to fill missing values - //9. Integer (Double) No need to fill missing values - //10. Integer (Double) No need to fill missing values - //11. String - //*********************************************** End Item Features **************************************************************************************** - //*********************************************** Start Revision Features **************************************************************************************** - //1.String - //2.String - //3.Boolean (Double) - //4.Integer(Double) - //5.String - //6.String - //7. Boolean(Double) - //8. String - //9.String - //10. Integer (Double) - //11.String - //12. integer(Double) - //13. Long(Double) - //14. integer (Double) - //15.String - //16.String - //*********************************************** End Revision Features **************************************************************************************** - //*********************************************** Meta Data , Truth Data and Frequnces **************************************************************************************** - //Meta - // 1.Revision Session :Integer (Converted to Double) - //2. 
User Country Code
-//3.User Continent Code
-//4.User Time Size
-//5.User Region Code
-//6.User-city Name
-//7.User Country Name
-//8.RevisionTags
-
+ // Streaming records:
+ val jobConf = new JobConf()
+ val NormalXML_Parser_OBJ = new ParseNormalXML()
+ val RDD_OBJ = new ParseNormalXML()
+
+ val Testing_RDD_All_Record = RDD_OBJ.Testing_DB_NormalXML_Parser(sc).cache()
+
+ // ======= Json part :
+ // Json RDD : each record has its Revision ID:
+ val JsonRDD = Testing_RDD_All_Record.map(_.split("NNLL")).map(v => replacing_with_Quoto(v(0), v(8))).cache()
+ // JsonRDD.foreach(println)
+ // println(JsonRDD.count())
+
+ // Data set
+ val Ds_Json = sqlContext.jsonRDD(JsonRDD).select("key", "id", "labels", "descriptions", "aliases", "claims", "sitelinks").cache()
+ // Ds_Json.show()
+ // println(Ds_Json.count())
+
+ // ======= Tags part : // Contributor IP here is in decimal format, not IP format; it is converted in the ParseNormalXml stage
+ val TagsRDD = Testing_RDD_All_Record.map(_.split("NNLL")).map(x => (x(0), x(1), x(2), x(3), x(4), x(5), x(6), x(7), x(8), x(9), x(10), x(11))).cache()
+ val DF_Tags = TagsRDD.toDF("Rid", "Itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "model", "format", "sha").cache()
+ // DF_Tags.show()
+ // println(DF_Tags.count())
+
+ // ======== Join Json part with Tags part ============================
+ // Joining to have the full data
+ val DF_First_DF_Result_Join_Tags_and_Json = DF_Tags.as("T1").join(Ds_Json.as("T2"), $"T1.Rid" === $"T2.key", "leftouter").select("Rid", "itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "labels", "descriptions", "aliases", "claims", "sitelinks", "model", "format", "sha") // .orderBy("Rid", "Itemid")
+ DF_First_DF_Result_Join_Tags_and_Json.registerTempTable("Data1")
+ val dfr_DATA_JsonTages1 = sqlContext.sql("select * from Data1 order by itemid ,Rid ").cache()
+
+ val colNames = Seq("Rid2", "itemid2", "comment2", "pid2", "time2", "contributorIP2", "contributorID2", "contributorName2", "JsonText2", "labels2", "descriptions2", "aliases2", "claims2", "sitelinks2", "model2", "format2", "sha2")
+ val DF_Second = DF_First_DF_Result_Join_Tags_and_Json.toDF(colNames: _*) // .distinct()
+ DF_Second.registerTempTable("Data2")
+
+ // =================================================================== Parent // Previous Revision ==============================================================================================================
+ // val DF_Joined = result1.as("df1").join(result2.as("df2"), col("itemid") === col("itemid2") && col("index1") === col("index2") + 1, "leftouter").select("Rid", "itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "labels", "descriptions", "aliases", "claims", "sitelinks", "model", "format", "sha", "Rid2", "itemid2", "comment2", "pid2", "time2", "contributorIP2", "contributorID2", "contributorName2", "JsonText2", "labels2", "descriptions2", "aliases2", "claims2", "sitelinks2", "model2", "format2", "sha2")
+ // .select("itemid", "Rid","pid","time","itemid2","Rid2","pid2","time2")
+
+ // Joining based on the parent id to get the previous revisions: ParentID
+ val DF_Joined = DF_First_DF_Result_Join_Tags_and_Json.as("df1").join(DF_Second.as("df2"), $"df1.pid" === $"df2.Rid2", "leftouter").distinct()
+
+ val RDD_After_JoinDF = DF_Joined.rdd.distinct()
+ val x = RDD_After_JoinDF.map(row => (row(0).toString().toInt, row)).cache()
+ val part = new RangePartitioner(4, x)
+ val partitioned = x.partitionBy(part).persist() // persist is important for this case and obligatory.
+ // partitioned.foreach(println)
+
+ // ===================================================== All Features Based on Categories of Feature Data Types ==================================================================================
+
+ val Result_all_Features = partitioned.map { case (x, y) => (x.toString() + "," + All_Features(y).toString()) } // convert the pair RDD to a one-line String RDD so that a DataFrame can be built by splitting on ","
+ // Result_all_Features.foreach(println)
+ // println("nayef" + Result_all_Features.count())
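+ // Note: joining all features with "," and re-splitting on "," later assumes no
+ // feature value itself contains a comma. If string-typed fields may contain
+ // commas, a rarely-used delimiter is safer. A minimal sketch (SEP is a
+ // hypothetical single-character delimiter; same shape as Result_all_Features):
+ // val SEP = "\u0001"
+ // val Result_all_Features_safe =
+ //   partitioned.map { case (rid, row) => rid.toString + SEP + All_Features(row).toString }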
43 */ StructField("S1CommentTailLength", DoubleType, false) :: /* 44 */ StructField("S2SimikaritySitelinkandLabel", DoubleType, false) :: /*45*/ StructField("S3SimilarityLabelandSitelink", DoubleType, false) :: /*46*/ StructField("S4SimilarityCommentComment", DoubleType, false) :: + // + // // Statements Features : + /* 47 */ StructField("SS1Property", StringType, false) :: /* 48 */ StructField("SS2DataValue", StringType, false) :: /*49*/ StructField("SS3ItemValue", StringType, false) :: + // + // + // //User Features : + /* 50 Boolean */ StructField("U1IsPrivileged", DoubleType, false) :: /* 51 Boolean */ StructField("U2IsBotUser", DoubleType, false) :: /*52 Boolean*/ StructField("U3IsBotuserWithFlaguser", DoubleType, false) :: + /*53 Boolean */ StructField("U4IsProperty", DoubleType, false) :: /* 54 Boolean */ StructField("U5IsTranslator", DoubleType, false) :: /*55 Boolean*/ StructField("U6IsRegister", DoubleType, false) :: + /* 56 */ StructField("U7IPValue", DoubleType, false) :: /* 57 */ StructField("U8UserID", IntegerType, false) :: /*58*/ StructField("U9HasBirthDate", DoubleType, false) :: /*59*/ StructField("U10HasDeathDate", DoubleType, false) :: + + // Items Features : + + /* 60 */ StructField("I1NumberLabels", DoubleType, false) :: /* 61 */ StructField("I2NumberDescription", DoubleType, false) :: /*62*/ StructField("I3NumberAliases", DoubleType, false) :: /*63*/ StructField("I4NumberClaims", DoubleType, false) :: + /* 64 */ StructField("I5NumberSitelinks", DoubleType, false) :: /* 65 */ StructField("I6NumberStatement", DoubleType, false) :: /*66*/ StructField("I7NumberReferences", DoubleType, false) :: /*67*/ StructField("I8NumberQualifier", DoubleType, false) :: + /* 68 */ StructField("I9NumberQualifierOrder", DoubleType, false) :: /* 69 */ StructField("I10NumberBadges", DoubleType, false) :: /*70*/ StructField("I11ItemTitle", StringType, false) :: + + // Revision Features: + /* 71 */ StructField("R1languageRevision", StringType, false) :: /* 72 */ StructField("R2RevisionLanguageLocal", StringType, false) :: /*73*/ StructField("R3IslatainLanguage", DoubleType, false) :: + /* 74 */ StructField("R4JsonLength", DoubleType, false) :: /* 75 */ StructField("R5RevisionAction", StringType, false) :: /*76*/ StructField("R6PrevReviAction", StringType, false) :: + /* 77 */ StructField("R7RevisionAccountChange", DoubleType, false) :: /* 78 */ StructField("R8ParRevision", StringType, false) :: /*79*/ StructField("R9RevisionTime", StringType, false) :: + /* 80 */ StructField("R10RevisionSize", DoubleType, false) :: /* 81 */ StructField("R11ContentType", StringType, false) :: /*82*/ StructField("R12BytesIncrease", DoubleType, false) :: + /* 83 */ StructField("R13TimeSinceLastRevi", DoubleType, false) :: /* 84 */ StructField("R14CommentLength", DoubleType, false) :: /*85*/ StructField("R15RevisionSubaction", StringType, false) :: + /* 86 */ StructField("R16PrevReviSubaction", StringType, false) :: + + Nil) + + val rowRDD = Result_all_Features.map(line => line.split(",")).map(e ⇒ Row(e(0).toInt // character feature column + , e(1).toDouble, e(2).toDouble, e(3).toDouble, e(4).toDouble, e(5).toDouble, e(6).toDouble, e(7).toDouble, e(8).toDouble, e(9).toDouble, RoundDouble(e(10).toDouble), + e(11).toDouble, e(12).toDouble, e(13).toDouble, e(14).toDouble, e(15).toDouble, e(16).toDouble, e(17).toDouble, e(18).toDouble, e(19).toDouble, e(20).toDouble, e(21).toDouble, e(22).toDouble, e(23).toDouble, e(24).toDouble, e(25).toDouble //Word Feature column + , e(26).toDouble, e(27).toDouble, 
e(28).toDouble, e(29).toDouble.toInt, e(30).toDouble, e(31).toDouble, e(32).toDouble, e(33).toDouble, e(34).toDouble, e(35).toDouble, e(36).toDouble, e(37).toDouble, RoundDouble(e(38).toDouble), RoundDouble(e(39).toDouble), e(40).toDouble, e(41).toDouble, e(42).toDouble // Sentences Features columns:
+ , RoundDouble(e(43).toDouble), e(44).toDouble, e(45).toDouble, e(46).toDouble // Statement Features columns:
+ , e(47), e(48), e(49) // User Features columns:
+ , e(50).toDouble, e(51).toDouble, e(52).toDouble, e(53).toDouble, e(54).toDouble, e(55).toDouble, e(56).toDouble, e(57).toDouble.toInt, e(58).toDouble, e(59).toDouble // Item Features columns:
+ , e(60).toDouble, e(61).toDouble, e(62).toDouble, e(63).toDouble, e(64).toDouble, e(65).toDouble, e(66).toDouble, e(67).toDouble, e(68).toDouble, e(69).toDouble, "Q" + e(70).toDouble.toInt.toString() // Revision Features columns:
+ , e(71), e(72), e(73).toDouble, e(74).toDouble, e(75), e(76), e(77).toDouble, e(78), e(79), e(80).toDouble, e(81), e(82).toDouble, e(83).toDouble, e(84).toDouble, e(85), e(86)))
+
+ // a. User Frequency:
+ // number of revisions a user has contributed
+ // val resu = DF_Tags.groupBy("contributorID").agg(count("Rid"))
+ DF_Tags.registerTempTable("TagesTable")
+ val ContributorFreq_for_Each_Revision_DF = sqlContext.sql("select contributorID as CIDUSER1, count(Rid) as NumberofRevisionsUserContributed from TagesTable where contributorID !='0' group by contributorID ") // .drop("CIDUSER1")
+ // ContributorFreq_for_Each_Revision_DF.show()
+
+ // b. Cumulated: number of unique Items a user has contributed to.
+ val CumulatedNumberof_uniqueItemsForUser_DF = sqlContext.sql("select contributorID as CIDUSER2, COUNT(DISTINCT itemid) as NumberofUniqueItemsUseredit from TagesTable where contributorID !='0' group by contributorID") // .drop("CIDUSER2")
+ // CumulatedNumberof_uniqueItemsForUser_DF.show()
+
+ // 1. Item Frequency:
+ // number of revisions an Item has
+ val ItemFrequ_DF = sqlContext.sql("select itemid, count(Rid) as NumberRevisionItemHas from TagesTable group by itemid")
+ // ItemFrequ_DF.show()
+
+ // 2. Cumulated number of unique users who have edited the Item. User IPs are not counted: a contributor is either an IP or a name, and only names are considered.
+ val CumulatedNumberof_UniqueUserForItem_DF = sqlContext.sql("select itemid, COUNT(DISTINCT contributorID) as NumberUniqUserEditItem from TagesTable where contributorID !='0' group by itemid")
+ // CumulatedNumberof_UniqueUserForItem_DF.show()
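+ // Note: these frequency tables can equally be expressed with the DataFrame API
+ // instead of SQL. A minimal sketch of (a) above, assuming the functions._
+ // import already used elsewhere in this file:
+ // val contributorFreqDF = DF_Tags
+ //   .filter("contributorID != '0'")
+ //   .groupBy("contributorID")
+ //   .agg(count("Rid").as("NumberofRevisionsUserContributed"))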
+ // 3. Frequency of each Item:
+ val Fre_Item_DF = sqlContext.sql("select itemid, COUNT(itemid) as FreqItem from TagesTable group by itemid")
+ // Fre_Item_DF.show()
+
+ // *****************************************************************************************************************************************
+ // This is the Main DataFrame:
+ val BeforeJoin_All_Features = sqlContext.createDataFrame(rowRDD, schema)
+ // BeforeJoin_All_Features.show()
+
+ // ********************************** User feature Join
+
+ // Join1 to add the first User Feature: number of revisions a user has contributed
+ val AfterJoinUser1_All_Features = BeforeJoin_All_Features.as("T1").join(ContributorFreq_for_Each_Revision_DF.as("T2"), $"T1.U8UserID" === $"T2.CIDUSER1", "leftouter").drop("CIDUSER1")
+ // AfterJoinUser1_All_Features.show()
+
+ // Join2 to add the second User Feature
+ val AfterJoinUser2_All_Features = AfterJoinUser1_All_Features.as("T1").join(CumulatedNumberof_uniqueItemsForUser_DF.as("T2"), $"T1.U8UserID" === $"T2.CIDUSER2", "leftouter").drop("CIDUSER2")
+ // AfterJoinUser2_All_Features.show()
+
+ // ********************************** Item Feature Join
+ // Join3 to add the first Item Feature: number of revisions an Item has
+ val AfterJoinItem3_All_Features = AfterJoinUser2_All_Features.as("T1").join(ItemFrequ_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid")
+ // AfterJoinItem3_All_Features.show()
+
+ // Join4 to add the second Item Feature
+ val AfterJoinItem4_All_Features = AfterJoinItem3_All_Features.as("T1").join(CumulatedNumberof_UniqueUserForItem_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid")
+ // AfterJoinItem4_All_Features.show()
+
+ // Join5 to add the third Item Feature
+ val AfterJoinItem5_All_Features = AfterJoinItem4_All_Features.as("T1").join(Fre_Item_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid")
+ // AfterJoinItem5_All_Features.show()
+
+ // ********************************
+
+ // Geographical information Features from the Meta file
+ // REVISION_ID|REVISION_SESSION_ID|USER_COUNTRY_CODE|USER_CONTINENT_CODE|USER_TIME_ZONE|USER_REGION_CODE|USER_CITY_NAME|USER_COUNTY_NAME|REVISION_TAGS
+ val df_GeoInf = sqlContext.read
+ .format("com.databricks.spark.csv")
+ .option("header", "true") // Use first line of all files as header
+ .option("inferSchema", "true") // Automatically infer data types
+ .load("hdfs://localhost:9000/mydata/Meta.csv").select("REVISION_ID", "REVISION_SESSION_ID", "USER_COUNTRY_CODE", "USER_CONTINENT_CODE", "USER_TIME_ZONE", "USER_REGION_CODE", "USER_CITY_NAME", "USER_COUNTY_NAME", "REVISION_TAGS")
+ // df_GeoInf.show()
+
+ val df_Truth = sqlContext.read
+ .format("com.databricks.spark.csv")
+ .option("header", "true") // Use first line of all files as header
+ .option("inferSchema", "true") // Automatically infer data types
+ .load("hdfs://localhost:9000/mydata/truth.csv").select("REVISION_ID", "ROLLBACK_REVERTED", "UNDO_RESTORE_REVERTED")
+ // df_Truth.show()
+
+ val AfterJoinGeoInfo_All_Features = AfterJoinItem5_All_Features.as("T1").join(df_GeoInf.as("T2"), $"T1.Rid" === $"T2.REVISION_ID", "leftouter").drop("REVISION_ID").cache()
+ // AfterJoinGeoInfo_All_Features.show()
+
+ val Final_All_Features = AfterJoinGeoInfo_All_Features.as("T1").join(df_Truth.as("T2"), $"T1.Rid" === $"T2.REVISION_ID", "leftouter").drop("REVISION_ID").cache()
+ // Final_All_Features.show()
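+ // Sanity check (sketch, optional): revisions with no match in truth.csv keep
+ // null labels after the leftouter join, and can be counted before the fills below:
+ // println(Final_All_Features.filter("ROLLBACK_REVERTED is null").count())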
+ // Pre-process Data ============================================================================================================================================================
+
+ // For String columns, fill the null values with "NA":
+
+ var Fill_Missing_Final_All_Features = Final_All_Features.na.fill("NA", Seq("USER_COUNTRY_CODE", "USER_CONTINENT_CODE", "USER_TIME_ZONE", "USER_REGION_CODE", "USER_CITY_NAME", "USER_COUNTY_NAME", "REVISION_TAGS")).cache()
+
+ // For Integer frequency columns, fill the null values with 0:
+ Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.na.fill(0, Seq("FreqItem", "NumberUniqUserEditItem", "NumberRevisionItemHas", "NumberofUniqueItemsUseredit", "NumberofRevisionsUserContributed", "REVISION_SESSION_ID")).cache()
+ // Fill_Missing_Final_All_Features.show()
+
+ val BoolToDoubleUDF = udf { (BoolAsString: String) => if (BoolAsString == "T") 1.0 else 0.0 }
+ val IntegerToDouble = udf { (IntegerRevisionSessionID: Integer) => IntegerRevisionSessionID.toDouble }
+ Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalROLLBACK_REVERTED", BoolToDoubleUDF(col("ROLLBACK_REVERTED")))
+ Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalUNDO_RESTORE_REVERTED", BoolToDoubleUDF(col("UNDO_RESTORE_REVERTED")))
+
+ Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalREVISION_SESSION_ID", IntegerToDouble(col("REVISION_SESSION_ID")))
+
+ Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberofRevisionsUserContributed", IntegerToDouble(col("NumberofRevisionsUserContributed")))
+ Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberofUniqueItemsUseredit", IntegerToDouble(col("NumberofUniqueItemsUseredit")))
+
+ Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberRevisionItemHas", IntegerToDouble(col("NumberRevisionItemHas")))
+ Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberUniqUserEditItem", IntegerToDouble(col("NumberUniqUserEditItem")))
+ Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalFreqItem", IntegerToDouble(col("FreqItem")))
+
+ // =========================================================================== Character Features : Double and Integer Features ====================================================================================
+ // Double ratios: for ratio Double columns, fill missing (0) values with the median: Character Features + ratio Word Features :
+ var Samples = Fill_Missing_Final_All_Features.sample(false, 0.001).cache() // .where($"S2SimikaritySitelinkandLabel">0.0 || $"S3SimilarityLabelandSitelink">0.0 || $"S4SimilarityCommentComment">0.0)
+ Samples.registerTempTable("df")
+
+ val Query = "select " +
+ "percentile_approx(C1uppercaseratio, 0.5) as median1" + "," + "percentile_approx(C2lowercaseratio, 0.5) as median2" + "," +
+ "percentile_approx(C3alphanumericratio, 0.5) as median3" + "," + "percentile_approx(C4asciiratio, 0.5) as median4" + "," +
+ "percentile_approx(C5bracketratio, 0.5) as median5" + "," + "percentile_approx(C6digitalratio, 0.5) as median6" + "," +
+ "percentile_approx(C7latinratio, 0.5) as median7" + "," + "percentile_approx(C8whitespaceratio, 0.5) as median8" + "," +
+ "percentile_approx(C9puncratio, 0.5) as median9" + "," + "percentile_approx(C11arabicratio, 0.5) as median11" + "," +
+ "percentile_approx(C12bengaliratio, 0.5) as median12" + "," + "percentile_approx(C13brahmiratio, 0.5) as median13" + "," +
+ "percentile_approx(C14cyrilinratio, 0.5) as median14" + "," + "percentile_approx(C15hanratio, 0.5) as median15" + "," +
+ "percentile_approx(c16malysiaratio, 0.5) as median16" + "," +
+ "percentile_approx(C17tamiratio, 0.5) as median17" + "," + "percentile_approx(C18telugratio, 0.5) as median18" + "," +
+ "percentile_approx(C19symbolratio, 0.5) as median19" + "," + "percentile_approx(C20alpharatio, 0.5) as median20" + "," +
+ "percentile_approx(C21visibleratio, 0.5) as median21" + "," + "percentile_approx(C22printableratio, 0.5) as median22" + "," +
+ "percentile_approx(C23blankratio, 0.5) as median23" + "," + "percentile_approx(C24controlratio, 0.5) as median24" + "," +
+ "percentile_approx(C25hexaratio, 0.5) as median25" + "," + "percentile_approx(W1languagewordratio, 0.5) as median26" + "," +
+ "percentile_approx(W3lowercaseratio, 0.5) as median27" + "," + "percentile_approx(W6badwordratio, 0.5) as median28" + "," +
+ "percentile_approx(W7uppercaseratio, 0.5) as median29" + "," + "percentile_approx(W8banwordratio, 0.5) as median30" + " from df"
+
+ val medianValues = sqlContext.sql(Query).rdd
+ val Median = medianValues.first()
+
+ // Median :
+ // Character ratio Features: UDFs (replace a 0 ratio with the column's median)
+ val lkpUDF1 = udf { (i: Double) => if (i == 0) Median(0).toString().toDouble else i }
+ val lkpUDF2 = udf { (i: Double) => if (i == 0) Median(1).toString().toDouble else i }
+ val lkpUDF3 = udf { (i: Double) => if (i == 0) Median(2).toString().toDouble else i }
+ val lkpUDF4 = udf { (i: Double) => if (i == 0) Median(3).toString().toDouble else i }
+ val lkpUDF5 = udf { (i: Double) => if (i == 0) Median(4).toString().toDouble else i }
+ val lkpUDF6 = udf { (i: Double) => if (i == 0) Median(5).toString().toDouble else i }
+ val lkpUDF7 = udf { (i: Double) => if (i == 0) Median(6).toString().toDouble else i }
+ val lkpUDF8 = udf { (i: Double) => if (i == 0) Median(7).toString().toDouble else i }
+ val lkpUDF9 = udf { (i: Double) => if (i == 0) Median(8).toString().toDouble else i }
+
+ val lkpUDF11 = udf { (i: Double) => if (i == 0) Median(9).toString().toDouble else i }
+ val lkpUDF12 = udf { (i: Double) => if (i == 0) Median(10).toString().toDouble else i }
+ val lkpUDF13 = udf { (i: Double) => if (i == 0) Median(11).toString().toDouble else i }
+ val lkpUDF14 = udf { (i: Double) => if (i == 0) Median(12).toString().toDouble else i }
+ val lkpUDF15 = udf { (i: Double) => if (i == 0) Median(13).toString().toDouble else i }
+ val lkpUDF16 = udf { (i: Double) => if (i == 0) Median(14).toString().toDouble else i }
+ val lkpUDF17 = udf { (i: Double) => if (i == 0) Median(15).toString().toDouble else i }
+ val lkpUDF18 = udf { (i: Double) => if (i == 0) Median(16).toString().toDouble else i }
+ val lkpUDF19 = udf { (i: Double) => if (i == 0) Median(17).toString().toDouble else i }
+ val lkpUDF20 = udf { (i: Double) => if (i == 0) Median(18).toString().toDouble else i }
+ val lkpUDF21 = udf { (i: Double) => if (i == 0) Median(19).toString().toDouble else i }
+ val lkpUDF22 = udf { (i: Double) => if (i == 0) Median(20).toString().toDouble else i }
+ val lkpUDF23 = udf { (i: Double) => if (i == 0) Median(21).toString().toDouble else i }
+ val lkpUDF24 = udf { (i: Double) => if (i == 0) Median(22).toString().toDouble else i }
+ val lkpUDF25 = udf { (i: Double) => if (i == 0) Median(23).toString().toDouble else i }
.drop("C1uppercaseratio").cache() + val df2 = df1.withColumn("FinalC2lowercaseratio", lkpUDF2(col("C2lowercaseratio"))) // .drop("C2lowercaseratio").cache() + // df1.unpersist() + val df3 = df2.withColumn("FinalC3alphanumericratio", lkpUDF3(col("C3alphanumericratio"))) // .drop("C3alphanumericratio").cache() + // df2.unpersist() + val df4 = df3.withColumn("FinalC4asciiratio", lkpUDF4(col("C4asciiratio"))) // .drop("C4asciiratio").cache() + // df3.unpersist() + val df5 = df4.withColumn("FinalC5bracketratio", lkpUDF5(col("C5bracketratio"))) // .drop("C5bracketratio").cache() + // df4.unpersist() + val df6 = df5.withColumn("FinalC6digitalratio", lkpUDF6(col("C6digitalratio"))) // .drop("C6digitalratio").cache() + // df5.unpersist() + val df7 = df6.withColumn("FinalC7latinratio", lkpUDF7(col("C7latinratio"))) // .drop("C7latinratio").cache() + // df6.unpersist() + val df8 = df7.withColumn("FinalC8whitespaceratio", lkpUDF8(col("C8whitespaceratio"))) // .drop("C8whitespaceratio").cache() + // df7.unpersist() + val df9 = df8.withColumn("FinalC9puncratio", lkpUDF9(col("C9puncratio"))) // .drop("C9puncratio").cache() + + // Mean : + // character integer values : + val Mean_C10longcharacterseq = Samples.agg(mean("C10longcharacterseq")).head() + val C10_Mean = Mean_C10longcharacterseq.getDouble(0) + val lkpUDFC10 = udf { (i: Double) => if (i == 0) C10_Mean else i } + val df10 = df9.withColumn("FinalC10longcharacterseq", lkpUDFC10(col("C10longcharacterseq"))) + + // Median + val df11 = df10.withColumn("FinalC11arabicratio", lkpUDF11(col("C11arabicratio"))) // .drop("C11arabicratio").cache() + // df9.unpersist() + val df12 = df11.withColumn("FinalC12bengaliratio", lkpUDF12(col("C12bengaliratio"))) // .drop("C12bengaliratio").cache() + // df11.unpersist() + val df13 = df12.withColumn("FinalC13brahmiratio", lkpUDF13(col("C13brahmiratio"))) // .drop("C13brahmiratio").cache() + // df12.unpersist() + val df14 = df13.withColumn("FinalC14cyrilinratio", lkpUDF14(col("C14cyrilinratio"))) // .drop("C14cyrilinratio").cache() + // df13.unpersist() + val df15 = df14.withColumn("FinalC15hanratio", lkpUDF15(col("C15hanratio"))) // .drop("C15hanratio").cache() + // df14.unpersist() + val df16 = df15.withColumn("Finalc16malysiaratio", lkpUDF16(col("c16malysiaratio"))) // .drop("c16malysiaratio").cache() + // df15.unpersist() + val df17 = df16.withColumn("FinalC17tamiratio", lkpUDF17(col("C17tamiratio"))) // .drop("C17tamiratio").cache() + // df16.unpersist() + val df18 = df17.withColumn("FinalC18telugratio", lkpUDF18(col("C18telugratio"))) // .drop("C18telugratio").cache() + // df17.unpersist() + val df19 = df18.withColumn("FinalC19symbolratio", lkpUDF19(col("C19symbolratio"))) // .drop("C19symbolratio").cache() + //df18.unpersist() + val df20 = df19.withColumn("FinalC20alpharatio", lkpUDF20(col("C20alpharatio"))) // .drop("C20alpharatio").cache() + // df19.unpersist() + val df21 = df20.withColumn("FinalC21visibleratio", lkpUDF21(col("C21visibleratio"))) // .drop("C21visibleratio").cache() + // df20.unpersist() + val df22 = df21.withColumn("FinalC22printableratio", lkpUDF22(col("C22printableratio"))) // .drop("C22printableratio").cache() + // df21.unpersist() + val df23 = df22.withColumn("FinalC23blankratio", lkpUDF23(col("C23blankratio"))) // .drop("C23blankratio").cache() + // df22.unpersist() + val df24 = df23.withColumn("FinalC24controlratio", lkpUDF24(col("C24controlratio"))) // .drop("C24controlratio").cache() + // df23.unpersist() + val df25 = df24.withColumn("FinalC25hexaratio", lkpUDF25(col("C25hexaratio"))) 
// .drop("C25hexaratio").cache() + + // ************************************************End Character Features **************************************************************************************** + + // ************************************************Start Word Features **************************************************************************************** + + // Word Ratio Features : UDF + val lkpUDFW1 = udf { (i: Double) => if (i == 0) Median(24).toString().toDouble else i } + val lkpUDFW3 = udf { (i: Double) => if (i == 0) Median(25).toString().toDouble else i } + val lkpUDFW6 = udf { (i: Double) => if (i == 0) Median(26).toString().toDouble else i } + val lkpUDFW7 = udf { (i: Double) => if (i == 0) Median(27).toString().toDouble else i } + val lkpUDFW8 = udf { (i: Double) => if (i == 0) Median(28).toString().toDouble else i } + + // 1. + val df26 = df25.withColumn("FinalW1languagewordratio", lkpUDFW1(col("W1languagewordratio"))) //.drop("W1languagewordratio").cache() + + // 2.Boolean(Double) IsContainLanguageWord + + // 3. + val df27 = df26.withColumn("FinalW3lowercaseratio", lkpUDFW3(col("W3lowercaseratio"))) //.drop("W3lowercaseratio").cache() + // df26.unpersist() + + // 4. Integer " Mean: + val Mean_W4longestword = Samples.agg(mean("W4longestword")).head() + val W4_Mean = Mean_W4longestword.getDouble(0) + val lkpUDFW4 = udf { (i: Double) => if (i == 0) W4_Mean else i } + val df28 = df27.withColumn("FinalW4longestword", lkpUDFW4(col("W4longestword"))) + + // 5. Boolean (Double ) W5IscontainURL + // 6. + val df29 = df28.withColumn("FinalW6badwordratio", lkpUDFW6(col("W6badwordratio"))) //.drop("W6badwordratio").cache() + + // 7. + val df30 = df29.withColumn("FinalW7uppercaseratio", lkpUDFW7(col("W7uppercaseratio"))) //.drop("W7uppercaseratio").cache() + + // 8. + val df31 = df30.withColumn("FinalW8banwordratio", lkpUDFW8(col("W8banwordratio"))) //.drop("W8banwordratio").cache() + + // 9.FemalFirst Boolean(Double) + // 10.Male First Boolean(Double) + // 11.ContainBadWord Boolean(Double) + // 12ContainBanWord Boolean(Double) + + // 13. Integer(Double): + val Mean_W13W13NumberSharewords = Samples.agg(mean("W13NumberSharewords")).head() + val W13_Mean = Mean_W13W13NumberSharewords.getDouble(0) + val lkpUDFW13 = udf { (i: Double) => if (i == 0) W13_Mean else i } + val df32 = df31.withColumn("FinalW13NumberSharewords", lkpUDFW13(col("W13NumberSharewords"))) + + // 14. Integer (Double): + val Mean_W14NumberSharewordswithoutStopwords = Samples.agg(mean("W14NumberSharewordswithoutStopwords")).head() + val W14_Mean = Mean_W14NumberSharewordswithoutStopwords.getDouble(0) + val lkpUDFW14 = udf { (i: Double) => if (i == 0) W14_Mean else i } + val df33 = df32.withColumn("FinalW14NumberSharewordswithoutStopwords", lkpUDFW14(col("W14NumberSharewordswithoutStopwords"))) + + // 15. Double (Not ratio): + val Mean_W15PortionQid = Samples.agg(mean("W15PortionQid")).head() + val W15_Mean = Mean_W15PortionQid.getDouble(0) + val lkpUDFW15 = udf { (i: Double) => if (i == 0) W15_Mean else i } + val df34 = df33.withColumn("FinalW15PortionQid", lkpUDFW15(col("W15PortionQid"))) + + // 16. 
+ // 16. Double (not ratio):
+ val Mean_W16PortionLnags = Samples.agg(mean("W16PortionLnags")).head()
+ val W16_Mean = Mean_W16PortionLnags.getDouble(0)
+ val lkpUDFW16 = udf { (i: Double) => if (i == 0) W16_Mean else i }
+ val df35 = df34.withColumn("FinalW16PortionLnags", lkpUDFW16(col("W16PortionLnags")))
+
+ // 17. Double (not ratio):
+ val Mean_W17PortionLinks = Samples.agg(mean("W17PortionLinks")).head()
+ val W17_Mean = Mean_W17PortionLinks.getDouble(0)
+ val lkpUDFW17 = udf { (i: Double) => if (i == 0) W17_Mean else i }
+ val df36 = df35.withColumn("FinalW17PortionLinks", lkpUDFW17(col("W17PortionLinks")))
+
+ // ************************************************ End Word Features ****************************************************************************************
+
+ // ************************************************ Start Sentences Features ****************************************************************************************
+ // 1. Integer(Double)
+ val Mean_S1CommentTailLength = Samples.agg(mean("S1CommentTailLength")).head()
+ val S1_Mean = RoundDouble(Mean_S1CommentTailLength.getDouble(0))
+ val lkpUDFS1 = udf { (i: Double) => if (i == 0) S1_Mean else i }
+ val df37 = df36.withColumn("FinalS1CommentTailLength", lkpUDFS1(col("S1CommentTailLength")))
+
+ // 2. Double but not ratio values:
+ val Mean_S2SimikaritySitelinkandLabel = Samples.agg(mean("S2SimikaritySitelinkandLabel")).head()
+ val S2_Mean = RoundDouble(Mean_S2SimikaritySitelinkandLabel.getDouble(0))
+ val lkpUDFS2 = udf { (i: Double) => if (i == 0) S2_Mean else i }
+ val df39 = df37.withColumn("FinalS2SimikaritySitelinkandLabel", lkpUDFS2(col("S2SimikaritySitelinkandLabel")))
+
+ // 3. Double but not ratio values:
+ val Mean_S3SimilarityLabelandSitelink = Samples.agg(mean("S3SimilarityLabelandSitelink")).head()
+ val S3_Mean = RoundDouble(Mean_S3SimilarityLabelandSitelink.getDouble(0))
+ val lkpUDFS3 = udf { (i: Double) => if (i == 0.0) S3_Mean else i }
+ val df40 = df39.withColumn("FinalS3SimilarityLabelandSitelink", lkpUDFS3(col("S3SimilarityLabelandSitelink")))
+
+ // 4. Double but not ratio values:
+ val Mean_S4SimilarityCommentComment = Samples.agg(mean("S4SimilarityCommentComment")).head()
+ val S4_Mean = RoundDouble(Mean_S4SimilarityCommentComment.getDouble(0))
+ val lkpUDFS4 = udf { (i: Double) => if (i == 0.0) S4_Mean else i }
+ val df41 = df40.withColumn("FinalS4SimilarityCommentComment", lkpUDFS4(col("S4SimilarityCommentComment")))
+
+ // df41.show()
+ // ************************************************ End Sentences Features ****************************************************************************************
+ // *********************************************** Start Statement Features ****************************************************************************************
+ // 1. String
+ // 2. String
+ // 3. String
+ // ************************************************ End Statement Features ****************************************************************************************
+ // *********************************************** Start User Features ****************************************************************************************
+
+ // 1. Boolean(Double)
+ // 2. Boolean(Double)
+ // 3. Boolean(Double)
+ // 4. Boolean(Double)
+ // 5. Boolean(Double)
+ // 6. Boolean(Double)
+ // 7. (Double) IP, no need to fill missing data
+ // 8. (Double) ID, no need to fill missing data
+ // 9. Boolean(Double)
+ // 10. Boolean(Double)
+
+ // *********************************************** End User Features ****************************************************************************************
+ // *********************************************** Start Item Features ****************************************************************************************
+ // 1. Integer (Double), no need to fill missing values
+ // 2. Integer (Double), no need to fill missing values
+ // 3. Integer (Double), no need to fill missing values
+ // 4. Integer (Double), no need to fill missing values
+ // 5. Integer (Double), no need to fill missing values
+ // 6. Integer (Double), no need to fill missing values
+ // 7. Integer (Double), no need to fill missing values
+ // 8. Integer (Double), no need to fill missing values
+ // 9. Integer (Double), no need to fill missing values
+ // 10. Integer (Double), no need to fill missing values
+ // 11. String
+ // *********************************************** End Item Features ****************************************************************************************
+ // *********************************************** Start Revision Features ****************************************************************************************
+ // 1. String
+ // 2. String
+ // 3. Boolean (Double)
+ // 4. Integer (Double)
+ // 5. String
+ // 6. String
+ // 7. Boolean (Double)
+ // 8. String
+ // 9. String
+ // 10. Integer (Double)
+ // 11. String
+ // 12. Integer (Double)
+ // 13. Long (Double)
+ // 14. Integer (Double)
+ // 15. String
+ // 16. String
+ // *********************************************** End Revision Features ****************************************************************************************
+ // *********************************************** Meta Data, Truth Data and Frequencies ****************************************************************************************
+ // Meta
+ // 1. Revision Session: Integer (converted to Double)
+ // 2. User Country Code
+ // 3. User Continent Code
+ // 4. User Time Zone
+ // 5. User Region Code
+ // 6. User City Name
+ // 7. User County Name
+ // 8. Revision Tags
+
+ // Truth:
+ // 1. Undo
+
+ // Freq :
+
+ // 1.5 features
+
+ // Roll Boolean : Boolean (Double)
+ // Undo : Boolean (Double)
+
+ // *********************************************** End Meta Data, Truth Data and Frequencies ****************************************************************************************
+
+ // =========================================================================== String Features ====================================================================================
+
+ val df42 = df41.withColumn(
+ // statement String features:
+ "StringFeatures", concat($"SS1Property", lit(";"), $"SS2DataValue", lit(";"), $"SS3ItemValue", lit(";"), $"I11ItemTitle",
+ // Revision String Features:
+ lit(";"), $"R1languageRevision",
+ lit(";"), $"R2RevisionLanguageLocal",
+ lit(";"), $"R5RevisionAction",
+ lit(";"), $"R6PrevReviAction",
+ lit(";"), $"R8ParRevision",
+ lit(";"), $"R9RevisionTime",
+ lit(";"), $"R11ContentType",
+ lit(";"), $"R15RevisionSubaction",
+ lit(";"), $"R16PrevReviSubaction",
+
+ lit(";"), $"USER_COUNTRY_CODE",
+ lit(";"), $"USER_CONTINENT_CODE",
+ lit(";"), $"USER_TIME_ZONE",
+ lit(";"), $"USER_REGION_CODE",
+ lit(";"), $"USER_CITY_NAME",
+ lit(";"), $"USER_COUNTY_NAME",
+ lit(";"), $"REVISION_TAGS"))
+
+ val toArray = udf((record: String) => record.split(";").map(_.toString()))
+ val test1 = df42.withColumn("StringFeatures", toArray(col("StringFeatures")))
+ // test1.show()
+ // test1.printSchema()
+
+ val word2Vec = new Word2Vec().setInputCol("StringFeatures").setOutputCol("result").setVectorSize(20).setMinCount(0)
+ val model = word2Vec.fit(test1)
+ val result = model.transform(test1) // .rdd
+
+ // result.show()
+
+ val Todense = udf((b: Vector) => b.toDense)
+ val test_new2 = result.withColumn("result", Todense(col("result")))
+
+ val assembler = new VectorAssembler().setInputCols(Array(
+ "result",
+
+ // character
+ "FinalC1uppercaseratio", "FinalC2lowercaseratio", "FinalC3alphanumericratio", "FinalC4asciiratio", "FinalC5bracketratio", "FinalC6digitalratio",
+ "FinalC7latinratio", "FinalC8whitespaceratio", "FinalC9puncratio", "FinalC10longcharacterseq", "FinalC11arabicratio", "FinalC12bengaliratio",
+ "FinalC13brahmiratio", "FinalC14cyrilinratio", "FinalC15hanratio", "Finalc16malysiaratio", "FinalC17tamiratio", "FinalC18telugratio",
+ "FinalC19symbolratio", "FinalC20alpharatio", "FinalC21visibleratio", "FinalC22printableratio", "FinalC23blankratio", "FinalC24controlratio", "FinalC25hexaratio",
+
+ // Words
+ "FinalW1languagewordratio", "W2Iscontainlanguageword", "FinalW3lowercaseratio", "FinalW4longestword", "W5IscontainURL", "FinalW6badwordratio",
+ "FinalW7uppercaseratio", "FinalW8banwordratio", "W9FemalFirstName", "W10MaleFirstName", "W11IscontainBadword", "W12IsContainBanword",
+ "FinalW13NumberSharewords", "FinalW14NumberSharewordswithoutStopwords", "FinalW15PortionQid", "FinalW16PortionLnags", "FinalW17PortionLinks",
+
+ // Sentences :
+ "FinalS1CommentTailLength", "FinalS2SimikaritySitelinkandLabel", "FinalS3SimilarityLabelandSitelink", "FinalS4SimilarityCommentComment",
+
+ // User :
+ "U1IsPrivileged", "U2IsBotUser", "U3IsBotuserWithFlaguser", "U4IsProperty", "U5IsTranslator", "U6IsRegister", "U7IPValue", "U8UserID",
+ "U9HasBirthDate", "U10HasDeathDate",
+
+ // Item:
+
+ "I1NumberLabels", "I2NumberDescription", "I3NumberAliases", "I4NumberClaims", "I5NumberSitelinks", "I6NumberStatement",
"I7NumberReferences", "I8NumberQualifier", "I9NumberQualifierOrder", "I10NumberBadges", + + // Revision: + "R3IslatainLanguage", "R4JsonLength", "R7RevisionAccountChange", "R10RevisionSize", "R12BytesIncrease", + "R13TimeSinceLastRevi", "R14CommentLength", + + // Meta , truth , Freq + // meta : + "FinalREVISION_SESSION_ID", // Truth: - //1.Undo - - // Freq : - - //1.5 features - - // Roll Boolean :Boolean (Double) - // Undo :Boolean (Double) - - //*********************************************** End Revision Features **************************************************************************************** - - //===========================================================================String Features==================================================================================== - - val df42 = df41.withColumn( - //statement String features: - "StringFeatures", concat($"SS1Property", lit(";"), $"SS2DataValue", lit(";"), $"SS3ItemValue", lit(";"), $"I11ItemTitle", - //Revision String Features: - lit(";"), $"R1languageRevision", - lit(";"), $"R2RevisionLanguageLocal", - lit(";"), $"R5RevisionAction", - lit(";"), $"R6PrevReviAction", - lit(";"), $"R8ParRevision", - lit(";"), $"R9RevisionTime", - lit(";"), $"R11ContentType", - lit(";"), $"R15RevisionSubaction", - lit(";"), $"R16PrevReviSubaction", - - lit(";"), $"USER_COUNTRY_CODE", - lit(";"), $"USER_CONTINENT_CODE", - lit(";"), $"USER_TIME_ZONE", - lit(";"), $"USER_REGION_CODE", - lit(";"), $"USER_CITY_NAME", - lit(";"), $"USER_COUNTY_NAME", - lit(";"), $"REVISION_TAGS")) + "FinalUNDO_RESTORE_REVERTED", - val toArray = udf((record: String) => record.split(";").map(_.toString())) - val test1 = df42.withColumn("StringFeatures", toArray(col("StringFeatures"))) - // test1.show() - // test1.printSchema() + // Freq: + "FinalNumberofRevisionsUserContributed", + "FinalNumberofUniqueItemsUseredit", "FinalNumberRevisionItemHas", "FinalNumberUniqUserEditItem", "FinalFreqItem")).setOutputCol("features") + val Testing_Data = assembler.transform(test_new2) - val word2Vec = new Word2Vec().setInputCol("StringFeatures").setOutputCol("result").setVectorSize(20).setMinCount(0) - val model = word2Vec.fit(test1) - val result = model.transform(test1) //.rdd - - // result.show() - - val Todense = udf((b: Vector) => b.toDense) - val test_new2 = result.withColumn("result", Todense(col("result"))) - - val assembler = new VectorAssembler().setInputCols(Array( - "result", - - // character - "FinalC1uppercaseratio", "FinalC2lowercaseratio", "FinalC3alphanumericratio", "FinalC4asciiratio", "FinalC5bracketratio", "FinalC6digitalratio", - "FinalC7latinratio", "FinalC8whitespaceratio", "FinalC9puncratio", "FinalC10longcharacterseq", "FinalC11arabicratio", "FinalC12bengaliratio", - "FinalC13brahmiratio", "FinalC14cyrilinratio", "FinalC15hanratio", "Finalc16malysiaratio", "FinalC17tamiratio", "FinalC18telugratio", - "FinalC19symbolratio", "FinalC20alpharatio", "FinalC21visibleratio", "FinalC22printableratio", "FinalC23blankratio", "FinalC24controlratio", "FinalC25hexaratio", - - // Words - "FinalW1languagewordratio", "W2Iscontainlanguageword", "FinalW3lowercaseratio", "FinalW4longestword", "W5IscontainURL", "FinalW6badwordratio", - "FinalW7uppercaseratio", "FinalW8banwordratio", "W9FemalFirstName", "W10MaleFirstName", "W11IscontainBadword", "W12IsContainBanword", - "FinalW13NumberSharewords", "FinalW14NumberSharewordswithoutStopwords", "FinalW15PortionQid", "FinalW16PortionLnags", "FinalW17PortionLinks", - - //Sentences : - "FinalS1CommentTailLength", 
"FinalS2SimikaritySitelinkandLabel", "FinalS3SimilarityLabelandSitelink", "FinalS4SimilarityCommentComment", - - // User : - "U1IsPrivileged", "U2IsBotUser", "U3IsBotuserWithFlaguser", "U4IsProperty", "U5IsTranslator", "U6IsRegister", "U7IPValue", "U8UserID", - "U9HasBirthDate", "U10HasDeathDate", - - //Item: - - "I1NumberLabels", "I2NumberDescription", "I3NumberAliases", "I4NumberClaims", "I5NumberSitelinks", "I6NumberStatement", - "I7NumberReferences", "I8NumberQualifier", "I9NumberQualifierOrder", "I10NumberBadges", - - //Revision: - "R3IslatainLanguage", "R4JsonLength", "R7RevisionAccountChange", "R10RevisionSize", "R12BytesIncrease", - "R13TimeSinceLastRevi", "R14CommentLength", - - // Meta , truth , Freq - // meta : - "FinalREVISION_SESSION_ID", - // Truth: - "FinalUNDO_RESTORE_REVERTED", - - //Freq: - "FinalNumberofRevisionsUserContributed", - "FinalNumberofUniqueItemsUseredit", "FinalNumberRevisionItemHas", "FinalNumberUniqUserEditItem", "FinalFreqItem")).setOutputCol("features") - val Testing_Data = assembler.transform(test_new2) - - // Prepare the data for classification: + // Prepare the data for classification: // NewData.registerTempTable("DB") // val Training_Data = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED from DB") - //val Data = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED as label from DB") // for logistic regrision + // val Data = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED as label from DB") // for logistic regrision - //Data.show() + // Data.show() // val TestClassifiers = new Classifiers() -// - // TestClassifiers.RandomForestClassifer(Testing_Data, sqlContext) -// // TestClassifiers.DecisionTreeClassifier(Data, sqlContext) -// // TestClassifiers.LogisticRegrision(Data, sqlContext) -// // TestClassifiers.GradientBoostedTree(Data, sqlContext) -// // TestClassifiers.MultilayerPerceptronClassifier(Data, sqlContext) + // + // TestClassifiers.RandomForestClassifer(Testing_Data, sqlContext) + // // TestClassifiers.DecisionTreeClassifier(Data, sqlContext) + // // TestClassifiers.LogisticRegrision(Data, sqlContext) + // // TestClassifiers.GradientBoostedTree(Data, sqlContext) + // // TestClassifiers.MultilayerPerceptronClassifier(Data, sqlContext) Testing_Data - - - } - - - - - def Triger(sc: SparkContext): Unit = { - -// val sqlContext = new org.apache.spark.sql.SQLContext(sc) -// import sqlContext.implicits._ -// import org.apache.spark.sql.functions._ // for UDF -// import org.apache.spark.sql.types._ -// -// //******************************************************************************************************************************* -// println("Please Enter 0 for JTriple and 1 for TRIX process and 2 for RDFXML process and 3 for NormalXML:") -// val num = scala.io.StdIn.readLine() -// -// if (num == "0") { -// println("JTriple.........!!!!!!") -// // Streaming records:RDFJtriple file : -// val jobConf = new JobConf() -// -// val JTriple_Parser_OBJ = new ParseJTriple() -// val DRF_Builder_JTripleOBJ = new FacilitiesClass() -// val RDD_JTriple = JTriple_Parser_OBJ.Start_JTriple_Parser(jobConf, sc) -// RDD_JTriple.foreach(println) -// //----------------------------DF for RDF TRIX ------------------------------------------ -// // Create SQLContext Object: -// val sqlContext = new org.apache.spark.sql.SQLContext(sc) -// val DFR_JTriple = DRF_Builder_JTripleOBJ.RDD_TO_DFR_JTriple(RDD_JTriple, sqlContext) -// DFR_JTriple.show() -// -// } - -// if (num == "1") { -// -// println("TRIX.........!!!!!!") -// // Streaming 
-// // Streaming records:RDFTRIX file :
-// val jobConf = new JobConf()
-//
-// val TRIX_Parser_OBJ = new ParseTRIX()
-// val DRF_Builder_RDFTRIX_OBJ = new FacilitiesClass()
-//
-// val RDD_TRIX = TRIX_Parser_OBJ.Start_TriX_Parser(jobConf, sc)
-// RDD_TRIX.foreach(println)
-//
-// //----------------------------DF for RDF TRIX ------------------------------------------
-// // Create SQLContext Object:
-// val sqlContext = new org.apache.spark.sql.SQLContext(sc)
-// val DFR_TRIX = DRF_Builder_RDFTRIX_OBJ.RDD_TO_DFR_TRIX(RDD_TRIX, sqlContext)
-// DFR_TRIX.show()
-//
-// } //RDF XML file :*********************************************************************************************************
-// else if (num == "2") {
-// println("RDF XML .........!!!!!!")
-// // Streaming records:RDFXML file :
-// val jobConf_Record = new JobConf()
-// val jobConf_Prefixes = new JobConf()
-//
-// val RDFXML_Parser_OBJ = new ParseRDFXML()
-// val DRF_Builder_RDFXML_OBJ = new FacilitiesClass()
-//
-// val RDD_RDFXML = RDFXML_Parser_OBJ.start_RDFXML_Parser(jobConf_Record, jobConf_Prefixes, sc)
-// RDD_RDFXML.foreach(println)
-//
-// //----------------------------DF for RDF XML ------------------------------------------
-// // Create SQLContext Object:
-// val sqlContext = new org.apache.spark.sql.SQLContext(sc)
-// val DFR_RDF_XML = DRF_Builder_RDFXML_OBJ.RDD_TO_DFR_RDFXML(RDD_RDFXML, sqlContext)
-// DFR_RDF_XML.show()
-// //
-// // NOrmal XML Example WikiData: ***************************************************************************************************
-// } else if (num == "3") {
- // Streaming records:
-// val jobConf = new JobConf()
-// val NormalXML_Parser_OBJ = new ParseNormalXML()
-// val RDD_OBJ = new ParseNormalXML()
-// val RDD_All_Record1 = RDD_OBJ.Training_DB_NormalXML_Parser_Input1(sc)
-// val RDD_All_Record2 = RDD_OBJ.Training_DB_NormalXML_Parser_Input2(sc)
-// val RDD_All_Record3 = RDD_OBJ.Training_DB_NormalXML_Parser_Input3(sc)
-// //RDD_All_Record1.foreach(println)
-// //RDD_All_Record2.foreach(println)
-// // RDD_All_Record3.foreach(println)
-//
-// val RDD_All_Record = RDD_All_Record1.union(RDD_All_Record2).union(RDD_All_Record3).distinct().cache()
-//
-// //println(RDD_All_Record.count())
-// // println(RDD_All_Record.count())
-//
-// // ======= Json part :
-// //Json RDD : Each record has its Revision iD:
-// val JsonRDD = RDD_All_Record.map(_.split("NNLL")).map(v => replacing_with_Quoto(v(0), v(8))).cache()
-// //JsonRDD.foreach(println)
-// //println(JsonRDD.count())
-//
-// // Data set
-// val Ds_Json = sqlContext.jsonRDD(JsonRDD).select("key", "id", "labels", "descriptions", "aliases", "claims", "sitelinks").cache()
-// //Ds_Json.show()
-// // println(Ds_Json.count())
-//
-// // ======= Tags part : // Contributor IP here is in Decimal format not IP format and It is converted in ParseNormalXml stage
-// val TagsRDD = RDD_All_Record.map(_.split("NNLL")).map(x => (x(0), x(1), x(2), x(3), x(4), x(5), x(6), x(7), x(8), x(9), x(10), x(11))).cache()
-// val DF_Tags = TagsRDD.toDF("Rid", "Itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "model", "format", "sha").cache()
-// // DF_Tags.show()
-// // println(DF_Tags.count())
-//
-// //======== Join Json part with Tag Part:============================
-// //Joining to have full data
-// val DF_First_DF_Result_Join_Tags_and_Json = DF_Tags.as("T1").join(Ds_Json.as("T2"), $"T1.Rid" === $"T2.key", "leftouter").select("Rid", "itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "labels", "descriptions", "aliases", "claims", "sitelinks", "model", "format", "sha") //.orderBy("Rid", "Itemid")
-// DF_First_DF_Result_Join_Tags_and_Json.registerTempTable("Data1")
-// val dfr_DATA_JsonTages1 = sqlContext.sql("select * from Data1 order by itemid ,Rid ").cache()
-//
-// val colNames = Seq("Rid2", "itemid2", "comment2", "pid2", "time2", "contributorIP2", "contributorID2", "contributorName2", "JsonText2", "labels2", "descriptions2", "aliases2", "claims2", "sitelinks2", "model2", "format2", "sha2")
-// val DF_Second = DF_First_DF_Result_Join_Tags_and_Json.toDF(colNames: _*) //.distinct()
-// DF_Second.registerTempTable("Data2")
-//
-// //===================================================================Parent // Previous Revision==============================================================================================================
-// //val DF_Joined = result1.as("df1").join(result2.as("df2"), col("itemid") === col("itemid2") && col("index1") === col("index2") + 1, "leftouter").select("Rid", "itemid", "comment", "pid", "time", "contributorIP", "contributorID", "contributorName", "JsonText", "labels", "descriptions", "aliases", "claims", "sitelinks", "model", "format", "sha", "Rid2", "itemid2", "comment2", "pid2", "time2", "contributorIP2", "contributorID2", "contributorName2", "JsonText2", "labels2", "descriptions2", "aliases2", "claims2", "sitelinks2", "model2", "format2", "sha2")
-// //.select("itemid", "Rid","pid","time","itemid2","Rid2","pid2","time2")
-//
-// //Joining based on Parent Id to get the previous cases: ParentID
-// val DF_Joined = DF_First_DF_Result_Join_Tags_and_Json.as("df1").join(DF_Second.as("df2"), $"df1.pid" === $"df2.Rid2", "leftouter").distinct()
-//
-// val RDD_After_JoinDF = DF_Joined.rdd.distinct()
-// val x = RDD_After_JoinDF.map(row => (row(0).toString().toInt, row)).cache()
-// val part = new RangePartitioner(4, x)
-// val partitioned = x.partitionBy(part).persist() // persist is important for this case and obligatory.
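Note: the range-partition-then-persist step in the block removed above is the backbone of the old pipeline; rows keyed by revision id feed several per-row feature passes, so the shuffle result must be cached. A self-contained sketch of that pattern, with toy data and illustrative names:

  import org.apache.spark.{ RangePartitioner, SparkConf, SparkContext }

  object PartitionSketch {
    def main(args: Array[String]): Unit = {
      val sc = new SparkContext(new SparkConf().setAppName("sketch").setMaster("local[2]"))
      // Pair RDD keyed by revision id, as in the removed code.
      val byRevision = sc.parallelize(Seq((3, "revA"), (1, "revB"), (2, "revC")))
      val partitioner = new RangePartitioner(4, byRevision) // samples keys into sorted ranges
      val partitioned = byRevision.partitionBy(partitioner).persist() // reused by later passes
      println(partitioned.count())
      sc.stop()
    }
  }
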
-// //partitioned.foreach(println)
-// //
-// // //=====================================================All Features Based on Categories of Features Data Type :==================================================================================
-// //
-// val Result_all_Features = partitioned.map { case (x, y) => (x.toString() + "," + All_Features(y).toString()) } // we convert the Pair RDD to String one LineRDD to be able to make DF based on ","
-// //Result_all_Features.foreach(println)
-// // println("nayef" + Result_all_Features.count())
-//
-// // Conver the RDD of All Features to DataFrame:
-//
-// val schema = StructType(
-//
-// //0
-// StructField("Rid", IntegerType, false) ::
-//
-// // Character Features :
-// /* 1*/ StructField("C1uppercaseratio", DoubleType, false) :: /*2 */ StructField("C2lowercaseratio", DoubleType, false) :: /*3*/ StructField("C3alphanumericratio", DoubleType, false) ::
-// /*4*/ StructField("C4asciiratio", DoubleType, false) :: /*5*/ StructField("C5bracketratio", DoubleType, false) :: /*6*/ StructField("C6digitalratio", DoubleType, false) ::
-// /*7*/ StructField("C7latinratio", DoubleType, false) :: /*8*/ StructField("C8whitespaceratio", DoubleType, false) :: /* 9*/ StructField("C9puncratio", DoubleType, false) ::
-// /*10*/ StructField("C10longcharacterseq", DoubleType, false) :: /*11*/ StructField("C11arabicratio", DoubleType, false) :: /*12*/ StructField("C12bengaliratio", DoubleType, false) ::
-// /*13 */ StructField("C13brahmiratio", DoubleType, false) :: /*14*/ StructField("C14cyrilinratio", DoubleType, false) :: /*15*/ StructField("C15hanratio", DoubleType, false) ::
-// /*16*/ StructField("c16malysiaratio", DoubleType, false) :: /*17*/ StructField("C17tamiratio", DoubleType, false) :: /*18*/ StructField("C18telugratio", DoubleType, false) ::
-// /*19 */ StructField("C19symbolratio", DoubleType, false) :: /*20 */ StructField("C20alpharatio", DoubleType, false) :: /*21*/ StructField("C21visibleratio", DoubleType, false) ::
-// /*22*/ StructField("C22printableratio", DoubleType, false) :: /*23*/ StructField("C23blankratio", DoubleType, false) :: /*24 */ StructField("C24controlratio", DoubleType, false) ::
-// /* 25 */ StructField("C25hexaratio", DoubleType, false) ::
-//
-// //word Features:
-// /*26*/ StructField("W1languagewordratio", DoubleType, false) :: /*27 Boolean */ StructField("W2Iscontainlanguageword", DoubleType, false) :: /*28*/ StructField("W3lowercaseratio", DoubleType, false) ::
-// /*29 Integer */ StructField("W4longestword", IntegerType, false) :: /*30 Boolean */ StructField("W5IscontainURL", DoubleType, false) :: /*31*/ StructField("W6badwordratio", DoubleType, false) ::
-// /*32*/ StructField("W7uppercaseratio", DoubleType, false) :: /*33*/ StructField("W8banwordratio", DoubleType, false) :: /*34 Boolean */ StructField("W9FemalFirstName", DoubleType, false) ::
-// /*35 Boolean */ StructField("W10MaleFirstName", DoubleType, false) :: /*36 Boolean */ StructField("W11IscontainBadword", DoubleType, false) :: /*37 Boolean*/ StructField("W12IsContainBanword", DoubleType, false) ::
-// /*38 integer */ StructField("W13NumberSharewords", DoubleType, false) :: /*39 Integer */ StructField("W14NumberSharewordswithoutStopwords", DoubleType, false) ::
-// /*40*/ StructField("W15PortionQid", DoubleType, false) :: /*41*/ StructField("W16PortionLnags", DoubleType, false) :: /*42*/ StructField("W17PortionLinks", DoubleType, false) ::
-//
-// //
-// // // Sentences Features:
-// /*43*/ StructField("S1CommentTailLength", DoubleType, false) :: /*44*/ StructField("S2SimikaritySitelinkandLabel", DoubleType, false) :: /*45*/ StructField("S3SimilarityLabelandSitelink", DoubleType, false) :: /*46*/ StructField("S4SimilarityCommentComment", DoubleType, false) ::
-// //
-// // // Statements Features :
-// /*47*/ StructField("SS1Property", StringType, false) :: /*48*/ StructField("SS2DataValue", StringType, false) :: /*49*/ StructField("SS3ItemValue", StringType, false) ::
-// //
-// //
-// // //User Features :
-// /*50 Boolean*/ StructField("U1IsPrivileged", DoubleType, false) :: /*51 Boolean*/ StructField("U2IsBotUser", DoubleType, false) :: /*52 Boolean*/ StructField("U3IsBotuserWithFlaguser", DoubleType, false) ::
-// /*53 Boolean*/ StructField("U4IsProperty", DoubleType, false) :: /*54 Boolean*/ StructField("U5IsTranslator", DoubleType, false) :: /*55 Boolean*/ StructField("U6IsRegister", DoubleType, false) ::
-// /*56*/ StructField("U7IPValue", DoubleType, false) :: /*57*/ StructField("U8UserID", IntegerType, false) :: /*58*/ StructField("U9HasBirthDate", DoubleType, false) :: /*59*/ StructField("U10HasDeathDate", DoubleType, false) ::
-//
-// //Items Features :
-//
-// /*60*/ StructField("I1NumberLabels", DoubleType, false) :: /*61*/ StructField("I2NumberDescription", DoubleType, false) :: /*62*/ StructField("I3NumberAliases", DoubleType, false) :: /*63*/ StructField("I4NumberClaims", DoubleType, false) ::
-// /*64*/ StructField("I5NumberSitelinks", DoubleType, false) :: /*65*/ StructField("I6NumberStatement", DoubleType, false) :: /*66*/ StructField("I7NumberReferences", DoubleType, false) :: /*67*/ StructField("I8NumberQualifier", DoubleType, false) ::
-// /*68*/ StructField("I9NumberQualifierOrder", DoubleType, false) :: /*69*/ StructField("I10NumberBadges", DoubleType, false) :: /*70*/ StructField("I11ItemTitle", StringType, false) ::
-//
-// // Revision Features:
-// /*71*/ StructField("R1languageRevision", StringType, false) :: /*72*/ StructField("R2RevisionLanguageLocal", StringType, false) :: /*73*/ StructField("R3IslatainLanguage", DoubleType, false) ::
-// /*74*/ StructField("R4JsonLength", DoubleType, false) :: /*75*/ StructField("R5RevisionAction", StringType, false) :: /*76*/ StructField("R6PrevReviAction", StringType, false) ::
-// /*77*/ StructField("R7RevisionAccountChange", DoubleType, false) :: /*78*/ StructField("R8ParRevision", StringType, false) :: /*79*/ StructField("R9RevisionTime", StringType, false) ::
-// /*80*/ StructField("R10RevisionSize", DoubleType, false) :: /*81*/ StructField("R11ContentType", StringType, false) :: /*82*/ StructField("R12BytesIncrease", DoubleType, false) ::
-// /*83*/ StructField("R13TimeSinceLastRevi", DoubleType, false) :: /*84*/ StructField("R14CommentLength", DoubleType, false) :: /*85*/ StructField("R15RevisionSubaction", StringType, false) ::
-// /*86*/ StructField("R16PrevReviSubaction", StringType, false) ::
-//
-// Nil)
-//
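Note: the 87 hand-written StructFields in the removed schema could also be generated from name lists, keeping field order and index in one place. A minimal sketch, with only two of the real column names spelled out and the rest elided:

  import org.apache.spark.sql.types.{ DoubleType, IntegerType, StructField, StructType }

  // Generate the Double-typed ratio fields from a name list; list order defines the index.
  val ratioNames = Seq("C1uppercaseratio", "C2lowercaseratio" /* ... remaining feature names */)
  val schemaSketch = StructType(
    StructField("Rid", IntegerType, nullable = false) +:
      ratioNames.map(n => StructField(n, DoubleType, nullable = false)))
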
-// val rowRDD = Result_all_Features.map(line => line.split(",")).map(e ⇒ Row(e(0).toInt // character feature column
-// , e(1).toDouble, e(2).toDouble, e(3).toDouble, e(4).toDouble, e(5).toDouble, e(6).toDouble, e(7).toDouble, e(8).toDouble, e(9).toDouble, RoundDouble(e(10).toDouble),
-// e(11).toDouble, e(12).toDouble, e(13).toDouble, e(14).toDouble, e(15).toDouble, e(16).toDouble, e(17).toDouble, e(18).toDouble, e(19).toDouble, e(20).toDouble, e(21).toDouble, e(22).toDouble, e(23).toDouble, e(24).toDouble, e(25).toDouble //Word Feature column
-// , e(26).toDouble, e(27).toDouble, e(28).toDouble, e(29).toDouble.toInt, e(30).toDouble, e(31).toDouble, e(32).toDouble, e(33).toDouble, e(34).toDouble, e(35).toDouble, e(36).toDouble, e(37).toDouble, RoundDouble(e(38).toDouble), RoundDouble(e(39).toDouble), e(40).toDouble, e(41).toDouble, e(42).toDouble // Sentences Features column:
-// , RoundDouble(e(43).toDouble), e(44).toDouble, e(45).toDouble, e(46).toDouble //Statement Features Column:
-// , e(47), e(48), e(49) // User Features Column:
-// , e(50).toDouble, e(51).toDouble, e(52).toDouble, e(53).toDouble, e(54).toDouble, e(55).toDouble, e(56).toDouble, e(57).toDouble.toInt, e(58).toDouble, e(59).toDouble //Item Features column:
-// , e(60).toDouble, e(61).toDouble, e(62).toDouble, e(63).toDouble, e(64).toDouble, e(65).toDouble, e(66).toDouble, e(67).toDouble, e(68).toDouble, e(69).toDouble, "Q" + e(70).toDouble.toInt.toString() //Revision Features Column:
-// , e(71), e(72), e(73).toDouble, e(74).toDouble, e(75), e(76), e(77).toDouble, e(78), e(79), e(80).toDouble, e(81), e(82).toDouble, e(83).toDouble, e(84).toDouble, e(85), e(86)))
-//
-// //a.User Frequency:
-// //number of revisions a user has contributed
-// //val resu= DF_Tags.groupBy("contributorID").agg(count("Rid"))
-// DF_Tags.registerTempTable("TagesTable")
-// val ContributorFreq_for_Each_Revision_DF = sqlContext.sql("select contributorID as CIDUSER1, count(Rid) as NumberofRevisionsUserContributed from TagesTable where contributorID !='0' group by contributorID ") //.drop("CIDUSER1")
-// //ContributorFreq_for_Each_Revision_DF.show()
-//
-// //b.Cumulated : Number of a unique Item a user has contributed.
-// val CumulatedNumberof_uniqueItemsForUser_DF = sqlContext.sql("select contributorID as CIDUSER2, COUNT(DISTINCT itemid) as NumberofUniqueItemsUseredit from TagesTable where contributorID !='0' group by contributorID") //.drop("CIDUSER2")
-// //CumulatedNumberof_uniqueItemsForUser_DF.show()
-//
-// //1.Item Frequency:
-// // number of revisions an Item has
-// val ItemFrequ_DF = sqlContext.sql("select itemid, count(Rid) as NumberRevisionItemHas from TagesTable group by itemid")
-// // ItemFrequ_DF.show()
-//
-// //2. Cumulate number of unique users have edited the Item : Did not consider the users IP. Contributor is an IP or Name. we consider name
-// val CumulatedNumberof_UniqueUserForItem_DF = sqlContext.sql("select itemid, COUNT(DISTINCT contributorID) as NumberUniqUserEditItem from TagesTable where contributorID !='0' group by itemid")
-// //CumulatedNumberof_UniqueUserForItem_DF.show()
-//
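Note: the per-user frequency queries in the removed block have a direct DataFrame-API equivalent, which the commented-out groupBy line already hints at; both per-user aggregates fit in one pass. A sketch, with dfTags standing in for DF_Tags:

  import org.apache.spark.sql.DataFrame
  import org.apache.spark.sql.functions.{ count, countDistinct }

  // Both per-user features in one aggregation, skipping anonymous contributors ("0").
  def userFrequencies(dfTags: DataFrame): DataFrame =
    dfTags.where(dfTags("contributorID") !== "0")
      .groupBy("contributorID")
      .agg(
        count("Rid").as("NumberofRevisionsUserContributed"),
        countDistinct("itemid").as("NumberofUniqueItemsUseredit"))
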
-// //3. freq each Item :
-// val Fre_Item_DF = sqlContext.sql("select itemid, COUNT(itemid) as FreqItem from TagesTable group by itemid")
-// // Fre_Item_DF.show()
-//
-// //*****************************************************************************************************************************************
-// // This is Main DataFrame:
-// val BeforeJoin_All_Features = sqlContext.createDataFrame(rowRDD, schema)
-// //BeforeJoin_All_Features.show()
-//
-// //********************************** User feature Join
-//
-// // Join1 for add The first User Feature : number of revisions a user has contributed
-// val AfterJoinUser1_All_Features = BeforeJoin_All_Features.as("T1").join(ContributorFreq_for_Each_Revision_DF.as("T2"), $"T1.U8UserID" === $"T2.CIDUSER1", "leftouter").drop("CIDUSER1")
-// //AfterJoinUser1_All_Features.show()
-//
-// // Join2 for add The second User Feature
-// val AfterJoinUser2_All_Features = AfterJoinUser1_All_Features.as("T1").join(CumulatedNumberof_uniqueItemsForUser_DF.as("T2"), $"T1.U8UserID" === $"T2.CIDUSER2", "leftouter").drop("CIDUSER2")
-// //AfterJoinUser2_All_Features.show()
-//
-// //********************************** Item Feature Join
-// // Join3 for add The First Item Feature :number of revisions an Item has
-// val AfterJoinItem3_All_Features = AfterJoinUser2_All_Features.as("T1").join(ItemFrequ_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid")
-// // AfterJoinItem3_All_Features.show()
-//
-// // Join4 for add The Second Item Feature
-// val AfterJoinItem4_All_Features = AfterJoinItem3_All_Features.as("T1").join(CumulatedNumberof_UniqueUserForItem_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid")
-// // AfterJoinItem4_All_Features.show()
-//
-// // Join5 for add The Third Item Feature
-// val AfterJoinItem5_All_Features = AfterJoinItem4_All_Features.as("T1").join(Fre_Item_DF.as("T2"), $"T1.I11ItemTitle" === $"T2.itemid", "leftouter").drop("itemid")
-// //2 AfterJoinItem5_All_Features.show()
-//
-// //********************************
-//
-// //*Geografical information Feature from Meta File
-// //REVISION_ID|REVISION_SESSION_ID|USER_COUNTRY_CODE|USER_CONTINENT_CODE|USER_TIME_ZONE|USER_REGION_CODE|USER_CITY_NAME|USER_COUNTY_NAME|REVISION_TAGS
-// val df_GeoInf = sqlContext.read
-// .format("com.databricks.spark.csv")
-// .option("header", "true") // Use first line of all files as header
-// .option("inferSchema", "true") // Automatically infer data types
-// .load("hdfs://localhost:9000/mydata/Meta.csv").select("REVISION_ID", "REVISION_SESSION_ID", "USER_COUNTRY_CODE", "USER_CONTINENT_CODE", "USER_TIME_ZONE", "USER_REGION_CODE", "USER_CITY_NAME", "USER_COUNTY_NAME", "REVISION_TAGS")
-// // df_GeoInf.show()
-//
-// val df_Truth = sqlContext.read
-// .format("com.databricks.spark.csv")
-// .option("header", "true") // Use first line of all files as header
-// .option("inferSchema", "true") // Automatically infer data types
-// .load("hdfs://localhost:9000/mydata/truth.csv").select("REVISION_ID", "ROLLBACK_REVERTED", "UNDO_RESTORE_REVERTED")
-// // df_GeoInf.show()
-//
-// val AfterJoinGeoInfo_All_Features = AfterJoinItem5_All_Features.as("T1").join(df_GeoInf.as("T2"), $"T1.Rid" === $"T2.REVISION_ID", "leftouter").drop("REVISION_ID").cache()
-// // AfterJoinGeoInfo_All_Features.show()
-//
-// val Final_All_Features = AfterJoinGeoInfo_All_Features.as("T1").join(df_Truth.as("T2"), $"T1.Rid" === $"T2.REVISION_ID", "leftouter").drop("REVISION_ID").cache()
-// //Final_All_Features.show()
-//
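Note: Join1 through Join5 in the removed block repeat one shape; a helper like the following (not in the codebase, names illustrative) would collapse them. The left outer join deliberately keeps revisions without a match, and the na.fill calls below replace the resulting nulls:

  import org.apache.spark.sql.DataFrame

  // Attach an aggregate table on a key, then drop the duplicated key column.
  def attachFeature(main: DataFrame, agg: DataFrame, mainKey: String, aggKey: String): DataFrame =
    main.join(agg, main(mainKey) === agg(aggKey), "leftouter").drop(aggKey)
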
-// // Pre-process Data ============================================================================================================================================================
-//
-// // For String Column, We fill the Null values by "NA":
-//
-// var Fill_Missing_Final_All_Features = Final_All_Features.na.fill("NA", Seq("USER_COUNTRY_CODE", "USER_CONTINENT_CODE", "USER_TIME_ZONE", "USER_REGION_CODE", "USER_CITY_NAME", "USER_COUNTY_NAME", "REVISION_TAGS")).cache()
-//
-// // For Integer Frequency Column, We fill the Null values by 0:
-// Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.na.fill(0, Seq("FreqItem", "NumberUniqUserEditItem", "NumberRevisionItemHas", "NumberofUniqueItemsUseredit", "NumberofRevisionsUserContributed", "REVISION_SESSION_ID")).cache()
-// //Fill_Missing_Final_All_Features.show()
-//
-// val BoolToDoubleUDF = udf { (BoolAsString: String) => if (BoolAsString == "T") 1.0 else 0.0 }
-// val IntegerToDouble = udf { (IntegerRevisionSessionID: Integer) => IntegerRevisionSessionID.toDouble }
-// Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalROLLBACK_REVERTED", BoolToDoubleUDF(col("ROLLBACK_REVERTED")))
-// Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalUNDO_RESTORE_REVERTED", BoolToDoubleUDF(col("UNDO_RESTORE_REVERTED")))
-//
-// Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalREVISION_SESSION_ID", IntegerToDouble(col("REVISION_SESSION_ID")))
-//
-// Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberofRevisionsUserContributed", IntegerToDouble(col("NumberofRevisionsUserContributed")))
-// Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberofUniqueItemsUseredit", IntegerToDouble(col("NumberofUniqueItemsUseredit")))
-//
-// Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberRevisionItemHas", IntegerToDouble(col("NumberRevisionItemHas")))
-// Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalNumberUniqUserEditItem", IntegerToDouble(col("NumberUniqUserEditItem")))
-// Fill_Missing_Final_All_Features = Fill_Missing_Final_All_Features.withColumn("FinalFreqItem", IntegerToDouble(col("FreqItem")))
-//
-// //===========================================================================Caharacter Features : Double , Integer Features ====================================================================================
-// //Double Ratio: For Ratio Double column, Fill -1 value by Median:Character Features + Ratio of Word Features :
-// var Samples = Fill_Missing_Final_All_Features.sample(false, 0.001).cache() //.where($"S2SimikaritySitelinkandLabel">0.0 || $"S3SimilarityLabelandSitelink">0.0 || $"S4SimilarityCommentComment">0.0)
-// Samples.registerTempTable("df")
-//
-// val Query = "select " +
-// "percentile_approx(C1uppercaseratio, 0.5) as meadian1" + "," + "percentile_approx(C2lowercaseratio, 0.5) as median2" + " ," +
-// "percentile_approx(C3alphanumericratio, 0.5) as median3" + "," + "percentile_approx(C4asciiratio, 0.5) as median4" + "," +
-// "percentile_approx(C5bracketratio, 0.5) as median5" + "," + "percentile_approx(C6digitalratio, 0.5) as median6" + "," +
-// "percentile_approx(C7latinratio, 0.5) as median7" + "," + "percentile_approx(C8whitespaceratio, 0.5) as median8" + "," +
-// "percentile_approx(C9puncratio, 0.5) as median9" + "," + "percentile_approx(C11arabicratio, 0.5) as median11" + "," +
-// "percentile_approx(C12bengaliratio, 0.5) as median12" + "," + "percentile_approx(C13brahmiratio, 0.5) as median13" + "," +
-// "percentile_approx(C14cyrilinratio, 0.5) as median14" + "," + "percentile_approx(C15hanratio, 0.5) as median15" + "," +
-// "percentile_approx(c16malysiaratio, 0.5) as median16" + "," +
-// "percentile_approx(C17tamiratio, 0.5) as median17" + "," + "percentile_approx(C18telugratio, 0.5) as median18" + "," +
-// "percentile_approx(C19symbolratio, 0.5) as median19" + "," + "percentile_approx(C20alpharatio, 0.5) as median20" + "," +
-// "percentile_approx(C21visibleratio, 0.5) as median21" + "," + "percentile_approx(C22printableratio, 0.5) as median22" + "," +
-// "percentile_approx(C23blankratio, 0.5) as median23" + "," + "percentile_approx(C24controlratio, 0.5) as median24" + "," +
-// "percentile_approx(C25hexaratio, 0.5) as median25" ++ "," + "percentile_approx(W1languagewordratio, 0.5) as median26" + "," +
-// "percentile_approx(W3lowercaseratio, 0.5) as median27" + "," + "percentile_approx(W6badwordratio, 0.5) as median28" + "," +
-// "percentile_approx(W7uppercaseratio, 0.5) as median27" + "," + "percentile_approx(W8banwordratio, 0.5) as median27" + " from df"
-//
-// val medianValues = sqlContext.sql(Query).rdd
-// val Median = medianValues.first()
-//
-// // Median :
-// // Character Ratio Features: UDF
-// val lkpUDF1 = udf { (i: Double) => if (i == 0) Median(0).toString().toDouble else i }
-// val lkpUDF2 = udf { (i: Double) => if (i == 0) Median(1).toString().toDouble else i }
-// val lkpUDF3 = udf { (i: Double) => if (i == 0) Median(2).toString().toDouble else i }
-// val lkpUDF4 = udf { (i: Double) => if (i == 0) Median(3).toString().toDouble else i }
-// val lkpUDF5 = udf { (i: Double) => if (i == 0) Median(4).toString().toDouble else i }
-// val lkpUDF6 = udf { (i: Double) => if (i == 0) Median(5).toString().toDouble else i }
-// val lkpUDF7 = udf { (i: Double) => if (i == 0) Median(6).toString().toDouble else i }
-// val lkpUDF8 = udf { (i: Double) => if (i == 0) Median(7).toString().toDouble else i }
-// val lkpUDF9 = udf { (i: Double) => if (i == 0) Median(8).toString().toDouble else i }
-//
-// val lkpUDF11 = udf { (i: Double) => if (i == 0) Median(9).toString().toDouble else i }
-// val lkpUDF12 = udf { (i: Double) => if (i == 0) Median(10).toString().toDouble else i }
-// val lkpUDF13 = udf { (i: Double) => if (i == 0) Median(11).toString().toDouble else i }
-// val lkpUDF14 = udf { (i: Double) => if (i == 0) Median(12).toString().toDouble else i }
-// val lkpUDF15 = udf { (i: Double) => if (i == 0) Median(13).toString().toDouble else i }
-// val lkpUDF16 = udf { (i: Double) => if (i == 0) Median(14).toString().toDouble else i }
-// val lkpUDF17 = udf { (i: Double) => if (i == 0) Median(15).toString().toDouble else i }
-// val lkpUDF18 = udf { (i: Double) => if (i == 0) Median(16).toString().toDouble else i }
-// val lkpUDF19 = udf { (i: Double) => if (i == 0) Median(17).toString().toDouble else i }
-// val lkpUDF20 = udf { (i: Double) => if (i == 0) Median(18).toString().toDouble else i }
-// val lkpUDF21 = udf { (i: Double) => if (i == 0) Median(19).toString().toDouble else i }
-// val lkpUDF22 = udf { (i: Double) => if (i == 0) Median(20).toString().toDouble else i }
-// val lkpUDF23 = udf { (i: Double) => if (i == 0) Median(21).toString().toDouble else i }
-// val lkpUDF24 = udf { (i: Double) => if (i == 0) Median(22).toString().toDouble else i }
-// val lkpUDF25 = udf { (i: Double) => if (i == 0) Median(23).toString().toDouble else i }
-//
-// val df1 = Fill_Missing_Final_All_Features.withColumn("FinalC1uppercaseratio", lkpUDF1(col("C1uppercaseratio"))) //.drop("C1uppercaseratio").cache()
-// val df2 = df1.withColumn("FinalC2lowercaseratio", lkpUDF2(col("C2lowercaseratio"))) //.drop("C2lowercaseratio").cache()
-// //df1.unpersist()
-// val df3 = df2.withColumn("FinalC3alphanumericratio", lkpUDF3(col("C3alphanumericratio"))) //.drop("C3alphanumericratio").cache()
-// //df2.unpersist()
-// val df4 = df3.withColumn("FinalC4asciiratio", lkpUDF4(col("C4asciiratio"))) //.drop("C4asciiratio").cache()
-// //df3.unpersist()
-// val df5 = df4.withColumn("FinalC5bracketratio", lkpUDF5(col("C5bracketratio"))) //.drop("C5bracketratio").cache()
-// //df4.unpersist()
-// val df6 = df5.withColumn("FinalC6digitalratio", lkpUDF6(col("C6digitalratio"))) //.drop("C6digitalratio").cache()
-// //df5.unpersist()
-// val df7 = df6.withColumn("FinalC7latinratio", lkpUDF7(col("C7latinratio"))) //.drop("C7latinratio").cache()
-// //df6.unpersist()
-// val df8 = df7.withColumn("FinalC8whitespaceratio", lkpUDF8(col("C8whitespaceratio"))) //.drop("C8whitespaceratio").cache()
-// //df7.unpersist()
-// val df9 = df8.withColumn("FinalC9puncratio", lkpUDF9(col("C9puncratio"))) //.drop("C9puncratio").cache()
-//
-// // Mean :
-// // character integer values :
-// val Mean_C10longcharacterseq = Samples.agg(mean("C10longcharacterseq")).head()
-// val C10_Mean = Mean_C10longcharacterseq.getDouble(0)
-// val lkpUDFC10 = udf { (i: Double) => if (i == 0) C10_Mean else i }
-// val df10 = df9.withColumn("FinalC10longcharacterseq", lkpUDFC10(col("C10longcharacterseq")))
-//
-// //Median
-// val df11 = df10.withColumn("FinalC11arabicratio", lkpUDF11(col("C11arabicratio"))) //.drop("C11arabicratio").cache()
-// // df9.unpersist()
-// val df12 = df11.withColumn("FinalC12bengaliratio", lkpUDF12(col("C12bengaliratio"))) //.drop("C12bengaliratio").cache()
-// //df11.unpersist()
-// val df13 = df12.withColumn("FinalC13brahmiratio", lkpUDF13(col("C13brahmiratio"))) //.drop("C13brahmiratio").cache()
-// // df12.unpersist()
-// val df14 = df13.withColumn("FinalC14cyrilinratio", lkpUDF14(col("C14cyrilinratio"))) //.drop("C14cyrilinratio").cache()
-// // df13.unpersist()
-// val df15 = df14.withColumn("FinalC15hanratio", lkpUDF15(col("C15hanratio"))) //.drop("C15hanratio").cache()
-// // df14.unpersist()
-// val df16 = df15.withColumn("Finalc16malysiaratio", lkpUDF16(col("c16malysiaratio"))) //.drop("c16malysiaratio").cache()
-// //df15.unpersist()
-// val df17 = df16.withColumn("FinalC17tamiratio", lkpUDF17(col("C17tamiratio"))) //.drop("C17tamiratio").cache()
-// //df16.unpersist()
-// val df18 = df17.withColumn("FinalC18telugratio", lkpUDF18(col("C18telugratio"))) //.drop("C18telugratio").cache()
-// //df17.unpersist()
-// val df19 = df18.withColumn("FinalC19symbolratio", lkpUDF19(col("C19symbolratio"))) //.drop("C19symbolratio").cache()
-// //df18.unpersist()
-// val df20 = df19.withColumn("FinalC20alpharatio", lkpUDF20(col("C20alpharatio"))) //.drop("C20alpharatio").cache()
-// // df19.unpersist()
-// val df21 = df20.withColumn("FinalC21visibleratio", lkpUDF21(col("C21visibleratio"))) //.drop("C21visibleratio").cache()
-// // df20.unpersist()
-// val df22 = df21.withColumn("FinalC22printableratio", lkpUDF22(col("C22printableratio"))) //.drop("C22printableratio").cache()
-// //df21.unpersist()
-// val df23 = df22.withColumn("FinalC23blankratio", lkpUDF23(col("C23blankratio"))) //.drop("C23blankratio").cache()
-// // df22.unpersist()
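Note: the removed median query above computes approximate medians over a 0.1% sample (percentile_approx is a Hive UDAF, so a Hive-backed context is assumed) and reuses the alias median27 three times; that only worked because the values were read back by position. The df1..df25 chain and its 25 near-identical lkpUDF vals can then be folded into one helper, sketched here under the assumption that the medians have been collected into a map:

  import org.apache.spark.sql.DataFrame
  import org.apache.spark.sql.functions.{ col, udf }

  // One imputation step per (column, median) pair; adds a "FinalX" column per entry,
  // treating a zero value as missing, exactly as the chain above does.
  def imputeZeros(df: DataFrame, medians: Map[String, Double]): DataFrame =
    medians.foldLeft(df) { case (acc, (name, median)) =>
      val fill = udf { (v: Double) => if (v == 0) median else v }
      acc.withColumn("Final" + name, fill(col(name)))
    }
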
-// val df24 = df23.withColumn("FinalC24controlratio", lkpUDF24(col("C24controlratio"))) //.drop("C24controlratio").cache()
-// //df23.unpersist()
-// val df25 = df24.withColumn("FinalC25hexaratio", lkpUDF25(col("C25hexaratio"))) //.drop("C25hexaratio").cache()
-//
-// //************************************************End Character Features ****************************************************************************************
-//
-// //************************************************Start Word Features ****************************************************************************************
-//
-// // Word Ratio Features : UDF
-// val lkpUDFW1 = udf { (i: Double) => if (i == 0) Median(24).toString().toDouble else i }
-// val lkpUDFW3 = udf { (i: Double) => if (i == 0) Median(25).toString().toDouble else i }
-// val lkpUDFW6 = udf { (i: Double) => if (i == 0) Median(26).toString().toDouble else i }
-// val lkpUDFW7 = udf { (i: Double) => if (i == 0) Median(27).toString().toDouble else i }
-// val lkpUDFW8 = udf { (i: Double) => if (i == 0) Median(28).toString().toDouble else i }
-//
-// //1.
-// val df26 = df25.withColumn("FinalW1languagewordratio", lkpUDFW1(col("W1languagewordratio"))) //.drop("W1languagewordratio").cache()
-//
-// //2.Boolean(Double) IsContainLanguageWord
-//
-// //3.
-// val df27 = df26.withColumn("FinalW3lowercaseratio", lkpUDFW3(col("W3lowercaseratio"))) //.drop("W3lowercaseratio").cache()
-// // df26.unpersist()
-//
-// //4. Integer " Mean:
-// val Mean_W4longestword = Samples.agg(mean("W4longestword")).head()
-// val W4_Mean = Mean_W4longestword.getDouble(0)
-// val lkpUDFW4 = udf { (i: Double) => if (i == 0) W4_Mean else i }
-// val df28 = df27.withColumn("FinalW4longestword", lkpUDFW4(col("W4longestword")))
-//
-// //5. Boolean (Double ) W5IscontainURL
-// //6.
-// val df29 = df28.withColumn("FinalW6badwordratio", lkpUDFW6(col("W6badwordratio"))) //.drop("W6badwordratio").cache()
-//
-// //7.
-// val df30 = df29.withColumn("FinalW7uppercaseratio", lkpUDFW7(col("W7uppercaseratio"))) //.drop("W7uppercaseratio").cache()
-//
-// //8.
-// val df31 = df30.withColumn("FinalW8banwordratio", lkpUDFW8(col("W8banwordratio"))) //.drop("W8banwordratio").cache()
-//
-// //9.FemalFirst Boolean(Double)
-// //10.Male First Boolean(Double)
-// //11.ContainBadWord Boolean(Double)
-// //12ContainBanWord Boolean(Double)
-//
-// //13. Integer(Double):
-// val Mean_W13W13NumberSharewords = Samples.agg(mean("W13NumberSharewords")).head()
-// val W13_Mean = Mean_W13W13NumberSharewords.getDouble(0)
-// val lkpUDFW13 = udf { (i: Double) => if (i == 0) W13_Mean else i }
-// val df32 = df31.withColumn("FinalW13NumberSharewords", lkpUDFW13(col("W13NumberSharewords")))
-//
-// //14. Integer (Double):
-// val Mean_W14NumberSharewordswithoutStopwords = Samples.agg(mean("W14NumberSharewordswithoutStopwords")).head()
-// val W14_Mean = Mean_W14NumberSharewordswithoutStopwords.getDouble(0)
-// val lkpUDFW14 = udf { (i: Double) => if (i == 0) W14_Mean else i }
-// val df33 = df32.withColumn("FinalW14NumberSharewordswithoutStopwords", lkpUDFW14(col("W14NumberSharewordswithoutStopwords")))
-//
-// // 15. Double (Not ratio):
-// val Mean_W15PortionQid = Samples.agg(mean("W15PortionQid")).head()
-// val W15_Mean = Mean_W15PortionQid.getDouble(0)
-// val lkpUDFW15 = udf { (i: Double) => if (i == 0) W15_Mean else i }
-// val df34 = df33.withColumn("FinalW15PortionQid", lkpUDFW15(col("W15PortionQid")))
-//
-// //16. Double(Not Ratio):
-// val Mean_W16PortionLnags = Samples.agg(mean("W16PortionLnags")).head()
-// val W16_Mean = Mean_W16PortionLnags.getDouble(0)
-// val lkpUDFW16 = udf { (i: Double) => if (i == 0) W16_Mean else i }
-// val df35 = df34.withColumn("FinalW16PortionLnags", lkpUDFW16(col("W16PortionLnags")))
-//
-// //17.Double(Not ratio):
-// val Mean_W17PortionLinks = Samples.agg(mean("W17PortionLinks")).head()
-// val W17_Mean = Mean_W17PortionLinks.getDouble(0)
-// val lkpUDFW17 = udf { (i: Double) => if (i == 0) W17_Mean else i }
-// val df36 = df35.withColumn("FinalW17PortionLinks", lkpUDFW17(col("W17PortionLinks")))
-//
-// //************************************************End Word Features ****************************************************************************************
-//
-// //************************************************Start Sentences Features ****************************************************************************************
-// // 1. Integer(Double)
-// val Mean_S1CommentTailLength = Samples.agg(mean("S1CommentTailLength")).head()
-// val S1_Mean = RoundDouble(Mean_S1CommentTailLength.getDouble(0))
-// val lkpUDFS1 = udf { (i: Double) => if (i == 0) S1_Mean else i }
-// val df37 = df36.withColumn("FinalS1CommentTailLength", lkpUDFS1(col("S1CommentTailLength")))
-//
-// //2. Double but Not ratio values :
-// val Mean_S2SimikaritySitelinkandLabel = Samples.agg(mean("S2SimikaritySitelinkandLabel")).head()
-// val S2_Mean = RoundDouble(Mean_S2SimikaritySitelinkandLabel.getDouble(0))
-// val lkpUDFS2 = udf { (i: Double) => if (i == 0) S2_Mean else i }
-// val df39 = df37.withColumn("FinalS2SimikaritySitelinkandLabel", lkpUDFS2(col("S2SimikaritySitelinkandLabel")))
-//
-// //3. Double but Not ratio values :
-// val Mean_S3SimilarityLabelandSitelink = Samples.agg(mean("S3SimilarityLabelandSitelink")).head()
-// val S3_Mean = RoundDouble(Mean_S3SimilarityLabelandSitelink.getDouble(0))
-// val lkpUDFS3 = udf { (i: Double) => if (i == 0.0) S3_Mean else i }
-// val df40 = df39.withColumn("FinalS3SimilarityLabelandSitelink", lkpUDFS3(col("S3SimilarityLabelandSitelink")))
-//
-// //4. Double but Not ratio values :
-// val Mean_S4SimilarityCommentComment = Samples.agg(mean("S4SimilarityCommentComment")).head()
-// val S4_Mean = RoundDouble(Mean_S4SimilarityCommentComment.getDouble(0))
-// val lkpUDFS4 = udf { (i: Double) => if (i == 0.0) S4_Mean else i }
-// val df41 = df40.withColumn("FinalS4SimilarityCommentComment", lkpUDFS4(col("S4SimilarityCommentComment")))
-//
-// //df41.show()
-// //************************************************End Sentences Features ****************************************************************************************
-// //*********************************************** Start Statement Features ****************************************************************************************
-// //1. String
-// //2. String
-// //3. String
-// //************************************************End Statement Features ****************************************************************************************
-// //*********************************************** Start User Features ****************************************************************************************
-//
-// //1.Boolean(Double)
-// //2.Boolean(Double)
-// //3.Boolean(Double)
-// //4.Boolean(Double)
-// //5.Boolean(Double)
-// //6.Boolean(Double)
-// //7. (Double) IP No need to fill Missing Data
-// //8. (Double) ID No need to fill Missing Data
-// //9.Boolean(Double)
-// //10.Boolean(Double)
-//
-// //*********************************************** End User Features ****************************************************************************************
-// //*********************************************** Start Item Features ****************************************************************************************
-// //1. Integer (Double) No need to fill missing values
-// //2. Integer (Double) No need to fill missing values
-// //3. Integer (Double) No need to fill missing values
-// //4. Integer (Double) No need to fill missing values
-// //5. Integer (Double) No need to fill missing values
-// //6. Integer (Double) No need to fill missing values
-// //7. Integer (Double) No need to fill missing values
-// //8. Integer (Double) No need to fill missing values
-// //9. Integer (Double) No need to fill missing values
-// //10. Integer (Double) No need to fill missing values
-// //11. String
-// //*********************************************** End Item Features ****************************************************************************************
-// //*********************************************** Start Revision Features ****************************************************************************************
-// //1.String
-// //2.String
-// //3.Boolean (Double)
-// //4.Integer(Double)
-// //5.String
-// //6.String
-// //7. Boolean(Double)
-// //8. String
-// //9.String
-// //10. Integer (Double)
-// //11.String
-// //12. integer(Double)
-// //13. Long(Double)
-// //14. integer (Double)
-// //15.String
-// //16.String
-// //*********************************************** End Revision Features ****************************************************************************************
-// //*********************************************** Meta Data , Truth Data and Frequnces ****************************************************************************************
-// //Meta
-// // 1.Revision Session :Integer (Converted to Double)
-// //2. User Country Code
-// //3.User Continent Code
-// //4.User Time Size
-// //5.User Region Code
-// //6.User-city Name
-// //7.User Country Name
-// //8.RevisionTags
-//
-// // Truth:
-// //1.Undo
-//
-// // Freq :
-//
-// //1.5 features
-//
-// // Roll Boolean :Boolean (Double)
-// // Undo :Boolean (Double)
-//
-// //*********************************************** End Revision Features ****************************************************************************************
-//
-// //===========================================================================String Features====================================================================================
-//
-// val df42 = df41.withColumn(
-// //statement String features:
-// "StringFeatures", concat($"SS1Property", lit(";"), $"SS2DataValue", lit(";"), $"SS3ItemValue", lit(";"), $"I11ItemTitle",
-// //Revision String Features:
-// lit(";"), $"R1languageRevision",
-// lit(";"), $"R2RevisionLanguageLocal",
-// lit(";"), $"R5RevisionAction",
-// lit(";"), $"R6PrevReviAction",
-// lit(";"), $"R8ParRevision",
-// lit(";"), $"R9RevisionTime",
-// lit(";"), $"R11ContentType",
-// lit(";"), $"R15RevisionSubaction",
-// lit(";"), $"R16PrevReviSubaction",
-//
-// lit(";"), $"USER_COUNTRY_CODE",
-// lit(";"), $"USER_CONTINENT_CODE",
-// lit(";"), $"USER_TIME_ZONE",
-// lit(";"), $"USER_REGION_CODE",
-// lit(";"), $"USER_CITY_NAME",
-// lit(";"), $"USER_COUNTY_NAME",
-// lit(";"), $"REVISION_TAGS"))
-//
-// val toArray = udf((record: String) => record.split(";").map(_.toString()))
-// val test1 = df42.withColumn("StringFeatures", toArray(col("StringFeatures")))
-// // test1.show()
-// // test1.printSchema()
-//
-// val word2Vec = new Word2Vec().setInputCol("StringFeatures").setOutputCol("result").setVectorSize(20).setMinCount(0)
-// val model = word2Vec.fit(test1)
-// val result = model.transform(test1) //.rdd
-//
-// // result.show()
-//
-// val Todense = udf((b: Vector) => b.toDense)
-// val test_new2 = result.withColumn("result", Todense(col("result")))
-//
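Note: the string-feature path removed above concatenates the categorical columns with ";", splits them back into an array, and embeds that array with Word2Vec before densifying the vector for the assembler. A minimal sketch of the same step, assuming `tokenized` is a DataFrame whose "StringFeatures" column is already an array of strings:

  import org.apache.spark.ml.feature.Word2Vec
  import org.apache.spark.sql.DataFrame

  // Embed the tokenized string features; 20 dimensions and minCount 0 as in the removed code.
  def embedStrings(tokenized: DataFrame): DataFrame = {
    val w2v = new Word2Vec()
      .setInputCol("StringFeatures")
      .setOutputCol("result")
      .setVectorSize(20)
      .setMinCount(0)
    w2v.fit(tokenized).transform(tokenized)
  }
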
-// val assembler = new VectorAssembler().setInputCols(Array(
-// "result",
-//
-// // character
-// "FinalC1uppercaseratio", "FinalC2lowercaseratio", "FinalC3alphanumericratio", "FinalC4asciiratio", "FinalC5bracketratio", "FinalC6digitalratio",
-// "FinalC7latinratio", "FinalC8whitespaceratio", "FinalC9puncratio", "FinalC10longcharacterseq", "FinalC11arabicratio", "FinalC12bengaliratio",
-// "FinalC13brahmiratio", "FinalC14cyrilinratio", "FinalC15hanratio", "Finalc16malysiaratio", "FinalC17tamiratio", "FinalC18telugratio",
-// "FinalC19symbolratio", "FinalC20alpharatio", "FinalC21visibleratio", "FinalC22printableratio", "FinalC23blankratio", "FinalC24controlratio", "FinalC25hexaratio",
-//
-// // Words
-// "FinalW1languagewordratio", "W2Iscontainlanguageword", "FinalW3lowercaseratio", "FinalW4longestword", "W5IscontainURL", "FinalW6badwordratio",
-// "FinalW7uppercaseratio", "FinalW8banwordratio", "W9FemalFirstName", "W10MaleFirstName", "W11IscontainBadword", "W12IsContainBanword",
-// "FinalW13NumberSharewords", "FinalW14NumberSharewordswithoutStopwords", "FinalW15PortionQid", "FinalW16PortionLnags", "FinalW17PortionLinks",
-//
-// //Sentences :
-// "FinalS1CommentTailLength", "FinalS2SimikaritySitelinkandLabel", "FinalS3SimilarityLabelandSitelink", "FinalS4SimilarityCommentComment",
-//
-// // User :
-// "U1IsPrivileged", "U2IsBotUser", "U3IsBotuserWithFlaguser", "U4IsProperty", "U5IsTranslator", "U6IsRegister", "U7IPValue", "U8UserID",
-// "U9HasBirthDate", "U10HasDeathDate",
-//
-// //Item:
-//
-// "I1NumberLabels", "I2NumberDescription", "I3NumberAliases", "I4NumberClaims", "I5NumberSitelinks", "I6NumberStatement",
-// "I7NumberReferences", "I8NumberQualifier", "I9NumberQualifierOrder", "I10NumberBadges",
-//
-// //Revision:
-// "R3IslatainLanguage", "R4JsonLength", "R7RevisionAccountChange", "R10RevisionSize", "R12BytesIncrease",
-// "R13TimeSinceLastRevi", "R14CommentLength",
-//
-// // Meta , truth , Freq
-// // meta :
-// "FinalREVISION_SESSION_ID",
-// // Truth:
-// "FinalUNDO_RESTORE_REVERTED",
-//
-// //Freq:
-// "FinalNumberofRevisionsUserContributed",
-// "FinalNumberofUniqueItemsUseredit", "FinalNumberRevisionItemHas", "FinalNumberUniqUserEditItem", "FinalFreqItem")).setOutputCol("features")
-// val NewData = assembler.transform(test_new2)
-//
-// // Prepare the data for classification:
-// NewData.registerTempTable("DB")
-// val Data = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED from DB")
-// // val Data = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED as label from DB") // for logistic regrision
-//
-// //Data.show()
-//
-// val TestClassifiers = new Classifiers()
-//
-// // TestClassifiers.RandomForestClassifer(Data, sqlContext)
-// // TestClassifiers.DecisionTreeClassifier(Data, sqlContext)
-// // TestClassifiers.LogisticRegrision(Data, sqlContext)
-// // TestClassifiers.GradientBoostedTree(Data, sqlContext)
-// // TestClassifiers.MultilayerPerceptronClassifier(Data, sqlContext)
-//
-// }
+ }
+
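Note: the commented-out classifier calls above go through a project-specific Classifiers helper; expressed directly against spark.ml, one of them would look roughly like this. This assumes the label column has been renamed from FinalROLLBACK_REVERTED to "label" (as the commented-out select hints), and the tree count is an assumption, not from the source:

  import org.apache.spark.ml.classification.RandomForestClassifier
  import org.apache.spark.sql.DataFrame

  // Hypothetical stand-in for one TestClassifiers call; data holds "label" and "features".
  def trainRandomForest(data: DataFrame) = {
    val rf = new RandomForestClassifier()
      .setLabelCol("label")
      .setFeaturesCol("features")
      .setNumTrees(100)
    rf.fit(data)
  }
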
 //===========================================================================================================================================
 //=================================================Functions Part=============================================================================
@@ -2007,11 +1331,11 @@ class VandalismDetection extends Serializable {
 def All_Features(row: Row): String = {
 var temp = ""
- //all characters
+ // all characters
 val character_Str_String = Character_Features(row)
 temp = character_Str_String
- //all Words
+ // all Words
 val Words_Str_String = Words_Features(row)
 temp = temp + "," + Words_Str_String
@@ -2023,15 +1347,15 @@ class VandalismDetection extends Serializable {
 val Statement_Str_String = Statement_Features(row)
 temp = temp + "," + Statement_Str_String
- //User Features - there are 3 Joins in last stage when we have Data Frame
+ // User Features - there are 3 Joins in last stage when we have Data Frame
 val User_Str_String = User_Features_Normal(row)
 temp = temp + "," + User_Str_String
- //Item Features - there are 3 Joins in last stage when we have Data Frame
+ // Item Features - there are 3 Joins in last stage when we have Data Frame
 val Item_Str_String = Item_Features(row)
 temp = temp + "," + Item_Str_String
- //Revision Features
+ // Revision Features
 val Revision_Str_String = Revision_Features(row)
 temp = temp + "," + Revision_Str_String
@@ -2043,13 +1367,13 @@ class VandalismDetection extends Serializable {
 def Character_Features(row: Row): String = {
 var str_results = ""
- //1. Row from partitioned Pair RDD:
+ // 1. Row from partitioned Pair RDD:
 var new_Back_Row = Row()
- //2. Revision ID current operation:
+ // 2. Revision ID current operation:
 var RevisionID = row(0)
- //3. row(2) = represent the Comment:
+ // 3. row(2) = represent the Comment:
 var CommentRecord_AsString = row(2).toString()
- //4. extract comment tail from the Normal comment-Depending on the paperes, we apply character feature extraction on comment Tail
+ // 4. extract comment tail from the Normal comment; following the papers, we apply character feature extraction on the comment tail
 val CommentObj = new CommentProcessor()
 val Temp_commentTail = CommentObj.Extract_CommentTail(CommentRecord_AsString)
@@ -2060,8 +1384,8 @@ class VandalismDetection extends Serializable {
 val FacilityOBJ = new FacilitiesClass()
 var Str_vector_Values = FacilityOBJ.ArrayToString(vectorElements)
 str_results = Str_vector_Values
- //CharacterFeatures = Vector_AsArrayElements
- //new_Back_Row = Row(vectorElements)
+ // CharacterFeatures = Vector_AsArrayElements
+ // new_Back_Row = Row(vectorElements)
 } else {
@@ -2095,11 +1419,11 @@ class VandalismDetection extends Serializable {
 val FacilityOBJ = new FacilitiesClass()
 var Str_vector_Values = FacilityOBJ.ArrayToString(RatioValues)
 str_results = Str_vector_Values
- //new_Back_Row = Row(vector_Values)
+ // new_Back_Row = Row(vector_Values)
 }
 // CharacterFeatures
- //new_Back_Row
+ // new_Back_Row
 str_results.trim()
 }
@@ -2107,13 +1431,13 @@ class VandalismDetection extends Serializable {
 def Words_Features(row: Row): String = {
 var str_results = ""
- //Row from partitioned Pair RDD:
+ // Row from partitioned Pair RDD:
 var new_Back_Row = Row()
- //Revision ID current operation:
+ // Revision ID current operation:
 var RevisionID = row(0)
- //row(2) = represent the Comment:
+ // row(2) = represent the Comment:
 var CommentRecord_AsString = row(2).toString()
- //Extract comment tail from the Normal comment-Depending on the paperes, we apply character feature extraction on comment Tail
+ // Extract comment tail from the Normal comment; following the papers, we apply character feature extraction on the comment tail
 val CommentObj = new CommentProcessor()
 val Temp_commentTail = CommentObj.Extract_CommentTail(CommentRecord_AsString)
 var tempQids = 0.0
@@ -2146,9 +1470,9 @@ class VandalismDetection extends Serializable {
 temLinks = porportion_links
 } else {
- var porortion_Qids = tempQids //=0.0
- var porportion_Lang = temlangs //=0.0
- var porportion_links = temLinks //=0.0
+ var porortion_Qids = tempQids // =0.0
+ var porportion_Lang = temlangs // =0.0
+ var porportion_links = temLinks // =0.0
 }
@@ -2164,11 +1488,11 @@ class VandalismDetection extends Serializable {
 var Prev_commentTail = CommentObj.Extract_CommentTail(prevComment.toString())
 if (Prev_commentTail != "") {
- //11.Feature Current_Previous_CommentTial_NumberSharingWords:
+ // 11.Feature Current_Previous_CommentTial_NumberSharingWords:
 val NumberSharingWords = WordsOBJ.Current_Previous_CommentTial_NumberSharingWords(Temp_commentTail, Prev_commentTail)
 ArrayElements(12) = NumberSharingWords.toDouble
- //12.Feature Current_Previous_CommentTial_NumberSharingWords without Stopword:
+ // 12.Feature Current_Previous_CommentTial_NumberSharingWords without Stopword:
 val NumberSharingWordsWithoutStopwords = WordsOBJ.Current_Previous_CommentTial_NumberSharingWords_WithoutStopWords(Temp_commentTail, Prev_commentTail)
 ArrayElements(13) = NumberSharingWordsWithoutStopwords.toDouble
@@ -2218,8 +1542,8 @@ class VandalismDetection extends Serializable {
 str_results = Str_vector_Values
 }
- //new_Back_Row
- //Word_Features
+ // new_Back_Row
+ // Word_Features
 str_results
 }
@@ -2227,16 +1551,16 @@ class VandalismDetection extends Serializable {
 def Sentences_Features(row: Row): String = {
 var str_results = ""
- //This will be used to save values in vector
+ // This will be used to save values in vector
 var DoubleValues = new Array[Double](4)
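Note on the Extract_CommentTail calls in these functions: Wikidata edit summaries typically look like "/* action:params */ free text", and the features run on the free-text tail. The helper lives elsewhere in the package; a plausible, purely illustrative reduction of what such an extraction does:

  // Illustrative sketch only, not the actual CommentProcessor implementation:
  // strip the auto-generated "/* ... */" prefix and keep the free-text tail.
  def extractCommentTail(comment: String): String =
    comment.replaceFirst("""^/\*.*?\*/\s*""", "")
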
- //1. Row from partitioned Pair RDD:
+ // 1. Row from partitioned Pair RDD:
 var new_Back_Row = Row()
- //2. Revision ID current operation:
+ // 2. Revision ID current operation:
 var RevisionID = row(0)
- //3. row(2) = represent the Full Comment:
+ // 3. row(2) = represent the Full Comment:
 var CommentRecord_AsString = row(2).toString()
- //4. extract comment tail from the Normal comment-Depending on the paperes, we apply character feature extraction on comment Tail
+ // 4. extract comment tail from the Normal comment; following the papers, we apply character feature extraction on the comment tail
 val CommentObj = new CommentProcessor()
 val Temp_commentTail = CommentObj.Extract_CommentTail(CommentRecord_AsString)
@@ -2249,14 +1573,14 @@ class VandalismDetection extends Serializable {
 DoubleValues(0) = comment_Tail_Length
 // Feature 2 similarity between comment contain Sitelink and label :
- //Check the language in comment that contain sitelinkword: --------------------
+ // Check the language in comment that contain sitelinkword: --------------------
 val Sitelink_inCommentObj = new SentencesFeatures()
 if (CommentRecord_AsString.contains("sitelink")) { // start 1 loop
- //1. First step : get the language from comment
+ // 1. First step : get the language from comment
 val languagesitelink_from_Comment = Sitelink_inCommentObj.extract_CommentSiteLink_LanguageType(CommentRecord_AsString).trim()
- //2. second step: get the Label tage from json table :
+ // 2. second step: get the Label tag from json table :
 if (row(9).toString() != "[]") { // start 2 loop
 // if (row(8).toString() != "") {
 val jsonStr = "\"\"\"" + row(9).toString() + "\"\"\"" // row(9) is the label record
@@ -2271,7 +1595,7 @@ class VandalismDetection extends Serializable {
 DoubleValues(1) = 0.0
 }
- } // endd 2 loop
+ } // end 2 loop
 else {
 DoubleValues(1) = 0.0
@@ -2285,12 +1609,12 @@ class VandalismDetection extends Serializable {
 }
 // Feature 3 similarity between comment contain label word and sitelink
- //Check the language in comment that contain Label word:-----------------------
+ // Check the language in comment that contain Label word:-----------------------
 val Label_inCommentObj = new SentencesFeatures()
 if (CommentRecord_AsString.contains("label")) {
- //1. First step : get the language from comment
+ // 1. First step : get the language from comment
 val languageLabel_from_Comment = Label_inCommentObj.extract_CommentLabel_LanguageType(CommentRecord_AsString).trim()
- //2. second step: get the site link tage from json table :
+ // 2. second step: get the site link tag from json table :
 if (row(13).toString() != "[]") { // start 2 loop
 val jsonStr = "\"\"\"" + row(13).toString() + "\"\"\"" // row(13) is the sitelink record
 val jsonObj: JSONObject = new JSONObject(row(13).toString())
@@ -2351,7 +1675,7 @@ class VandalismDetection extends Serializable {
 }
- //new_Back_Row
+ // new_Back_Row
 str_results
 }
@@ -2359,7 +1683,7 @@ class VandalismDetection extends Serializable {
 // statement Features :
 def Statement_Features(row: Row): String = {
 var full_Str_Result = ""
- //1. row(2) = represent the Comment:
+ // 1. row(2) = represent the Comment:
 var fullcomment = row(2).toString()
 val StatementOBJ = new StatementFeatures()
@@ -2400,9 +1724,9 @@ class VandalismDetection extends Serializable {
 var str_results = ""
 var DoubleValues = new Array[Double](10) // you should change the index when add more element feature
- //Row from partitioned Pair RDD:
+ // Row from partitioned Pair RDD:
 var new_Back_Row = Row()
- //row(7) = represent the Contributor name:
+ // row(7) = represent the Contributor name:
 var full_comment = row(2).toString()
 var contributor_Name = row(7).toString()
 var contributor_ID = row(6).toString()
@@ -2411,7 +1735,7 @@ class VandalismDetection extends Serializable {
 val useFeatureOBJ = new UserFeatures()
- //1. Is privileged : There are 5 cases : if one of these cases is true that mean it is privileged else it is not privileged user
+ // 1. Is privileged : There are 5 cases : if one of these cases is true that mean it is privileged else it is not privileged user
 var flag_case1 = useFeatureOBJ.CheckName_isGlobalSysopUser(contributor_Name)
 var flag_case2 = useFeatureOBJ.CheckName_isGlobalRollBackerUser(contributor_Name)
 var flag_case3 = useFeatureOBJ.CheckName_isGlobalStewarUser(contributor_Name)
@@ -2427,7 +1751,7 @@ class VandalismDetection extends Serializable {
 DoubleValues(0) = 0.0
 }
- //2. is BotUser : There are 3 cases :
+ // 2. is BotUser : There are 3 cases :
 var flag_case1_1 = useFeatureOBJ.CheckName_isLocalBotUser(contributor_Name)
 var flag_case2_2 = useFeatureOBJ.CheckName_isGlobalbotUser(contributor_Name)
 var flag_case3_3 = useFeatureOBJ.CheckName_isExtensionBotUser(contributor_Name)
@@ -2441,7 +1765,7 @@ class VandalismDetection extends Serializable {
 DoubleValues(1) = 0.0
 }
- //3. is Bot User without BotflagUser : There is 1 case :
+ // 3. is Bot User without BotflagUser : There is 1 case :
 var flag_BUWBF = useFeatureOBJ.CheckName_isBotUserWithoutBotFlagUser(contributor_Name)
 if (flag_BUWBF == true) {
@@ -2452,7 +1776,7 @@ class VandalismDetection extends Serializable {
 }
- //4. is Property creator :
+ // 4. is Property creator :
 var flagCreator = useFeatureOBJ.CheckName_isPropertyCreator(contributor_Name)
 if (flagCreator == true) {
@@ -2463,7 +1787,7 @@ class VandalismDetection extends Serializable {
 }
- //5. is translator :
+ // 5. is translator :
 var flagTranslator = useFeatureOBJ.CheckName_isTranslator(contributor_Name)
 if (flagTranslator == true) {
 DoubleValues(4) = 1.0
@@ -2471,7 +1795,7 @@ class VandalismDetection extends Serializable {
 DoubleValues(4) = 0.0
 }
- //6. is register user:
+ // 6. is register user:
 var flagRegistered = useFeatureOBJ.IsRegisteroUser(contributor_Name)
 if (flagRegistered == true) {
 DoubleValues(5) = 1.0
@@ -2490,13 +1814,13 @@ class VandalismDetection extends Serializable {
 }
- //7. IP as a long value
+ // 7. IP as a long value
 if (contributor_IP != "0") {
 DoubleValues(6) = contributor_IP.toDouble
 } else {
 DoubleValues(6) = 0.0
 }
- //8. ID
+ // 8. ID
 if (contributor_ID != "0") {
 DoubleValues(7) = contributor_ID.toDouble
@@ -2504,7 +1828,7 @@ class VandalismDetection extends Serializable {
 DoubleValues(7) = 0.0
 }
- //9- 10 BitrthDate - DeatDate:
+ // 9-10 BirthDate - DeathDate:
 var DateObj = new UserFeatures()
 var BirthDate = DateObj.IsBirthDate(full_comment)
@@ -2540,11 +1864,11 @@ class VandalismDetection extends Serializable {
 var str_results = ""
 var DoubleValues = new Array[Double](11)
- //Row from partitioned Pair RDD:
+ // Row from partitioned Pair RDD:
 var new_Back_Row = Row()
 var ItemOBJ = new ItemFeatures()
- //1. Feature depending on Label:
+ // 1. Feature depending on Label:
 var NumberOfLabel = 0.0
 var Label_String = row(9).toString()
 if (Label_String != "[]") {
@@ -2554,7 +1878,7 @@ class VandalismDetection extends Serializable {
 NumberOfLabel = 0.0
 DoubleValues(0) = NumberOfLabel
 }
- //2. Feature depending on Description:
+ // 2. Feature depending on Description:
 var Description_String = row(10).toString()
 var NumberOfDescription = 0.0
 if (Description_String != "[]") {
@@ -2566,7 +1890,7 @@ class VandalismDetection extends Serializable {
 DoubleValues(1) = NumberOfDescription
 }
- //3. Feature depending on Aliases:
+ // 3. Feature depending on Aliases:
 var Aliases_String = row(11).toString()
 var NumberOfAliases = 0.0
 if (Aliases_String != "[]") {
@@ -2578,7 +1902,7 @@ class VandalismDetection extends Serializable {
 DoubleValues(2) = NumberOfAliases
 }
- //4. Feature depending on Claims :
+ // 4. Feature depending on Claims :
 var Claims_String = row(12).toString()
 var NumberOfClaims = 0.0
 if (Claims_String != "[]") {
@@ -2590,7 +1914,7 @@ class VandalismDetection extends Serializable {
 DoubleValues(3) = NumberOfClaims
 }
- //5. Feature depending on SiteLink
+ // 5. Feature depending on SiteLink
 var SiteLink_String = row(13).toString()
 var NumberOfSitelink = 0.0
 if (SiteLink_String != "[]") {
@@ -2603,7 +1927,7 @@ class VandalismDetection extends Serializable {
 }
- //6. Feature depending on Claims - statements :
+ // 6. Feature depending on Claims - statements :
 var statement_String = row(12).toString() // from claim
 var NumberOfstatement = 0.0
 if (statement_String != "[]") {
@@ -2616,7 +1940,7 @@ class VandalismDetection extends Serializable {
 }
- //7. Feature depending on Claims - References :
+ // 7. Feature depending on Claims - References :
 var References_String = row(12).toString() // from claim
 var NumberOfReferences = 0.0
 if (References_String != "[]") {
@@ -2628,7 +1952,7 @@ class VandalismDetection extends Serializable {
 DoubleValues(6) = NumberOfReferences
 }
- //8. Feature depending on claim
+ // 8. Feature depending on claim
 var Qualifier_String = row(12).toString() // from claim
 var NumberOfQualifier = 0.0
 if (Qualifier_String != "[]") {
@@ -2654,7 +1978,7 @@ class VandalismDetection extends Serializable {
 }
- //10. Feature depending on Site link
+ // 10. Feature depending on Site link
 var BadgesString = row(13).toString() // from claim
 var NumberOfBadges = 0.0
 if (BadgesString != "[]") {
@@ -2667,7 +1991,7 @@ class VandalismDetection extends Serializable {
 }
- //11. Item Title (instead of Item ID)
+ // 11. Item Title (instead of Item ID)
 var Item_Id_Title = row(1).toString().replace("Q", "")
 var Item = Item_Id_Title.trim().toDouble
 DoubleValues(10) = Item
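Note: every counter in Item_Features reduces to "how many entries does this JSON-valued field hold", with "[]" treated as empty. A compact form of that check (field name illustrative, using the same org.json dependency the class already imports):

  import org.json.JSONObject

  // Count entries in one JSON object field, treating "[]" and null as empty.
  def countEntries(jsonField: String): Double =
    if (jsonField == null || jsonField == "[]") 0.0
    else new JSONObject(jsonField).length().toDouble
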
@@ -2688,17 +2012,17 @@ class VandalismDetection extends Serializable {
 def Revision_Features(row: Row): String = {
- //var DoubleValues = new Array[Double](6)
+ // var DoubleValues = new Array[Double](6)
 var full_Str_Result = ""
- //1. Row from partitioned Pair RDD:
+ // 1. Row from partitioned Pair RDD:
 var new_Back_Row = Row()
- //2. Revision ID current operation:
+ // 2. Revision ID current operation:
 var RevisionID = row(0)
- //3. row(2) = represent the Comment:
+ // 3. row(2) = represent the Comment:
 var fullcomment = row(2).toString()
 // DoubleValues(0) = length
- //1. Revision Language :---------------------------------------------------------------------------------
+ // 1. Revision Language :---------------------------------------------------------------------------------
 var comment_for_Language = row(2).toString()
 val CommentLanguageOBJ = new RevisionFeatures()
@@ -2709,7 +2033,7 @@ class VandalismDetection extends Serializable {
 full_Str_Result = "NA".trim()
 }
- //2. Revision Language local:----------------------------------------------------------------------------
+ // 2. Revision Language local:----------------------------------------------------------------------------
 if (language != "NA") {
 if (language.contains("-")) { // E.g.Revision ID = 10850 sample1
 var LocalLangArray: Array[String] = language.split("-", 2)
@@ -2724,7 +2048,7 @@ class VandalismDetection extends Serializable {
 full_Str_Result = full_Str_Result + "," + "NA"
 }
- //3. Is it Latin Language or Not:-------------------------------------------------------------------------
+ // 3. Is it Latin Language or Not:-------------------------------------------------------------------------
 val revisionFeatureOBJ = new RevisionFeatures()
 val flagLatin = revisionFeatureOBJ.Check_ContainLanguageLatin_NonLatin(language)
@@ -2737,26 +2061,26 @@ class VandalismDetection extends Serializable {
 full_Str_Result = full_Str_Result + "," + "0.0"
 }
- //4. Json Length : be care full to RDD where the json before parsed--------------------------------------
+ // 4. Json Length : be careful, at this stage the json in the RDD is not parsed yet--------------------------------------
 // var Jason_Text = row(8).toString()
- //replacing_with_Quoto for cleaning the Json tag from extr tags such as ...
+ // replacing_with_Quoto for cleaning the Json tag from extra tags such as ...
 var Jason_Text = replacing_with_Quoto(row(0).toString(), row(8).toString())
 var Json_Length = Jason_Text.length()
 full_Str_Result = full_Str_Result + "," + Json_Length.toString()
- //5. Revision Action -:-----------------------------------------------------------------------
+ // 5. Revision Action -:-----------------------------------------------------------------------
 val CommentProcessOBJ1 = new CommentProcessor()
 val actions1 = CommentProcessOBJ1.Extract_Actions_FromComments(fullcomment)
 var ActionsArray1: Array[String] = actions1.split("_", 2)
 var action1 = ActionsArray1(0).toString()
- //var SubAction = ActionsArray(1)
+ // var SubAction = ActionsArray(1)
 full_Str_Result = full_Str_Result + "," + action1.trim()
- //full_Str_Result = full_Str_Result + "," + SubAction.trim()
+ // full_Str_Result = full_Str_Result + "," + SubAction.trim()
-
- //6. Revision Prev-Action :-------------------------------------------------------------------------------
+ // 6. Revision Prev-Action :-------------------------------------------------------------------------------
 if (row(19) != null) {
 var Prev_fullcomment1 = row(19).toString()
 val Prev_CommentProcessOBJ1 = new CommentProcessor()
@@ -2765,7 +2089,7 @@ class VandalismDetection extends Serializable {
 var Prev_action1 = ActionsArray1(0).trim()
 // var Prev_SubAction = ActionsArray(1).trim()
 full_Str_Result = full_Str_Result + "," + Prev_action1.trim()
- //full_Str_Result = full_Str_Result + "," + Prev_SubAction.trim()
+ // full_Str_Result = full_Str_Result + "," + Prev_SubAction.trim()
 // println(row(16).toString())
 } else {
@@ -2798,11 +2122,11 @@ class VandalismDetection extends Serializable {
 var RevisionParent = row(3).toString()
 full_Str_Result = full_Str_Result + "," + RevisionParent.toString().trim()
-    //9. Revision Time Stamp------------------------------------------------------------------------------------------------
+    // 9. Revision Time Stamp------------------------------------------------------------------------------------------------
     var RevisionTimeZone = row(4).toString()
     full_Str_Result = full_Str_Result + "," + RevisionTimeZone
-    //10. Revision Size:------------------------------------------------------------------------------------------------
+    // 10. Revision Size:------------------------------------------------------------------------------------------------
     var RevisionBody = row(0).toString() + row(2).toString() + row(3).toString() + row(4).toString() + row(8).toString() + row(14).toString() + row(15).toString() + row(16).toString()
     if (row(5).toString() != "0") {
@@ -2816,7 +2140,7 @@ class VandalismDetection extends Serializable {
     }
-    //11. ContentType: take Action1 as input : --------------------------------------------------------------
+    // 11. ContentType: take Action1 as input : --------------------------------------------------------------
     val CommentProcessOBJ_New = new CommentProcessor()
     val actions_New = CommentProcessOBJ_New.Extract_Actions_FromComments(fullcomment)
@@ -2868,7 +2192,7 @@ class VandalismDetection extends Serializable {
     }
-    //13. Time since last Revision: ----------------------------------------------------------------------
+    // 13. Time since last Revision: ----------------------------------------------------------------------
     if (row(21) != null) {
@@ -2886,11 +2210,11 @@ class VandalismDetection extends Serializable {
     }
-    //14. Comment Length:---------------------------------------
+    // 14. Comment Length:---------------------------------------
     var lengthcomment = fullcomment.length().toString()
     full_Str_Result = full_Str_Result + "," + lengthcomment
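Every step in this function appends to full_Str_Result with a leading comma, so the method ultimately emits one CSV-style record per revision. The same record can be built by collecting fields and joining once, which avoids the repeated string copies; a small sketch with illustrative values:

  // Sketch: join feature fields once instead of repeated "," concatenation.
  def joinFeatures(fields: Seq[String]): String = fields.mkString(",")
  // joinFeatures(Seq("en", "1024", "wbsetlabel")) == "en,1024,wbsetlabel"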
-    //15. Revision SubAction:
+    // 15. Revision SubAction:
     val CommentProcessOBJ2 = new CommentProcessor()
     val actions2 = CommentProcessOBJ2.Extract_Actions_FromComments(fullcomment)
@@ -2898,7 +2222,7 @@ class VandalismDetection extends Serializable {
     var SubAction2 = ActionsArray2(1)
     full_Str_Result = full_Str_Result + "," + SubAction2.trim()
-    //16.Prev_revision SubAction:
+    // 16.Prev_revision SubAction:
     if (row(19) != null) {
       var Prev_fullcomment2 = row(19).toString()
       val Prev_CommentProcessOBJ2 = new CommentProcessor()
@@ -2921,7 +2245,7 @@ class VandalismDetection extends Serializable {
     }
-    //========================
+    // ========================
   def RoundDouble(va: Double): Double = {
@@ -2984,4 +2308,4 @@ class VandalismDetection extends Serializable {
   }
-}// endl class -------
+}
diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/WordsFeatures.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/WordsFeatures.scala
index 1cf0ee1..9462927 100644
--- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/WordsFeatures.scala
+++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/outliers/vandalismdetection/WordsFeatures.scala
@@ -1,7 +1,8 @@
 package net.sansa_stack.ml.spark.outliers.vandalismdetection
 
-import java.util.regex.{ Pattern, Matcher }
 import java.util.{ List, Arrays, ArrayList }
+import java.util.regex.{ Pattern, Matcher }
+
 import org.apache.commons.lang3.StringUtils
 
 class WordsFeatures extends Serializable {
@@ -15,53 +16,53 @@ class WordsFeatures extends Serializable {
   def Vector_Words_Feature(StrValue: String): Array[Double] = {
     var RatioValues = new Array[Double](17)
     val WordsFeature_OBJ = new WordsFeatures()
-    //1. Double for LanguageWord Ratio - ok
+    // 1. Double for LanguageWord Ratio - ok
     val LanguageWord = LanguageWordRatio_Character(StrValue)
     if (!LanguageWord.isNaN()) {
       RatioValues(0) = RoundDouble(LanguageWord)
     }
-    //2. Boolean --> Double for Contain language word - ok (1 Boolean)
+    // 2. Boolean --> Double for Contain language word - ok (1 Boolean)
     val IsContainLanguageWord = ContainLanguageWord(StrValue)
     if (IsContainLanguageWord == true) {
       RatioValues(1) = 1.0
     } else if (IsContainLanguageWord == false) {
       RatioValues(1) = 0.0
     }
-    //3.Double for LowerCaseWord Ratio - ok
+    // 3.Double for LowerCaseWord Ratio - ok
     val LowerCaseWord = LowercaseWordRation(StrValue)
     if (!LowerCaseWord.isNaN()) {
       RatioValues(2) = RoundDouble(LowerCaseWord)
     }
-    //4.Integer --> to Double for LongestWord - ok (1 Integer)
+    // 4.Integer --> to Double for LongestWord - ok (1 Integer)
     val LongWord = LongestWord(StrValue)
     if (LongWord != null) {
       val castedValue = LongWord.toDouble
       RatioValues(3) = castedValue
     }
-    //5.Boolean --> Double for word Contain URL -ok(2 boolean)
+    // 5.Boolean --> Double for word Contain URL -ok(2 boolean)
     val IsWordContainURL = ContainURLWord(StrValue)
     if (IsWordContainURL == true) {
       RatioValues(4) = 1.0
     } else if (IsWordContainURL == false) {
       RatioValues(4) = 0.0
     }
-    //6.Double for Bad Word Ratio - ok
+    // 6.Double for Bad Word Ratio - ok
     val BadWord = BadWordRation(StrValue)
    if (!BadWord.isNaN()) {
       RatioValues(5) = RoundDouble(BadWord)
     }
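The six ratio features above (and most of those that follow) funnel through a shared WordRatio(str, pattern) helper. A minimal sketch of such a ratio, assuming whitespace tokenization and NaN for empty input, which matches the isNaN guards around every caller (the class's actual helper may differ in detail):

  import java.util.regex.Pattern

  // Sketch: fraction of whitespace-separated tokens matching the pattern.
  def wordRatio(str: String, pattern: Pattern): Double = {
    val words = str.split("\\s+").filter(_.nonEmpty)
    if (words.isEmpty) Double.NaN
    else words.count(w => pattern.matcher(w).matches()).toDouble / words.length
  }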
-    //7. Double for UppercaseWord Ratio -ok
+    // 7. Double for UppercaseWord Ratio -ok
     val UpperCaseWord = UppercaseWordRation(StrValue)
     if (!UpperCaseWord.isNaN()) {
       RatioValues(6) = RoundDouble(UpperCaseWord)
     }
-    //8.Double for Ban Word Ratio - ok
+    // 8.Double for Ban Word Ratio - ok
     val BanWord = BanWordRation(StrValue)
     if (!BanWord.isNaN()) {
       RatioValues(7) = RoundDouble(BanWord)
     }
-    //9.Boolean Femal FirstName (3 Boolean )
+    // 9.Boolean Femal FirstName (3 Boolean )
     val IsFemalFirstName = FemaleName_word(StrValue)
     if (IsFemalFirstName == true) {
@@ -70,7 +71,7 @@ class WordsFeatures extends Serializable {
       RatioValues(8) = 0.0
     }
-    //10. Boolean Male FirstName (4 Boolean)
+    // 10. Boolean Male FirstName (4 Boolean)
     val IsMaleFirstName = MaleName_word(StrValue)
     if (IsMaleFirstName == true) {
       RatioValues(9) = 1.0
@@ -78,7 +79,7 @@ class WordsFeatures extends Serializable {
       RatioValues(9) = 0.0
     }
-    //11. Boolean containBadWord_word (5 Boolean )
+    // 11. Boolean containBadWord_word (5 Boolean )
     val IsContainBad_Word = containBadWord_word(StrValue)
     if (IsContainBad_Word == true) {
@@ -87,7 +88,7 @@ class WordsFeatures extends Serializable {
       RatioValues(10) = 0.0
     }
-    //12. Boolean containBanWord_word (6 Boolean)
+    // 12. Boolean containBanWord_word (6 Boolean)
     val IsContainBan_Word = BanBuilderWordlist_word(StrValue)
     if (IsContainBan_Word == true) {
@@ -125,15 +126,66 @@ class WordsFeatures extends Serializable {
   }
   //1.Language Words Ratio :
-  val regex_LanguageWordRatio: String = "(a(frikaa?ns|lbanian?|lemanha|ng(lais|ol)|ra?b(e?|[ei]c|ian?|isc?h)|rmenian?|ssamese|azeri|z[e\\u0259]rba(ijani?|ycan(ca)?|yjan)|\\u043d\\u0433\\u043b\\u0438\\u0439\\u0441\\u043a\\u0438\\u0439)|b(ahasa( (indonesia|jawa|malaysia|melayu))?|angla|as(k|qu)e|[aeo]ng[ao]?li|elarusian?|okm\\u00e5l|osanski|ra[sz]il(ian?)?|ritish( kannada)?|ulgarian?)|c(ebuano|hina|hinese( simplified)?|zech|roat([eo]|ian?)|atal[a\\u00e0]n?|\\u0440\\u043f\\u0441\\u043a\\u0438|antonese)|[c\\u010d](esky|e[s\\u0161]tina)\r\n|d(an(isc?h|sk)|e?uts?ch)|e(esti|ll[hi]nika|ng(els|le(ski|za)|lisc?h)|spa(g?[n\\u00f1]h?i?ol|nisc?h)|speranto|stonian|usk[ae]ra)|f(ilipino|innish|ran[c\\u00e7](ais|e|ez[ao])|ren[cs]h|arsi|rancese)|g(al(ego|ician)|uja?rati|ree(ce|k)|eorgian|erman[ay]?|ilaki)|h(ayeren|ebrew|indi|rvatski|ungar(y|ian))|i(celandic|ndian?|ndonesian?|ngl[e\\u00ea]se?|ngilizce|tali(ano?|en(isch)?))|ja(pan(ese)?|vanese)|k(a(nn?ada|zakh)|hmer|o(rean?|sova)|urd[i\\u00ee])|l(at(in[ao]?|vi(an?|e[s\\u0161]u))|ietuvi[u\\u0173]|ithuanian?)|m(a[ck]edon(ian?|ski)|agyar|alay(alam?|sian?)?|altese|andarin|arathi|elayu|ontenegro|ongol(ian?)|yanmar)|n(e(d|th)erlands?|epali|orw(ay|egian)|orsk( bokm[a\\u00e5]l)?|ynorsk)|o(landese|dia)|p(ashto|ersi?an?|ol(n?isc?h|ski)|or?tugu?[e\\u00ea]se?(( d[eo])? brasil(eiro)?| ?\\(brasil\\))?|unjabi)|r(om[a\\u00e2i]ni?[a\\u0103]n?|um(ano|\\u00e4nisch)|ussi([ao]n?|sch))|s(anskrit|erbian|imple english|inha?la|lov(ak(ian?)?|en\\u0161?[c\\u010d]ina|en(e|ij?an?)|uomi)|erbisch|pagnolo?|panisc?h|rbeska|rpski|venska|c?wedisc?h|hqip)|t(a(galog|mil)|elugu|hai(land)?|i[e\\u1ebf]ng vi[e\\u1ec7]t|[u\\u00fc]rk([c\\u00e7]e|isc?h|i\\u015f|ey))|u(rdu|zbek)|v(alencia(no?)?|ietnamese)|welsh|(\\u0430\\u043d\\u0433\\u043b\\u0438\\u0438\\u0441|[k\\u043a]\\u0430\\u043b\\u043c\\u044b\\u043a\\u0441|[k\\u043a]\\u0430\\u0437\\u0430\\u0445\\u0441|\\u043d\\u0435\\u043c\\u0435\\u0446|[p\\u0440]\\u0443\\u0441\\u0441|[y\\u0443]\\u0437\\u0431\\u0435\\u043a\\u0441)\\u043a\\u0438\\u0439( \\u044f\\u0437\\u044b\\u043a)??|\\u05e2\\u05d1\\u05e8\\u05d9\\u05ea|[k\\u043a\\u049b](\\u0430\\u0437\\u0430[\\u043a\\u049b]\\u0448\\u0430|\\u044b\\u0440\\u0433\\u044b\\u0437\\u0447\\u0430|\\u0438\\u0440\\u0438\\u043b\\u043b)|\\u0443\\u043a\\u0440\\u0430\\u0457\\u043d\\u0441\\u044c\\u043a(\\u0430|\\u043e\\u044e)|\\u0431(\\u0435\\u043b\\u0430\\u0440\\u0443\\u0441\\u043a\\u0430\\u044f|\\u044a\\u043b\\u0433\\u0430\\u0440\\u0441\\u043a\\u0438( \\u0435\\u0437\\u0438\\u043a)?)|\\u03b5\\u03bb\\u03bb[\\u03b7\\u03b9]\\u03bd\\u03b9\\u03ba(\\u03ac|\\u03b1)|\\u10e5\\u10d0\\u10e0\\u10d7\\u10e3\\u10da\\u10d8|\\u0939\\u093f\\u0928\\u094d\\u0926\\u0940|\\u0e44\\u0e17\\u0e22|[m\\u043c]\\u043e\\u043d\\u0433\\u043e\\u043b(\\u0438\\u0430)?|([c\\u0441]\\u0440\\u043f|[m\\u043c]\\u0430\\u043a\\u0435\\u0434\\u043e\\u043d)\\u0441\\u043a\\u0438|\\u0627\\u0644\\u0639\\u0631\\u0628\\u064a\\u0629|\\u65e5\\u672c\\u8a9e|\\ud55c\\uad6d(\\ub9d0|\\uc5b4)|\\u200c\\u0939\\u093f\\u0928\\u0926\\u093c\\u093f|\\u09ac\\u09be\\u0982\\u09b2\\u09be|\\u0a2a\\u0a70\\u0a1c\\u0a3e\\u0a2c\\u0a40|\\u092e\\u0930\\u093e\\u0920\\u0940|\\u0c95\\u0ca8\\u0ccd\\u0ca8\\u0ca1|\\u0627\\u064f\\u0631\\u062f\\u064f\\u0648|\\u0ba4\\u0bae\\u0bbf\\u0bb4\\u0bcd|\\u0c24\\u0c46\\u0c32\\u0c41\\u0c17\\u0c41|\\u0a97\\u0ac1\\u0a9c\\u0ab0\\u0abe\\u0aa4\\u0ac0|\\u0641\\u0627\\u0631\\u0633\\u06cc|\\u067e\\u0627\\u0631\\u0633\\u06cc|\\u0d2e\\u0d32\\u0d2f\\u0d3e\\u0d33\\u0d02|\\u067e\\u069a\\u062a\\u0648|\\u1019\\u103c\\u1014\\u103a\\u1019\\u102c\\u1018\\u102c\\u101e\\u102c|\\u4e2d\\u6587(\\u7b80\\u4f53|\\u7e41\\u9ad4)?|\\u4e2d\\u6587\\uff08(\\u7b80\\u4f53?|\\u7e41\\u9ad4)\\uff09|\\u7b80\\u4f53|\\u7e41\\u9ad4)";
+  val regex_LanguageWordRatio: String = """(a(frikaa?ns|lbanian?|lemanha|ng(lais|ol)|ra?b(e?|[ei]c|ian?|isc?h)
+    |rmenian?|ssamese|azeri|z[e\\u0259]rba(ijani?|ycan(ca)?|yjan)|\\u043d\\u0433\\u043b\\u0438\\u0439\\u0441\\u043a\\u0438\\u0439)
+    |b(ahasa( (indonesia|jawa|malaysia|melayu))?|angla|as(k|qu)e|[aeo]ng[ao]?li|elarusian?|okm\\u00e5l|osanski
+    |ra[sz]il(ian?)?|ritish( kannada)?|ulgarian?)|c(ebuano|hina|hinese( simplified)?|zech|roat([eo]|ian?)
+    |atal[a\\u00e0]n?|\\u0440\\u043f\\u0441\\u043a\\u0438|antonese)|[c\\u010d](esky|e[s\\u0161]tina)\r\n
+    |d(an(isc?h|sk)|e?uts?ch)|e(esti|ll[hi]nika|ng(els|le(ski|za)|lisc?h)|spa(g?[n\\u00f1]h?i?ol|nisc?h)
+    |speranto|stonian|usk[ae]ra)|f(ilipino|innish|ran[c\\u00e7](ais|e|ez[ao])|ren[cs]h|arsi|rancese)
+    |g(al(ego|ician)|uja?rati|ree(ce|k)|eorgian|erman[ay]?|ilaki)|h(ayeren|ebrew|indi|rvatski|ungar(y|ian))
+    |i(celandic|ndian?|ndonesian?|ngl[e\\u00ea]se?|ngilizce|tali(ano?|en(isch)?))|ja(pan(ese)?|vanese)
+    |k(a(nn?ada|zakh)|hmer|o(rean?|sova)|urd[i\\u00ee])|l(at(in[ao]?|vi(an?|e[s\\u0161]u))|ietuvi[u\\u0173]
+    |ithuanian?)|m(a[ck]edon(ian?|ski)|agyar|alay(alam?|sian?)?|altese|andarin|arathi|elayu|ontenegro|ongol(ian?)
+    |yanmar)|n(e(d|th)erlands?|epali|orw(ay|egian)|orsk( bokm[a\\u00e5]l)?|ynorsk)|o(landese|dia)|p(ashto
+    |ersi?an?|ol(n?isc?h|ski)|or?tugu?[e\\u00ea]se?(( d[eo])? brasil(eiro)?| ?\\(brasil\\))?|unjabi)|r(om[a\\u00e2i]ni?[a\\u0103]n?|um(ano
+    |\\u00e4nisch)|ussi([ao]n?|sch))|s(anskrit|erbian|imple english|inha?la|lov(ak(ian?)?|en\\u0161?[c\\u010d]ina|en(e|ij?an?)|uomi)
+    |erbisch|pagnolo?|panisc?h|rbeska|rpski|venska|c?wedisc?h|hqip)|t(a(galog|mil)|elugu|hai(land)?|i[e\\u1ebf]ng vi[e\\u1ec7]t
+    |[u\\u00fc]rk([c\\u00e7]e|isc?h|i\\u015f|ey))|u(rdu|zbek)|v(alencia(no?)?|ietnamese)|welsh
+    |(\\u0430\\u043d\\u0433\\u043b\\u0438\\u0438\\u0441|[k\\u043a]\\u0430\\u043b\\u043c\\u044b\\u043a\\u0441
+    |[k\\u043a]\\u0430\\u0437\\u0430\\u0445\\u0441|\\u043d\\u0435\\u043c\\u0435\\u0446|[p\\u0440]\\u0443\\u0441\\u0441
+    |[y\\u0443]\\u0437\\u0431\\u0435\\u043a\\u0441)\\u043a\\u0438\\u0439( \\u044f\\u0437\\u044b\\u043a)??|\\u05e2\\u05d1\\u05e8\\u05d9\\u05ea
+    |[k\\u043a\\u049b](\\u0430\\u0437\\u0430[\\u043a\\u049b]\\u0448\\u0430|\\u044b\\u0440\\u0433\\u044b\\u0437\\u0447\\u0430|\\u0438\\u0440\\u0438\\u043b\\u043b)
+    |\\u0443\\u043a\\u0440\\u0430\\u0457\\u043d\\u0441\\u044c\\u043a(\\u0430|\\u043e\\u044e)|\\u0431(\\u0435\\u043b\\u0430\\u0440\\u0443\\u0441\\u043a\\u0430\\u044f
+    |\\u044a\\u043b\\u0433\\u0430\\u0440\\u0441\\u043a\\u0438( \\u0435\\u0437\\u0438\\u043a)?)|\\u03b5\\u03bb\\u03bb[\\u03b7\\u03b9]\\u03bd\\u03b9\\u03ba(\\u03ac|\\u03b1)
+    |\\u10e5\\u10d0\\u10e0\\u10d7\\u10e3\\u10da\\u10d8|\\u0939\\u093f\\u0928\\u094d\\u0926\\u0940|\\u0e44\\u0e17\\u0e22|[m\\u043c]\\u043e\\u043d\\u0433\\u043e\\u043b(\\u0438\\u0430)?
+    |([c\\u0441]\\u0440\\u043f|[m\\u043c]\\u0430\\u043a\\u0435\\u0434\\u043e\\u043d)\\u0441\\u043a\\u0438|\\u0627\\u0644\\u0639\\u0631\\u0628\\u064a\\u0629|\\u65e5\\u672c\\u8a9e
+    |\\ud55c\\uad6d(\\ub9d0|\\uc5b4)|\\u200c\\u0939\\u093f\\u0928\\u0926\\u093c\\u093f|\\u09ac\\u09be\\u0982\\u09b2\\u09be|\\u0a2a\\u0a70\\u0a1c\\u0a3e\\u0a2c\\u0a40
+    |\\u092e\\u0930\\u093e\\u0920\\u0940|\\u0c95\\u0ca8\\u0ccd\\u0ca8\\u0ca1|\\u0627\\u064f\\u0631\\u062f\\u064f\\u0648|\\u0ba4\\u0bae\\u0bbf\\u0bb4\\u0bcd
+    |\\u0c24\\u0c46\\u0c32\\u0c41\\u0c17\\u0c41|\\u0a97\\u0ac1\\u0a9c\\u0ab0\\u0abe\\u0aa4\\u0ac0|\\u0641\\u0627\\u0631\\u0633\\u06cc|\\u067e\\u0627\\u0631\\u0633\\u06cc
+    |\\u0d2e\\u0d32\\u0d2f\\u0d3e\\u0d33\\u0d02|\\u067e\\u069a\\u062a\\u0648|\\u1019\\u103c\\u1014\\u103a\\u1019\\u102c\\u1018\\u102c\\u101e\\u102c|\\u4e2d\\u6587(\\u7b80\\u4f53
+    |\\u7e41\\u9ad4)?|\\u4e2d\\u6587\\uff08(\\u7b80\\u4f53?|\\u7e41\\u9ad4)\\uff09|\\u7b80\\u4f53|\\u7e41\\u9ad4)"""
   val pattern_LanguageWordRatio: Pattern = Pattern.compile(regex_LanguageWordRatio);
   def LanguageWordRatio_Character(str: String): Double = {
     val result: Double = WordRatio(str, pattern_LanguageWordRatio)
     result
   }
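One caveat on the triple-quoted rewrites in this hunk: a Scala raw string keeps its newlines and leading spaces, so the line breaks introduced above become literal characters inside the compiled pattern; and .stripMargin (used for regex_containLanguageWord further down) strips up to and including '|', which here is the regex alternation bar itself. If the goal is a readable multi-line source with a byte-identical pattern, joining fragments explicitly avoids both pitfalls. A minimal sketch, with shortened, illustrative fragments:

  // Sketch: build one long pattern from fragments; nothing but the fragment
  // text ends up in the compiled regex (fragments abbreviated here).
  val languageFragments: Seq[String] = Seq(
    "(a(frikaa?ns|lbanian?|lemanha)",
    "|b(angla|as(k|qu)e)",
    "|welsh)")
  val languagePattern: java.util.regex.Pattern =
    java.util.regex.Pattern.compile(languageFragments.mkString)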
-  //2. Contain language word :
-  val regex_ContainLanguageWord: String = "(^|\\n)([ei]n )??(a(frikaa?ns|lbanian?|lemanha|ng(lais|ol)|ra?b(e?|[ei]c|ian?|isc?h)|rmenian?|ssamese|azeri|z[e\\u0259]rba(ijani?|ycan(ca)?|yjan)|\\u043d\\u0433\\u043b\\u0438\\u0439\\u0441\\u043a\\u0438\\u0439)|b(ahasa( (indonesia|jawa|malaysia|melayu))?|angla|as(k|qu)e|[aeo]ng[ao]?li|elarusian?|okm\\u00e5l|osanski|ra[sz]il(ian?)?|ritish( kannada)?|ulgarian?)|c(ebuano|hina|hinese( simplified)?|zech|roat([eo]|ian?)|atal[a\\u00e0]n?|\\u0440\\u043f\\u0441\\u043a\\u0438|antonese)|[c\\u010d](esky|e[s\\u0161]tina)\r\n|d(an(isc?h|sk)|e?uts?ch)|e(esti|ll[hi]nika|ng(els|le(ski|za)|lisc?h)|spa(g?[n\\u00f1]h?i?ol|nisc?h)|speranto|stonian|usk[ae]ra)|f(ilipino|innish|ran[c\\u00e7](ais|e|ez[ao])|ren[cs]h|arsi|rancese)|g(al(ego|ician)|uja?rati|ree(ce|k)|eorgian|erman[ay]?|ilaki)|h(ayeren|ebrew|indi|rvatski|ungar(y|ian))|i(celandic|ndian?|ndonesian?|ngl[e\\u00ea]se?|ngilizce|tali(ano?|en(isch)?))|ja(pan(ese)?|vanese)|k(a(nn?ada|zakh)|hmer|o(rean?|sova)|urd[i\\u00ee])|l(at(in[ao]?|vi(an?|e[s\\u0161]u))|ietuvi[u\\u0173]|ithuanian?)|m(a[ck]edon(ian?|ski)|agyar|alay(alam?|sian?)?|altese|andarin|arathi|elayu|ontenegro|ongol(ian?)|yanmar)|n(e(d|th)erlands?|epali|orw(ay|egian)|orsk( bokm[a\\u00e5]l)?|ynorsk)|o(landese|dia)|p(ashto|ersi?an?|ol(n?isc?h|ski)|or?tugu?[e\\u00ea]se?(( d[eo])? brasil(eiro)?| ?\\(brasil\\))?|unjabi)|r(om[a\\u00e2i]ni?[a\\u0103]n?|um(ano|\\u00e4nisch)|ussi([ao]n?|sch))|s(anskrit|erbian|imple english|inha?la|lov(ak(ian?)?|en\\u0161?[c\\u010d]ina|en(e|ij?an?)|uomi)|erbisch|pagnolo?|panisc?h|rbeska|rpski|venska|c?wedisc?h|hqip)|t(a(galog|mil)|elugu|hai(land)?|i[e\\u1ebf]ng vi[e\\u1ec7]t|[u\\u00fc]rk([c\\u00e7]e|isc?h|i\\u015f|ey))|u(rdu|zbek)|v(alencia(no?)?|ietnamese)|welsh|(\\u0430\\u043d\\u0433\\u043b\\u0438\\u0438\\u0441|[k\\u043a]\\u0430\\u043b\\u043c\\u044b\\u043a\\u0441|[k\\u043a]\\u0430\\u0437\\u0430\\u0445\\u0441|\\u043d\\u0435\\u043c\\u0435\\u0446|[p\\u0440]\\u0443\\u0441\\u0441|[y\\u0443]\\u0437\\u0431\\u0435\\u043a\\u0441)\\u043a\\u0438\\u0439( \\u044f\\u0437\\u044b\\u043a)??|\\u05e2\\u05d1\\u05e8\\u05d9\\u05ea|[k\\u043a\\u049b](\\u0430\\u0437\\u0430[\\u043a\\u049b]\\u0448\\u0430|\\u044b\\u0440\\u0433\\u044b\\u0437\\u0447\\u0430|\\u0438\\u0440\\u0438\\u043b\\u043b)|\\u0443\\u043a\\u0440\\u0430\\u0457\\u043d\\u0441\\u044c\\u043a(\\u0430|\\u043e\\u044e)|\\u0431(\\u0435\\u043b\\u0430\\u0440\\u0443\\u0441\\u043a\\u0430\\u044f|\\u044a\\u043b\\u0433\\u0430\\u0440\\u0441\\u043a\\u0438( \\u0435\\u0437\\u0438\\u043a)?)|\\u03b5\\u03bb\\u03bb[\\u03b7\\u03b9]\\u03bd\\u03b9\\u03ba(\\u03ac|\\u03b1)|\\u10e5\\u10d0\\u10e0\\u10d7\\u10e3\\u10da\\u10d8|\\u0939\\u093f\\u0928\\u094d\\u0926\\u0940|\\u0e44\\u0e17\\u0e22|[m\\u043c]\\u043e\\u043d\\u0433\\u043e\\u043b(\\u0438\\u0430)?|([c\\u0441]\\u0440\\u043f|[m\\u043c]\\u0430\\u043a\\u0435\\u0434\\u043e\\u043d)\\u0441\\u043a\\u0438|\\u0627\\u0644\\u0639\\u0631\\u0628\\u064a\\u0629|\\u65e5\\u672c\\u8a9e|\\ud55c\\uad6d(\\ub9d0|\\uc5b4)|\\u200c\\u0939\\u093f\\u0928\\u0926\\u093c\\u093f|\\u09ac\\u09be\\u0982\\u09b2\\u09be|\\u0a2a\\u0a70\\u0a1c\\u0a3e\\u0a2c\\u0a40|\\u092e\\u0930\\u093e\\u0920\\u0940|\\u0c95\\u0ca8\\u0ccd\\u0ca8\\u0ca1|\\u0627\\u064f\\u0631\\u062f\\u064f\\u0648|\\u0ba4\\u0bae\\u0bbf\\u0bb4\\u0bcd|\\u0c24\\u0c46\\u0c32\\u0c41\\u0c17\\u0c41|\\u0a97\\u0ac1\\u0a9c\\u0ab0\\u0abe\\u0aa4\\u0ac0|\\u0641\\u0627\\u0631\\u0633\\u06cc|\\u067e\\u0627\\u0631\\u0633\\u06cc|\\u0d2e\\u0d32\\u0d2f\\u0d3e\\u0d33\\u0d02|\\u067e\\u069a\\u062a\\u0648|\\u1019\\u103c\\u1014\\u103a\\u1019\\u102c\\u1018\\u102c\\u101e\\u102c|\\u4e2d\\u6587(\\u7b80\\u4f53|\\u7e41\\u9ad4)?|\\u4e2d\\u6587\\uff08(\\u7b80\\u4f53?|\\u7e41\\u9ad4)\\uff09|\\u7b80\\u4f53|\\u7e41\\u9ad4)( language)??($|\\n)";
+  // 2. Contain language word :
+  val regex_ContainLanguageWord: String = """(^|\\n)([ei]n )??(a(frikaa?ns|lbanian?|lemanha|ng(lais|ol)|ra?b(e?|[ei]c|ian?|isc?h)
+    |rmenian?|ssamese|azeri|z[e\\u0259]rba(ijani?|ycan(ca)?|yjan)|\\u043d\\u0433\\u043b\\u0438\\u0439\\u0441\\u043a\\u0438\\u0439)
+    |b(ahasa( (indonesia|jawa|malaysia|melayu))?|angla|as(k|qu)e|[aeo]ng[ao]?li|elarusian?|okm\\u00e5l|osanski|ra[sz]il(ian?)?
+    |ritish( kannada)?|ulgarian?)|c(ebuano|hina|hinese( simplified)?|zech|roat([eo]|ian?)|atal[a\\u00e0]n?|\\u0440\\u043f\\u0441\\u043a\\u0438|antonese)
+    |[c\\u010d](esky|e[s\\u0161]tina)\r\n|d(an(isc?h|sk)|e?uts?ch)|e(esti|ll[hi]nika|ng(els|le(ski|za)|lisc?h)|spa(g?[n\\u00f1]h?i?ol|nisc?h)
+    |speranto|stonian|usk[ae]ra)|f(ilipino|innish|ran[c\\u00e7](ais|e|ez[ao])|ren[cs]h|arsi|rancese)|g(al(ego|ician)|uja?rati|ree(ce|k)
+    |eorgian|erman[ay]?|ilaki)|h(ayeren|ebrew|indi|rvatski|ungar(y|ian))|i(celandic|ndian?|ndonesian?|ngl[e\\u00ea]se?|ngilizce|tali(ano?|en(isch)?))
+    |ja(pan(ese)?|vanese)|k(a(nn?ada|zakh)|hmer|o(rean?|sova)|urd[i\\u00ee])|l(at(in[ao]?|vi(an?|e[s\\u0161]u))|ietuvi[u\\u0173]|ithuanian?)|m(a[ck]edon(ian?
+    |ski)|agyar|alay(alam?|sian?)?|altese|andarin|arathi|elayu|ontenegro|ongol(ian?)|yanmar)|n(e(d|th)erlands?|epali|orw(ay|egian)|orsk( bokm[a\\u00e5]l)?|ynorsk)|o(landese|dia)
+    |p(ashto|ersi?an?|ol(n?isc?h|ski)|or?tugu?[e\\u00ea]se?(( d[eo])? brasil(eiro)?| ?\\(brasil\\))?|unjabi)|r(om[a\\u00e2i]ni?[a\\u0103]n?|um(ano|\\u00e4nisch)
+    |ussi([ao]n?|sch))|s(anskrit|erbian|imple english|inha?la|lov(ak(ian?)?|en\\u0161?[c\\u010d]ina|en(e|ij?an?)|uomi)|erbisch|pagnolo?|panisc?h|rbeska|rpski|venska|c?wedisc?h|hqip)
+    |t(a(galog|mil)|elugu|hai(land)?|i[e\\u1ebf]ng vi[e\\u1ec7]t|[u\\u00fc]rk([c\\u00e7]e|isc?h|i\\u015f|ey))|u(rdu|zbek)|v(alencia(no?)?|ietnamese)
+    |welsh|(\\u0430\\u043d\\u0433\\u043b\\u0438\\u0438\\u0441|[k\\u043a]\\u0430\\u043b\\u043c\\u044b\\u043a\\u0441|[k\\u043a]\\u0430\\u0437\\u0430\\u0445\\u0441|\\u043d\\u0435\\u043c\\u0435\\u0446
+    |[p\\u0440]\\u0443\\u0441\\u0441|[y\\u0443]\\u0437\\u0431\\u0435\\u043a\\u0441)\\u043a\\u0438\\u0439( \\u044f\\u0437\\u044b\\u043a)??|\\u05e2\\u05d1\\u05e8\\u05d9\\u05ea
+    |[k\\u043a\\u049b](\\u0430\\u0437\\u0430[\\u043a\\u049b]\\u0448\\u0430|\\u044b\\u0440\\u0433\\u044b\\u0437\\u0447\\u0430|\\u0438\\u0440\\u0438\\u043b\\u043b)
+    |\\u0443\\u043a\\u0440\\u0430\\u0457\\u043d\\u0441\\u044c\\u043a(\\u0430|\\u043e\\u044e)|\\u0431(\\u0435\\u043b\\u0430\\u0440\\u0443\\u0441\\u043a\\u0430\\u044f
+    |\\u044a\\u043b\\u0433\\u0430\\u0440\\u0441\\u043a\\u0438( \\u0435\\u0437\\u0438\\u043a)?)|\\u03b5\\u03bb\\u03bb[\\u03b7\\u03b9]\\u03bd\\u03b9\\u03ba(\\u03ac|\\u03b1)
+    |\\u10e5\\u10d0\\u10e0\\u10d7\\u10e3\\u10da\\u10d8|\\u0939\\u093f\\u0928\\u094d\\u0926\\u0940|\\u0e44\\u0e17\\u0e22|[m\\u043c]\\u043e\\u043d\\u0433\\u043e\\u043b(\\u0438\\u0430)?|([c\\u0441]\\u0440\\u043f
+    |[m\\u043c]\\u0430\\u043a\\u0435\\u0434\\u043e\\u043d)\\u0441\\u043a\\u0438|\\u0627\\u0644\\u0639\\u0631\\u0628\\u064a\\u0629|\\u65e5\\u672c\\u8a9e|\\ud55c\\uad6d(\\ub9d0
+    |\\uc5b4)|\\u200c\\u0939\\u093f\\u0928\\u0926\\u093c\\u093f|\\u09ac\\u09be\\u0982\\u09b2\\u09be|\\u0a2a\\u0a70\\u0a1c\\u0a3e\\u0a2c\\u0a40|\\u092e\\u0930\\u093e\\u0920\\u0940
+    |\\u0c95\\u0ca8\\u0ccd\\u0ca8\\u0ca1|\\u0627\\u064f\\u0631\\u062f\\u064f\\u0648|\\u0ba4\\u0bae\\u0bbf\\u0bb4\\u0bcd|\\u0c24\\u0c46\\u0c32\\u0c41\\u0c17\\u0c41
+    |\\u0a97\\u0ac1\\u0a9c\\u0ab0\\u0abe\\u0aa4\\u0ac0|\\u0641\\u0627\\u0631\\u0633\\u06cc|\\u067e\\u0627\\u0631\\u0633\\u06cc|\\u0d2e\\u0d32\\u0d2f\\u0d3e\\u0d33\\u0d02
+    |\\u067e\\u069a\\u062a\\u0648|\\u1019\\u103c\\u1014\\u103a\\u1019\\u102c\\u1018\\u102c\\u101e\\u102c|\\u4e2d\\u6587(\\u7b80\\u4f53|\\u7e41\\u9ad4)?|\\u4e2d\\u6587\\uff08(\\u7b80\\u4f53?
+    |\\u7e41\\u9ad4)\\uff09|\\u7b80\\u4f53|\\u7e41\\u9ad4)( language)??($|\\n)"""
   val pattern_ContainLanguageWord: Pattern = Pattern.compile(regex_ContainLanguageWord);
   val matcher_ContainLanguageWord: Matcher = pattern_ContainLanguageWord.matcher("");
   def ContainLanguageWord(str: String): Boolean = {
@@ -149,20 +201,20 @@ class WordsFeatures extends Serializable {
     result
   }
-  //3. Upper case word Ratio:
+  // 3. Upper case word Ratio:
   def UppercaseWordRation(str: String): Double = {
     val pattern: Pattern = Pattern.compile("\\p{Lu}.*")
     val result: Double = WordRatio(str, pattern)
     result
   }
-  //4. Lower case word Ratio:
+  // 4. Lower case word Ratio:
   def LowercaseWordRation(str: String): Double = {
     val pattern: Pattern = Pattern.compile("[\\p{L}&&[^\\p{Lu}]].*")
     val result: Double = WordRatio(str, pattern)
     result
  }
-  //5.word Contain URL :
+  // 5.word Contain URL :
   val pattern_WordContainURL: Pattern = Pattern.compile("\\b(https?:\\/\\/|www\\.)\\S{10}.*",
     Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL | Pattern.CANON_EQ)
   val matcher_WordContainURL: Matcher = pattern_WordContainURL.matcher("");
@@ -179,7 +231,7 @@ class WordsFeatures extends Serializable {
     result
   }
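The pre-built Matcher vals above (matcher_WordContainURL, and matcher_ContainLanguageWord earlier) suggest reuse via reset() rather than allocating a fresh Matcher per input. A sketch of that pattern, with an illustrative URL check:

  import java.util.regex.Pattern

  // Sketch: one compiled Pattern, one reusable Matcher, reset per input.
  val urlPattern = Pattern.compile("\\b(https?://|www\\.)\\S{10}.*", Pattern.CASE_INSENSITIVE)
  val urlMatcher = urlPattern.matcher("")
  def looksLikeUrl(s: String): Boolean = urlMatcher.reset(s).find()

One caveat worth keeping in mind: Matcher is not thread-safe, so sharing a single instance across Spark tasks or threads is unsafe; per-call or per-partition matchers are the safer default.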
-  //6. Longest Word
+  // 6. Longest Word
   val pattern_longestWord: Pattern = Pattern.compile("\\p{IsAlphabetic}+", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL | Pattern.CANON_EQ);
   val matcher_longestWord: Matcher = pattern_WordContainURL.matcher("");
@@ -203,7 +255,7 @@ class WordsFeatures extends Serializable {
     max
   }
-  //7. Bad Word : It is Ok
+  // 7. Bad Word : It is Ok
   val luisVonAhnWordlist: Array[String] = Array("abbo", "abo", "abortion", "abuse",
     "addict", "addicts", "adult", "africa",
@@ -465,7 +517,7 @@ class WordsFeatures extends Serializable {
   }
-  //8. Contain Bad Word:It is ok
+  // 8. Contain Bad Word:It is ok
   val tokens_containbadword: List[String] = new ArrayList[String](Arrays.asList(luisVonAhnWordlist: _*))
   val patternString_containBadword: String = ".*\\b(" + StringUtils.join(tokens_containbadword, "|") + ")\\b.*"
   val pattern_containBadword: Pattern = Pattern.compile(patternString, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL | Pattern.CANON_EQ)
@@ -481,7 +533,7 @@ class WordsFeatures extends Serializable {
     results
   }
-  //9.Ban Builder Word:It is OK
+  // 9.Ban Builder Word:It is OK
   val BanBuilderWordlist: Array[String] = Array("$#!+", "$1ut", "$h1t", "$hit", "$lut", "'ho",
     "'hobag", "a$$", "anal", "anus", "ass", "assmunch", "b1tch", "ballsack", "bastard", "beaner",
@@ -629,7 +681,7 @@ class WordsFeatures extends Serializable {
     results
   }
-  //10 Ban word Ratio:
+  // 10 Ban word Ratio:
   val tokens_ban: List[String] = new ArrayList[String](Arrays.asList(BanBuilderWordlist: _*))
   val patternString_ban: String = StringUtils.join(tokens_ban, "|")
   val pattern_banWord: Pattern = Pattern.compile(patternString_ban)
@@ -645,8 +697,33 @@ class WordsFeatures extends Serializable {
   }
-  //11.Contain language word:It is ok
-  val regex_containLanguageWord: String = ".*(a(frikaa?ns|lbanian?|lemanha|ng(lais|ol)|ra?b(e?|[ei]c|ian?|isc?h)|rmenian?|ssamese|azeri|z[e\\u0259]rba(ijani?|ycan(ca)?|yjan)|\\u043d\\u0433\\u043b\\u0438\\u0439\\u0441\\u043a\\u0438\\u0439)|b(ahasa( (indonesia|jawa|malaysia|melayu))?|angla|as(k|qu)e|[aeo]ng[ao]?li|elarusian?|okm\\u00e5l|osanski|ra[sz]il(ian?)?|ritish( kannada)?|ulgarian?)|c(ebuano|hina|hinese( simplified)?|zech|roat([eo]|ian?)|atal[a\\u00e0]n?|\\u0440\\u043f\\u0441\\u043a\\u0438|antonese)|[c\\u010d](esky|e[s\\u0161]tina)\r\n|d(an(isc?h|sk)|e?uts?ch)|e(esti|ll[hi]nika|ng(els|le(ski|za)|lisc?h)|spa(g?[n\\u00f1]h?i?ol|nisc?h)|speranto|stonian|usk[ae]ra)|f(ilipino|innish|ran[c\\u00e7](ais|e|ez[ao])|ren[cs]h|arsi|rancese)|g(al(ego|ician)|uja?rati|ree(ce|k)|eorgian|erman[ay]?|ilaki)|h(ayeren|ebrew|indi|rvatski|ungar(y|ian))|i(celandic|ndian?|ndonesian?|ngl[e\\u00ea]se?|ngilizce|tali(ano?|en(isch)?))|ja(pan(ese)?|vanese)|k(a(nn?ada|zakh)|hmer|o(rean?|sova)|urd[i\\u00ee])|l(at(in[ao]?|vi(an?|e[s\\u0161]u))|ietuvi[u\\u0173]|ithuanian?)|m(a[ck]edon(ian?|ski)|agyar|alay(alam?|sian?)?|altese|andarin|arathi|elayu|ontenegro|ongol(ian?)|yanmar)|n(e(d|th)erlands?|epali|orw(ay|egian)|orsk( bokm[a\\u00e5]l)?|ynorsk)|o(landese|dia)|p(ashto|ersi?an?|ol(n?isc?h|ski)|or?tugu?[e\\u00ea]se?(( d[eo])? brasil(eiro)?| ?\\(brasil\\))?|unjabi)|r(om[a\\u00e2i]ni?[a\\u0103]n?|um(ano|\\u00e4nisch)|ussi([ao]n?|sch))|s(anskrit|erbian|imple english|inha?la|lov(ak(ian?)?|en\\u0161?[c\\u010d]ina|en(e|ij?an?)|uomi)|erbisch|pagnolo?|panisc?h|rbeska|rpski|venska|c?wedisc?h|hqip)|t(a(galog|mil)|elugu|hai(land)?|i[e\\u1ebf]ng vi[e\\u1ec7]t|[u\\u00fc]rk([c\\u00e7]e|isc?h|i\\u015f|ey))|u(rdu|zbek)|v(alencia(no?)?|ietnamese)|welsh|(\\u0430\\u043d\\u0433\\u043b\\u0438\\u0438\\u0441|[k\\u043a]\\u0430\\u043b\\u043c\\u044b\\u043a\\u0441|[k\\u043a]\\u0430\\u0437\\u0430\\u0445\\u0441|\\u043d\\u0435\\u043c\\u0435\\u0446|[p\\u0440]\\u0443\\u0441\\u0441|[y\\u0443]\\u0437\\u0431\\u0435\\u043a\\u0441)\\u043a\\u0438\\u0439( \\u044f\\u0437\\u044b\\u043a)??|\\u05e2\\u05d1\\u05e8\\u05d9\\u05ea|[k\\u043a\\u049b](\\u0430\\u0437\\u0430[\\u043a\\u049b]\\u0448\\u0430|\\u044b\\u0440\\u0433\\u044b\\u0437\\u0447\\u0430|\\u0438\\u0440\\u0438\\u043b\\u043b)|\\u0443\\u043a\\u0440\\u0430\\u0457\\u043d\\u0441\\u044c\\u043a(\\u0430|\\u043e\\u044e)|\\u0431(\\u0435\\u043b\\u0430\\u0440\\u0443\\u0441\\u043a\\u0430\\u044f|\\u044a\\u043b\\u0433\\u0430\\u0440\\u0441\\u043a\\u0438( \\u0435\\u0437\\u0438\\u043a)?)|\\u03b5\\u03bb\\u03bb[\\u03b7\\u03b9]\\u03bd\\u03b9\\u03ba(\\u03ac|\\u03b1)|\\u10e5\\u10d0\\u10e0\\u10d7\\u10e3\\u10da\\u10d8|\\u0939\\u093f\\u0928\\u094d\\u0926\\u0940|\\u0e44\\u0e17\\u0e22|[m\\u043c]\\u043e\\u043d\\u0433\\u043e\\u043b(\\u0438\\u0430)?|([c\\u0441]\\u0440\\u043f|[m\\u043c]\\u0430\\u043a\\u0435\\u0434\\u043e\\u043d)\\u0441\\u043a\\u0438|\\u0627\\u0644\\u0639\\u0631\\u0628\\u064a\\u0629|\\u65e5\\u672c\\u8a9e|\\ud55c\\uad6d(\\ub9d0|\\uc5b4)|\\u200c\\u0939\\u093f\\u0928\\u0926\\u093c\\u093f|\\u09ac\\u09be\\u0982\\u09b2\\u09be|\\u0a2a\\u0a70\\u0a1c\\u0a3e\\u0a2c\\u0a40|\\u092e\\u0930\\u093e\\u0920\\u0940|\\u0c95\\u0ca8\\u0ccd\\u0ca8\\u0ca1|\\u0627\\u064f\\u0631\\u062f\\u064f\\u0648|\\u0ba4\\u0bae\\u0bbf\\u0bb4\\u0bcd|\\u0c24\\u0c46\\u0c32\\u0c41\\u0c17\\u0c41|\\u0a97\\u0ac1\\u0a9c\\u0ab0\\u0abe\\u0aa4\\u0ac0|\\u0641\\u0627\\u0631\\u0633\\u06cc|\\u067e\\u0627\\u0631\\u0633\\u06cc|\\u0d2e\\u0d32\\u0d2f\\u0d3e\\u0d33\\u0d02|\\u067e\\u069a\\u062a\\u0648|\\u1019\\u103c\\u1014\\u103a\\u1019\\u102c\\u1018\\u102c\\u101e\\u102c|\\u4e2d\\u6587(\\u7b80\\u4f53|\\u7e41\\u9ad4)?|\\u4e2d\\u6587\\uff08(\\u7b80\\u4f53?|\\u7e41\\u9ad4)\\uff09|\\u7b80\\u4f53|\\u7e41\\u9ad4).*";
+  // 11.Contain language word:It is ok
+  val regex_containLanguageWord: String = """.*(a(frikaa?ns|lbanian?|lemanha|ng(lais|ol)|ra?b(e?|[ei]c|ian?|isc?h)|rmenian?
+    |ssamese|azeri|z[e\\u0259]rba(ijani?|ycan(ca)?|yjan)|\\u043d\\u0433\\u043b\\u0438\\u0439\\u0441\\u043a\\u0438\\u0439)
+    |b(ahasa( (indonesia|jawa|malaysia|melayu))?|angla|as(k|qu)e|[aeo]ng[ao]?li|elarusian?|okm\\u00e5l|osanski|ra[sz]il(ian?)?
+    |ritish( kannada)?|ulgarian?)|c(ebuano|hina|hinese( simplified)?|zech|roat([eo]|ian?)|atal[a\\u00e0]n?|\\u0440\\u043f\\u0441\\u043a\\u0438|antonese)
+    |[c\\u010d](esky|e[s\\u0161]tina)\r\n|d(an(isc?h|sk)|e?uts?ch)|e(esti|ll[hi]nika|ng(els|le(ski|za)|lisc?h)|spa(g?[n\\u00f1]h?i?ol|nisc?h)|speranto|stonian|usk[ae]ra)
+    |f(ilipino|innish|ran[c\\u00e7](ais|e|ez[ao])|ren[cs]h|arsi|rancese)|g(al(ego|ician)|uja?rati|ree(ce|k)|eorgian|erman[ay]?|ilaki)|h(ayeren|ebrew|indi|rvatski|ungar(y|ian))
+    |i(celandic|ndian?|ndonesian?|ngl[e\\u00ea]se?|ngilizce|tali(ano?|en(isch)?))|ja(pan(ese)?|vanese)|k(a(nn?ada|zakh)|hmer|o(rean?|sova)|urd[i\\u00ee])
+    |l(at(in[ao]?|vi(an?|e[s\\u0161]u))|ietuvi[u\\u0173]|ithuanian?)|m(a[ck]edon(ian?|ski)|agyar|alay(alam?|sian?)?|altese|andarin|arathi|elayu|ontenegro
+    |ongol(ian?)|yanmar)|n(e(d|th)erlands?|epali|orw(ay|egian)|orsk( bokm[a\\u00e5]l)?|ynorsk)|o(landese|dia)|p(ashto|ersi?an?|ol(n?isc?h|ski)|or?tugu?[e\\u00ea]se?(( d[eo])? brasil(eiro)?
+    | ?\\(brasil\\))?|unjabi)|r(om[a\\u00e2i]ni?[a\\u0103]n?|um(ano|\\u00e4nisch)|ussi([ao]n?|sch))|s(anskrit|erbian|imple english|inha?la|lov(ak(ian?)?|en\\u0161?[c\\u010d]ina|en(e|ij?an?)|uomi)|erbisch|pagnolo?
+    |panisc?h|rbeska|rpski|venska|c?wedisc?h|hqip)|t(a(galog|mil)|elugu|hai(land)?|i[e\\u1ebf]ng vi[e\\u1ec7]t|[u\\u00fc]rk([c\\u00e7]e|isc?h|i\\u015f|ey))|u(rdu|zbek)|v(alencia(no?)?|ietnamese)|welsh
+    |(\\u0430\\u043d\\u0433\\u043b\\u0438\\u0438\\u0441|[k\\u043a]\\u0430\\u043b\\u043c\\u044b\\u043a\\u0441|[k\\u043a]\\u0430\\u0437\\u0430\\u0445\\u0441|\\u043d\\u0435\\u043c\\u0435\\u0446
+    |[p\\u0440]\\u0443\\u0441\\u0441|[y\\u0443]\\u0437\\u0431\\u0435\\u043a\\u0441)\\u043a\\u0438\\u0439( \\u044f\\u0437\\u044b\\u043a)??|\\u05e2\\u05d1\\u05e8\\u05d9\\u05ea
+    |[k\\u043a\\u049b](\\u0430\\u0437\\u0430[\\u043a\\u049b]\\u0448\\u0430|\\u044b\\u0440\\u0433\\u044b\\u0437\\u0447\\u0430|\\u0438\\u0440\\u0438\\u043b\\u043b)
+    |\\u0443\\u043a\\u0440\\u0430\\u0457\\u043d\\u0441\\u044c\\u043a(\\u0430|\\u043e\\u044e)|\\u0431(\\u0435\\u043b\\u0430\\u0440\\u0443\\u0441\\u043a\\u0430\\u044f
+    |\\u044a\\u043b\\u0433\\u0430\\u0440\\u0441\\u043a\\u0438( \\u0435\\u0437\\u0438\\u043a)?)|\\u03b5\\u03bb\\u03bb[\\u03b7\\u03b9]\\u03bd\\u03b9\\u03ba(\\u03ac|\\u03b1)
+    |\\u10e5\\u10d0\\u10e0\\u10d7\\u10e3\\u10da\\u10d8
+    |\\u0939\\u093f\\u0928\\u094d\\u0926\\u0940|\\u0e44\\u0e17\\u0e22|[m\\u043c]\\u043e\\u043d\\u0433\\u043e\\u043b(\\u0438\\u0430)?|([c\\u0441]\\u0440\\u043f
+    |[m\\u043c]\\u0430\\u043a\\u0435\\u0434\\u043e\\u043d)\\u0441\\u043a\\u0438
+    |\\u0627\\u0644\\u0639\\u0631\\u0628\\u064a\\u0629|\\u65e5\\u672c\\u8a9e|\\ud55c\\uad6d(\\ub9d0|\\uc5b4)
+    |\\u200c\\u0939\\u093f\\u0928\\u0926\\u093c\\u093f|\\u09ac\\u09be\\u0982\\u09b2\\u09be|\\u0a2a\\u0a70\\u0a1c\\u0a3e\\u0a2c\\u0a40
+    |\\u092e\\u0930\\u093e\\u0920\\u0940|\\u0c95\\u0ca8\\u0ccd\\u0ca8\\u0ca1|\\u0627\\u064f\\u0631\\u062f\\u064f\\u0648
+    |\\u0ba4\\u0bae\\u0bbf\\u0bb4\\u0bcd|\\u0c24\\u0c46\\u0c32\\u0c41\\u0c17\\u0c41|\\u0a97\\u0ac1\\u0a9c\\u0ab0\\u0abe\\u0aa4\\u0ac0
+    |\\u0641\\u0627\\u0631\\u0633\\u06cc|\\u067e\\u0627\\u0631\\u0633\\u06cc|\\u0d2e\\u0d32\\u0d2f\\u0d3e\\u0d33\\u0d02
+    |\\u067e\\u069a\\u062a\\u0648|\\u1019\\u103c\\u1014\\u103a\\u1019\\u102c\\u1018\\u102c\\u101e\\u102c|\\u4e2d\\u6587(\\u7b80\\u4f53
+    |\\u7e41\\u9ad4)?|\\u4e2d\\u6587\\uff08(\\u7b80\\u4f53?|\\u7e41\\u9ad4)\\uff09|\\u7b80\\u4f53|\\u7e41\\u9ad4).*""".stripMargin
   val pattern_forContainLanguageWord: Pattern = Pattern.compile(regex_containLanguageWord);
   val matcher_containLanguageWord: Matcher = pattern_forContainLanguageWord.matcher("");
   def containLanguageBadWord_word(str: String): Boolean = {
@@ -660,7 +737,7 @@ class WordsFeatures extends Serializable {
     results
   }
-  //12. Male Names: It is ok
+  // 12. Male Names: It is ok
   val MaleNames: Array[String] = Array("AARON", "ADAM", "ADRIAN", "ALAN", "ALBERT", "ALBERTO", "ALEX", "ALEXANDER",
     "ALFRED", "ALFREDO", "ALLAN", "ALLEN", "ALVIN", "ANDRE", "ANDREW", "ANDY",
@@ -725,7 +802,7 @@ class WordsFeatures extends Serializable {
   }
-  //13. Female Names: It is ok
+  // 13. Female Names: It is ok
   val FemaleNames: Array[String] = Array("AGNES", "ALICE", "ALICIA", "ALLISON", "ALMA", "AMANDA",
     "AMBER", "AMY", "ANA", "ANDREA", "ANGELA", "ANITA", "ANN", "ANNA", "ANNE", "ANNETTE",
@@ -934,10 +1011,8 @@ class WordsFeatures extends Serializable {
     }
   }
-
   results
 }
-
 def GetNumberofLinks(str: String): Double = {
   val input: String = str
@@ -971,5 +1046,4 @@ class WordsFeatures extends Serializable {
   result.toFloat
 } // Words features: ------ End calculation the Ratio for Words:
-
-}
\ No newline at end of file
+}
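Taken together, each extractor in this file returns a fixed-length Array[Double]. A hypothetical driver sketch, assuming the array is fed to Spark ML as a dense vector (the input string here is illustrative):

  import org.apache.spark.ml.linalg.{ Vector, Vectors }

  // Usage sketch: word features for one revision comment, packed into a
  // dense vector for a downstream classifier.
  val wordsFeatures = new WordsFeatures()
  val raw: Array[Double] = wordsFeatures.Vector_Words_Feature("some revision comment")
  val featureVector: Vector = Vectors.dense(raw)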