From 2cd1d7dc1520f2809f845c7d0c8014092b06c7cc Mon Sep 17 00:00:00 2001
From: mtkachenko <maksim.tkatchenko@gmail.com>
Date: Tue, 4 Dec 2018 14:27:49 +0800
Subject: [PATCH] Migrated

---
 data/amazon.csv                               |  61 ++++++
 data/amazon_extended.csv                      |  61 ++++++
 data/camera.csv                               |  13 ++
 data/icecream.csv                             |  31 +++
 data/icecream_raw.csv                         |  31 +++
 pom.xml                                       | 124 +++++++++++
 .../preferred/regression/ApplyRegression.java |  66 ++++++
 .../java/ai/preferred/regression/Command.java |  58 +++++
 .../regression/EvaluateRegression.java        |  68 ++++++
 .../ai/preferred/regression/PlotData.java     |  44 ++++
 .../regression/PlotLinearRegression.java      |  67 ++++++
 .../preferred/regression/PrintRegression.java | 110 ++++++++++
 .../java/ai/preferred/regression/Shell.java   | 146 ++++++++++++
 .../regression/TrainLinearRegression.java     |  54 +++++
 .../regression/TrainLogisticRegression.java   |  62 ++++++
 .../ai/preferred/regression/WekaUtils.java    |  25 +++
 .../regression/exercise/E00_IceCream.java     |  23 ++
 .../E01_MyFirstRegressionWithIceCream.java    |  24 ++
 .../exercise/E02_ReadingRegression.java       |  24 ++
 .../regression/exercise/E03_RawIceCream.java  |  25 +++
 .../E04_RegressionForTemperature.java         |  20 ++
 .../regression/exercise/E05_TryX2Only.java    |  23 ++
 .../exercise/E06_TryX1AndX2AndX3.java         |  22 ++
 .../regression/exercise/E07_AmazonText.java   |  23 ++
 .../regression/exercise/E08_AmazonCheap.java  |  22 ++
 .../exercise/E09_AmazonExpensive.java         |  19 ++
 .../exercise/E10_AmazonYourOwnWord.java       |  20 ++
 .../exercise/E11_CameraCategories.java        |  23 ++
 .../exercise/E12_CameraWithAutoFocus.java     |  23 ++
 .../E13_ShuffleAndPartitionIceCream.java      |  26 +++
 .../regression/exercise/E14_TrainTest.java    |  23 ++
 .../regression/exercise/E15_TestX123.java     |  21 ++
 .../exercise/E16_AmazonTrainTest.java         |  21 ++
 .../exercise/E17_AmazonLogistic.java          |  22 ++
 .../exercise/E18_LogisticRidgeRegression.java |  23 ++
 .../exercise/E19_LinearRidgeRegression.java   |  22 ++
 .../regression/exercise/E20_GrandFinale.java  |  19 ++
 .../regression/io/ARFFDataReader.java         | 113 ++++++++++
 .../preferred/regression/io/CSVInputData.java | 132 +++++++++++
 .../ai/preferred/regression/io/CSVUtils.java  |  35 +++
 .../ai/preferred/regression/pe/AddX2.java     |  40 ++++
 .../ai/preferred/regression/pe/AddX3.java     |  41 ++++
 .../ai/preferred/regression/pe/Dummy.java     |  42 ++++
 .../regression/pe/EncodeTextAsFrequency.java  | 121 ++++++++++
 .../regression/pe/EncodeValueAsOneHot.java    |  61 ++++++
 .../ai/preferred/regression/pe/Partition.java |  42 ++++
 .../regression/pe/ProcessingElement.java      |  88 ++++++++
 .../regression/pe/ProjectColumns.java         |  61 ++++++
 .../preferred/regression/pe/RemoveColumn.java |  37 ++++
 .../preferred/regression/pe/SelectEquals.java |  39 ++++
 .../ai/preferred/regression/pe/Shuffle.java   |  36 +++
 .../preferred/regression/pe/SwapColumns.java  |  41 ++++
 .../regression/pe/data/Vocabulary.java        |  50 +++++
 .../ai/preferred/regression/plot/XYChart.java |  41 ++++
 .../preferred/regression/reset/DataFiles.java | 207 ++++++++++++++++++
 src/main/resources/log4j.properties           |  22 ++
 src/test/java/.gitkeep                        |   0
 src/test/resources/.gitkeep                   |   0
 temp/.gitkeep                                 |   0
 utils/assemble.py                             |  38 ++++
 60 files changed, 2776 insertions(+)
 create mode 100644 data/amazon.csv
 create mode 100644 data/amazon_extended.csv
 create mode 100644 data/camera.csv
 create mode 100644 data/icecream.csv
 create mode 100644 data/icecream_raw.csv
 create mode 100644 pom.xml
 create mode 100644 src/main/java/ai/preferred/regression/ApplyRegression.java
 create mode 100644 src/main/java/ai/preferred/regression/Command.java
 create mode 100644 src/main/java/ai/preferred/regression/EvaluateRegression.java
 create mode 100644 src/main/java/ai/preferred/regression/PlotData.java
 create mode 100644 src/main/java/ai/preferred/regression/PlotLinearRegression.java
 create mode 100644 src/main/java/ai/preferred/regression/PrintRegression.java
 create mode 100644 src/main/java/ai/preferred/regression/Shell.java
 create mode 100644 src/main/java/ai/preferred/regression/TrainLinearRegression.java
 create mode 100644 src/main/java/ai/preferred/regression/TrainLogisticRegression.java
 create mode 100644 src/main/java/ai/preferred/regression/WekaUtils.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E00_IceCream.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E01_MyFirstRegressionWithIceCream.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E02_ReadingRegression.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E03_RawIceCream.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E04_RegressionForTemperature.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E05_TryX2Only.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E06_TryX1AndX2AndX3.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E07_AmazonText.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E08_AmazonCheap.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E09_AmazonExpensive.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E10_AmazonYourOwnWord.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E11_CameraCategories.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E12_CameraWithAutoFocus.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E13_ShuffleAndPartitionIceCream.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E14_TrainTest.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E15_TestX123.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E16_AmazonTrainTest.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E17_AmazonLogistic.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E18_LogisticRidgeRegression.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E19_LinearRidgeRegression.java
 create mode 100644 src/main/java/ai/preferred/regression/exercise/E20_GrandFinale.java
 create mode 100644 src/main/java/ai/preferred/regression/io/ARFFDataReader.java
 create mode 100644 src/main/java/ai/preferred/regression/io/CSVInputData.java
 create mode 100644 src/main/java/ai/preferred/regression/io/CSVUtils.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/AddX2.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/AddX3.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/Dummy.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/EncodeTextAsFrequency.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/EncodeValueAsOneHot.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/Partition.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/ProcessingElement.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/ProjectColumns.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/RemoveColumn.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/SelectEquals.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/Shuffle.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/SwapColumns.java
 create mode 100644 src/main/java/ai/preferred/regression/pe/data/Vocabulary.java
 create mode 100644 src/main/java/ai/preferred/regression/plot/XYChart.java
 create mode 100644 src/main/java/ai/preferred/regression/reset/DataFiles.java
 create mode 100644 src/main/resources/log4j.properties
 create mode 100644 src/test/java/.gitkeep
 create mode 100644 src/test/resources/.gitkeep
 create mode 100644 temp/.gitkeep
 create mode 100644 utils/assemble.py

diff --git a/data/amazon.csv b/data/amazon.csv
new file mode 100644
index 0000000..b6caca8
--- /dev/null
+++ b/data/amazon.csv
@@ -0,0 +1,61 @@
+Id,Rating,Text
+1,5,I only spent less than ten on these so they're good for what I paid for
+2,5,I'm in love with these glasses.
+3,5,Stylish. My kid loved them
+4,5,They came in great condition.
+5,5,These are really wonderful!
+6,5,these are GREAT quality
+7,5,She LOVES them!
+8,5,Love these.
+9,5,The quality is pretty good also.
+10,5,EXCELLENT PRODUCT
+11,5,I love them. Exactly what i wanted.
+12,5,Son love them
+13,5,He says they give him that style.
+14,5,Great value!!!
+15,5,Very complimentary!
+16,5,"Cute, great quality, good fit."
+17,5,I love these glasses!!
+18,5,they fit perfectly.
+19,5,They look expensive and the fit is perfect
+20,5,Sturdy and good looking for a great price
+21,5,Very stylish! Great accessory to compliment an outfit
+22,5,Thanks so much my grandson enjoy them.
+23,5,Daughter loves them.
+24,5,Makes me look smarter in my tinder profile !
+25,1,the side arms keep breaking
+26,1,just look soooo cheap!
+27,1,Not my style.
+28,1,Mine arrived broken!! Not worth sending back.
+29,1,Dollar store quality.
+30,1,Not like picture.
+31,1,We're cheap and broke right away. 
+32,1,"These are so cheap looking, they are unwearable."
+33,1,Very Very VERY Round ! Not at all vintage .
+34,1,It's a peace of garbage. Feels so cheap and plastic.
+35,1,feel flimsy like it would break i returned it the next day
+36,1,lens have too much glare
+37,1,they look cheaply made and plastic
+38,1,Very cheap looking
+39,1,make me headache
+40,1,Feel apart after a week of getting them prescribed.
+41,1,Really cheap looking.
+42,1,Glasses are crooked and not made correctly.
+43,1,poorly made... broke after three days
+44,1,Sunglasses were very small.
+45,1,Look fake and cheap
+46,1,lens fell out on first day.
+47,1,Not really like it!
+48,1,Delivered broken.
+49,1,"Overall, trash."
+50,1,"Were broken when I opened the box, very disappointed"
+51,1,Horrible lens fell out 2nd day!
+52,1,I don't like them.
+53,1,These hoes broke too I want my money
+54,1,Broke within the 3 days
+55,1,Little small but still good
+56,1,Super small
+57,1,Horrible desing
+58,1,It's broke
+59,1,Crooked and cheaply made.
+60,1,Poor quality
diff --git a/data/amazon_extended.csv b/data/amazon_extended.csv
new file mode 100644
index 0000000..86f9029
--- /dev/null
+++ b/data/amazon_extended.csv
@@ -0,0 +1,61 @@
+Id,Rating,Text,Verified Purchase,Helpful
+1,5,I only spent less than ten on these so they're good for what I paid for,YES,11
+2,5,I'm in love with these glasses.,YES,2
+3,5,Stylish. My kid loved them,YES,2
+4,5,They came in great condition.,NO,0
+5,5,These are really wonderful!,YES,0
+6,5,these are GREAT quality,YES,0
+7,5,She LOVES them!,YES,0
+8,5,Love these.,YES,0
+9,5,The quality is pretty good also.,YES,3
+10,5,EXCELLENT PRODUCT,YES,0
+11,5,I love them. Exactly what i wanted.,NO,0
+12,5,Son love them,YES,0
+13,5,He says they give him that style.,YES,0
+14,5,Great value!!!,YES,0
+15,5,Very complimentary!,YES,0
+16,5,"Cute, great quality, good fit.",YES,1
+17,5,I love these glasses!!,YES,0
+18,5,they fit perfectly.,YES,0
+19,5,They look expensive and the fit is perfect,NO,0
+20,5,Sturdy and good looking for a great price,YES,0
+21,5,Very stylish! Great accessory to compliment an outfit,YES,0
+22,5,Thanks so much my grandson enjoy them.,YES,0
+23,5,Daughter loves them.,YES,0
+24,5,Makes me look smarter in my tinder profile !,YES,0
+25,1,the side arms keep breaking,YES,0
+26,1,just look soooo cheap!,NO,0
+27,1,Not my style.,NO,0
+28,1,Mine arrived broken!! Not worth sending back.,YES,0
+29,1,Dollar store quality.,YES,5
+30,1,Not like picture.,NO,0
+31,1,We're cheap and broke right away. ,YES,0
+32,1,"These are so cheap looking, they are unwearable.",YES,0
+33,1,Very Very VERY Round ! Not at all vintage .,YES,0
+34,1,It's a peace of garbage. Feels so cheap and plastic.,YES,0
+35,1,feel flimsy like it would break i returned it the next day,NO,0
+36,1,lens have too much glare,YES,0
+37,1,they look cheaply made and plastic,YES,0
+38,1,Very cheap looking,NO,0
+39,1,make me headache,YES,0
+40,1,Feel apart after a week of getting them prescribed.,NO,0
+41,1,Really cheap looking.,YES,0
+42,1,Glasses are crooked and not made correctly.,YES,7
+43,1,poorly made... broke after three days,YES,2
+44,1,Sunglasses were very small.,NO,0
+45,1,Look fake and cheap,YES,0
+46,1,lens fell out on first day.,NO,0
+47,1,Not really like it!,YES,2
+48,1,Delivered broken.,YES,0
+49,1,"Overall, trash.",YES,0
+50,1,"Were broken when I opened the box, very disappointed",NO,0
+51,1,Horrible lens fell out 2nd day!,YES,5
+52,1,I don't like them.,NO,0
+53,1,These hoes broke too I want my money,NO,0
+54,1,Broke within the 3 days,NO,0
+55,1,Little small but still good,NO,4
+56,1,Super small,NO,0
+57,1,Horrible desing,NO,0
+58,1,It's broke,NO,0
+59,1,Crooked and cheaply made.,NO,0
+60,1,Poor quality,YES,0
diff --git a/data/camera.csv b/data/camera.csv
new file mode 100644
index 0000000..f34f56b
--- /dev/null
+++ b/data/camera.csv
@@ -0,0 +1,13 @@
+Id,Price (USD),Type,Focus
+1,949,MIRRORLESS,MANUAL
+2,99,DSLR,BOTH
+3,90,DSLR,BOTH
+4,80,DSLR,AUTO
+5,20,COMPACT,MANUAL
+6,50,COMPACT,AUTO
+7,49,COMPACT,AUTO
+8,30,COMPACT,AUTO
+9,800,MIRRORLESS,AUTO
+10,789,MIRRORLESS,MANUAL
+11,35,COMPACT,AUTO
+12,789,MIRRORLESS,BOTH
diff --git a/data/icecream.csv b/data/icecream.csv
new file mode 100644
index 0000000..2832a4d
--- /dev/null
+++ b/data/icecream.csv
@@ -0,0 +1,31 @@
+Consumption,Temperature
+0.386,5.00
+0.374,13.33
+0.393,17.22
+0.425,20.00
+0.406,20.56
+0.344,18.33
+0.327,16.11
+0.288,8.33
+0.269,0.00
+0.256,-4.44
+0.286,-2.22
+0.298,-3.33
+0.329,0.00
+0.318,4.44
+0.381,12.78
+0.381,17.22
+0.47,22.22
+0.443,22.22
+0.386,19.44
+0.342,15.56
+0.319,6.67
+0.307,4.44
+0.284,0.00
+0.326,-2.78
+0.309,-2.22
+0.359,0.56
+0.376,5.00
+0.416,11.11
+0.437,17.78
+0.548,21.67
diff --git a/data/icecream_raw.csv b/data/icecream_raw.csv
new file mode 100644
index 0000000..04e3f08
--- /dev/null
+++ b/data/icecream_raw.csv
@@ -0,0 +1,31 @@
+Id,Temperature,Consumption
+1,5.00,0.386
+2,13.33,0.374
+3,17.22,0.393
+4,20.00,0.425
+5,20.56,0.406
+6,18.33,0.344
+7,16.11,0.327
+8,8.33,0.288
+9,0.00,0.269
+10,-4.44,0.256
+11,-2.22,0.286
+12,-3.33,0.298
+13,0.00,0.329
+14,4.44,0.318
+15,12.78,0.381
+16,17.22,0.381
+17,22.22,0.47
+18,22.22,0.443
+19,19.44,0.386
+20,15.56,0.342
+21,6.67,0.319
+22,4.44,0.307
+23,0.00,0.284
+24,-2.78,0.326
+25,-2.22,0.309
+26,0.56,0.359
+27,5.00,0.376
+28,11.11,0.416
+29,17.78,0.437
+30,21.67,0.548
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..5c0ec4c
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>ai.preferred</groupId>
+  <artifactId>regression-analysis</artifactId>
+  <version>1.0-SNAPSHOT</version>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.6.0</version>
+        <configuration>
+          <source>1.8</source>
+          <target>1.8</target>
+        </configuration>
+      </plugin>
+      <plugin>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <version>2.22.0</version>
+        <dependencies>
+          <dependency>
+            <groupId>org.junit.platform</groupId>
+            <artifactId>junit-platform-surefire-provider</artifactId>
+            <version>1.2.0</version>
+          </dependency>
+          <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+            <version>5.2.0</version>
+          </dependency>
+        </dependencies>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>exec-maven-plugin</artifactId>
+        <version>1.6.0</version>
+        <executions>
+          <execution>
+            <goals>
+              <goal>java</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>3.1.0</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <minimizeJar>false</minimizeJar>
+              <finalName>${project.artifactId}-${project.version}-jar-with-dependencies</finalName>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+      </resource>
+    </resources>
+
+    <testResources>
+      <testResource>
+        <directory>src/test/resources</directory>
+      </testResource>
+    </testResources>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <version>[1.7,1.8)</version>
+    </dependency>
+    <dependency>
+      <groupId>nz.ac.waikato.cms.weka</groupId>
+      <artifactId>weka-stable</artifactId>
+      <version>3.8.3</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-csv</artifactId>
+      <version>1.6</version>
+    </dependency>
+    <dependency>
+      <groupId>args4j</groupId>
+      <artifactId>args4j</artifactId>
+      <version>2.33</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-lang3</artifactId>
+      <version>3.8.1</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>19.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.jfree</groupId>
+      <artifactId>jfreechart</artifactId>
+      <version>1.5.0</version>
+    </dependency>
+  </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/src/main/java/ai/preferred/regression/ApplyRegression.java b/src/main/java/ai/preferred/regression/ApplyRegression.java
new file mode 100644
index 0000000..2041ed6
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/ApplyRegression.java
@@ -0,0 +1,66 @@
+package ai.preferred.regression;
+
+import ai.preferred.regression.io.ARFFDataReader;
+import ai.preferred.regression.io.CSVInputData;
+import ai.preferred.regression.io.CSVUtils;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import weka.classifiers.Classifier;
+import weka.core.Instances;
+import weka.core.SerializationHelper;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.ArrayList;
+
+/**
+ * Applies a serialized Weka classifier to a test CSV file: every record is copied to the
+ * output CSV with its first column replaced by the model's prediction.
+ */
+public class ApplyRegression extends Command {
+
+  @Option(name = "-s", aliases = {"--train"}, usage = "the path to the training data", required = true)
+  private File train;
+
+  @Option(name = "-i", aliases = {"--test"}, usage = "the path to the testing data", required = true)
+  private File test;
+
+  @Option(name = "-o", aliases = {"--output"}, usage = "the path to the output CSV file", required = true)
+  private File output;
+
+  @Option(name = "-m", aliases = {"--model"}, usage = "the path to the model file", required = true)
+  private File model;
+
+  @Option(name = "-h", aliases = {"--header"}, usage = "specifies if the input CSV files have headers")
+  private boolean header = true;
+
+  /** Reads the model, predicts every test record and prints the records to {@code output}. */
+  @Override
+  protected void exec() throws Exception {
+    try (final FileInputStream stream = new FileInputStream(model)) {
+      final Classifier classifier = (Classifier) SerializationHelper.read(stream);
+      final boolean nominal = WekaUtils.isLogisticClassifier(classifier);
+      final Instances data = new ARFFDataReader(train, nominal, header).read(test);
+
+      try (final CSVPrinter printer = CSVUtils.printer(output);
+           final CSVInputData csvData = CSVUtils.reader(test, header)) {
+        if (csvData.hasHeader()) {
+          printer.printRecord(csvData.getHeader());
+        }
+        // NOTE(review): assumes the instances in data follow the order of the CSV records.
+        int index = 0;
+        for (final ArrayList<String> record : csvData) {
+          // Advance the index so that each record is paired with its own instance.
+          final double prediction = classifier.classifyInstance(data.get(index++));
+          record.set(0, nominal ? data.classAttribute().value((int) prediction) : String.valueOf(prediction));
+          printer.printRecord(record);
+        }
+      }
+    }
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(ApplyRegression.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/Command.java b/src/main/java/ai/preferred/regression/Command.java
new file mode 100644
index 0000000..ca84173
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/Command.java
@@ -0,0 +1,58 @@
+package ai.preferred.regression;
+
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Base class of the commands: creates a command, parses its options and executes it. */
+public abstract class Command {
+  private static final Logger LOGGER = LoggerFactory.getLogger(Command.class);
+
+  /** The actual work of the command; invoked after the options were parsed successfully. */
+  protected abstract void exec() throws Exception;
+
+  /** Creates {@code clazz}; prints its usage if {@code args} is null, else parses the options and runs it. */
+  protected static void parseArgsAndRun(Class<? extends Command> clazz, String[] args) {
+    Command instance = null;
+    try {
+      instance = clazz.newInstance();
+    } catch (IllegalAccessException | InstantiationException failure) {
+      System.err.println("Please check if there is the public default constructor for the class: " + clazz.getCanonicalName());
+      System.exit(1);
+    }
+
+    if (args == null) {
+      final String simpleName = clazz.getSimpleName();
+      for (final String line : new String[]{"=========== HELP ===========", "",
+          "Processing Element: " + simpleName + ".class", "", "Shell.run(" + simpleName + ".class, \"\");"}) {
+        System.out.println(line);
+      }
+      final CmdLineParser helpParser = new CmdLineParser(instance);
+      System.out.println();
+      helpParser.printUsage(System.out);
+      for (final String line : new String[]{"", "============================", "", ""}) {
+        System.out.println(line);
+      }
+      return;
+    }
+
+    final CmdLineParser parser = new CmdLineParser(instance);
+    try {
+      parser.parseArgument(args);
+    } catch (CmdLineException failure) {
+      System.err.println("Command: " + clazz.getCanonicalName());
+      System.err.println(failure.getMessage());
+      System.err.println();
+      parser.printUsage(System.err);
+      System.exit(1);
+    }
+
+    try {
+      instance.exec();
+    } catch (Exception failure) {
+      LOGGER.error("Unable to execute command (" + clazz.getCanonicalName() + "): ", failure);
+      System.exit(1);
+    }
+  }
+}
diff --git a/src/main/java/ai/preferred/regression/EvaluateRegression.java b/src/main/java/ai/preferred/regression/EvaluateRegression.java
new file mode 100644
index 0000000..3bd327a
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/EvaluateRegression.java
@@ -0,0 +1,68 @@
+package ai.preferred.regression;
+
+import ai.preferred.regression.io.ARFFDataReader;
+import org.kohsuke.args4j.Option;
+import weka.classifiers.Classifier;
+import weka.classifiers.evaluation.Evaluation;
+import weka.core.Instances;
+import weka.core.SerializationHelper;
+
+import java.io.File;
+import java.io.FileInputStream;
+
+/**
+ * Evaluates a serialized model on the test data. For a logistic classifier the accuracy or the
+ * per-class precision, recall and F-measure are printed, for any other model the RMSE.
+ */
+public class EvaluateRegression extends Command {
+
+  @Option(name = "-s", aliases = {"--train"}, usage = "the path to the training data", required = true)
+  private File train;
+
+  @Option(name = "-i", aliases = {"--test"}, usage = "the path to the testing data", required = true)
+  private File test;
+
+  @Option(name = "-m", aliases = {"--model"}, usage = "the path to the model file", required = true)
+  private File model;
+
+  @Option(name = "-v", aliases = {"--verbose"}, usage = "verbosity level (0 - short, 1 - default, 2 - detailed)")
+  private int verbose = 1;
+
+  @Option(name = "-h", aliases = {"--header"}, usage = "specifies if the input CSV files have headers")
+  private boolean header = true;
+
+  /**
+   * Loads the model, evaluates it on the test data and prints as much detail as {@code verbose} asks for.
+   */
+  @Override
+  protected void exec() throws Exception {
+    try (final FileInputStream in = new FileInputStream(model)) {
+      final Classifier loaded = (Classifier) SerializationHelper.read(in);
+      final boolean nominal = WekaUtils.isLogisticClassifier(loaded);
+      final Instances testSet = new ARFFDataReader(train, nominal, header).read(test);
+
+      final Evaluation evaluation = new Evaluation(testSet);
+      evaluation.evaluateModel(loaded, testSet);
+      if (!nominal) {
+        // Regression: the bare number for verbose <= 0, the labelled number otherwise.
+        System.out.println((verbose <= 0 ? "" : "RMSE = ") + evaluation.rootMeanSquaredError());
+      } else if (verbose <= 1) {
+        // Classification: the bare number for verbose <= 0, the labelled number for verbose == 1.
+        System.out.println((verbose <= 0 ? "" : "ACCURACY = ") + evaluation.pctCorrect());
+      } else {
+        System.out.println();
+        System.out.println("CLASS\tPRECISION\tRECALL\tF-MEASURE");
+        for (int label = 0; label < testSet.classAttribute().numValues(); label++) {
+          System.out.printf("%s\t%f\t%f\t%f", testSet.classAttribute().value(label),
+              evaluation.precision(label), evaluation.recall(label), evaluation.fMeasure(label));
+          System.out.println();
+        }
+      }
+    }
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(EvaluateRegression.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/PlotData.java b/src/main/java/ai/preferred/regression/PlotData.java
new file mode 100644
index 0000000..d3a0b71
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/PlotData.java
@@ -0,0 +1,44 @@
+package ai.preferred.regression;
+
+import ai.preferred.regression.io.ARFFDataReader;
+import ai.preferred.regression.plot.XYChart;
+import org.jfree.data.xy.XYSeries;
+import org.kohsuke.args4j.Option;
+import weka.core.Instance;
+import weka.core.Instances;
+
+import javax.swing.*;
+import java.io.File;
+
+/** Plots the input data: attribute 1 of every instance is the X value, attribute 0 the Y value. */
+public class PlotData extends Command {
+
+  @Option(name = "-i", aliases = {"--input"}, usage = "the path to the input CSV file", required = true)
+  private File input;
+
+  @Option(name = "-n", aliases = {"--name"}, usage = "the name of the plot")
+  private String name = "DATA";
+
+  @Option(name = "-h", aliases = {"--header"}, usage = "specifies if the input CSV files have headers")
+  private boolean header = true;
+
+  /** Reads the input file and shows its points in a chart; closing the chart exits the application. */
+  @Override
+  protected void exec() throws Exception {
+    final Instances points = new ARFFDataReader(input, false, header).read(input);
+
+    final XYSeries observed = new XYSeries("DATA");
+    points.forEach(point -> observed.add(point.value(1), point.value(0)));
+
+    // The second series is created empty, so the chart receives no regression points.
+    final XYChart window = new XYChart(name, observed, new XYSeries("REGRESSION"));
+    window.pack();
+    window.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
+    window.setVisible(true);
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(PlotData.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/PlotLinearRegression.java b/src/main/java/ai/preferred/regression/PlotLinearRegression.java
new file mode 100644
index 0000000..a121352
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/PlotLinearRegression.java
@@ -0,0 +1,67 @@
+package ai.preferred.regression;
+
+import ai.preferred.regression.io.ARFFDataReader;
+import ai.preferred.regression.plot.XYChart;
+import org.jfree.data.xy.XYSeries;
+import org.kohsuke.args4j.Option;
+import weka.classifiers.Classifier;
+import weka.classifiers.functions.LinearRegression;
+import weka.core.Instance;
+import weka.core.Instances;
+import weka.core.SerializationHelper;
+
+import javax.swing.*;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+/** Plots the data of a CSV file together with the line of a trained LinearRegression model. */
+public class PlotLinearRegression extends Command {
+
+  @Option(name = "-i", aliases = {"--input"}, usage = "the path to the input CSV file", required = true)
+  private File input;
+
+  @Option(name = "-m", aliases = {"--model"}, usage = "the path to the model file", required = true)
+  private File model;
+
+  @Option(name = "-n", aliases = {"--name"}, usage = "the name of the plot")
+  private String name = "Y = alpha * X + beta";
+
+  @Option(name = "-h", aliases = {"--header"}, usage = "specifies if the input CSV files have headers")
+  private boolean header = true;
+
+  /** Draws the data points and the model's line; throws IOException for any model that cannot be plotted. */
+  @Override
+  protected void exec() throws Exception {
+    try (final FileInputStream stream = new FileInputStream(model)) {
+      final Classifier classifier = (Classifier) SerializationHelper.read(stream);
+      if (!(classifier instanceof LinearRegression)) {
+        throw new IOException("The model is not LinearRegression!");
+      }
+      final double[] w = ((LinearRegression) classifier).coefficients();
+      if (w.length != 3) {
+        throw new IOException("We can plot only linear functions!");
+      }
+
+      final Instances data = new ARFFDataReader(input, false, header).read(input);
+      final XYSeries dataSeries = new XYSeries("DATA");
+      for (final Instance datum : data) {
+        dataSeries.add(datum.value(1), datum.value(0));
+      }
+      // The line w[1] * x + w[2] is drawn between the smallest and the largest X of the data.
+      final XYSeries regressionSeries = new XYSeries("REGRESSION");
+      regressionSeries.add(dataSeries.getMinX(), w[1] * dataSeries.getMinX() + w[2]);
+      regressionSeries.add(dataSeries.getMaxX(), w[1] * dataSeries.getMaxX() + w[2]);
+
+      final XYChart chart = new XYChart(name, dataSeries, regressionSeries);
+      chart.pack();
+      chart.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
+      chart.setVisible(true);
+    }
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(PlotLinearRegression.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/PrintRegression.java b/src/main/java/ai/preferred/regression/PrintRegression.java
new file mode 100644
index 0000000..1e417cc
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/PrintRegression.java
@@ -0,0 +1,110 @@
+package ai.preferred.regression;
+
+import ai.preferred.regression.io.ARFFDataReader;
+import org.kohsuke.args4j.Option;
+import weka.classifiers.Classifier;
+import weka.classifiers.functions.LinearRegression;
+import weka.classifiers.functions.Logistic;
+import weka.core.Attribute;
+import weka.core.Instances;
+import weka.core.SerializationHelper;
+import weka.filters.Filter;
+import weka.filters.unsupervised.attribute.RemoveUseless;
+import weka.filters.unsupervised.attribute.ReplaceMissingValues;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.ArrayList;
+import java.util.BitSet;
+
+public class PrintRegression extends Command {
+
+  @Option(name = "-i", aliases = {"--input"}, usage = "the path to the input CSV file", required = true)
+  private File input;
+
+  @Option(name = "-m", aliases = {"--model"}, usage = "the path to the model file", required = true)
+  private File model;
+
+  @Option(name = "-h", aliases = {"--header"}, usage = "specifies if the input CSV files have headers")
+  private boolean header = true;
+
+  private void printSignature(ArrayList<Attribute> signature) {
+    for (int i = 1; i < signature.size(); i++) {
+      System.out.print(signature.get(i).name() + "\t");
+    }
+    System.out.println("Bias");
+  }
+
+  @Override
+  protected void exec() throws Exception {
+    System.out.println();
+    try (final FileInputStream stream = new FileInputStream(model)) {
+      final Classifier classifier = (Classifier) SerializationHelper.read(stream);
+
+      if (classifier instanceof LinearRegression) {
+        final ARFFDataReader reader = new ARFFDataReader(input, false, header);
+        final Instances instances = preprocess(reader.read(input));
+
+        final BitSet ignore = new BitSet(instances.numAttributes());
+        for (int i = 0; i < instances.numAttributes(); i++) {
+          if (i != instances.classIndex()) {
+            if (Math.sqrt(instances.variance(i)) == 0) {
+              ignore.set(i);
+            }
+          }
+        }
+
+        final double[] w = ((LinearRegression) classifier).coefficients();
+        System.out.printf("%-20s W", "FEATURE");
+        System.out.println();
+        for (int i = 1; i < instances.numAttributes(); i++) {
+          if (ignore.get(i)) {
+            continue;
+          }
+          System.out.printf("%-20s %.6f", instances.attribute(i).name(), w[i]);
+          System.out.println();
+        }
+        System.out.printf("%-20s %.6f", "Bias", w[instances.numAttributes()]);
+        System.out.println();
+      } else if (classifier instanceof Logistic) {
+        final ARFFDataReader reader = new ARFFDataReader(input, true, header);
+        final Instances instances = preprocess(reader.read(input));
+
+        final double[][] w = ((Logistic) classifier).coefficients();
+        for (int i = 0; i < instances.classAttribute().numValues(); i++) {
+          System.out.printf("%s %s", "CLASS[" + i + "] =", instances.classAttribute().value(i));
+          System.out.println();
+        }
+        System.out.println();
+
+        System.out.printf("%-20s W", "FEATURE");
+        System.out.println();
+        for (int i = 1; i < instances.numAttributes(); i++) {
+          System.out.printf("%-20s %.6f", instances.attribute(i).name(), w[i][0]);
+          System.out.println();
+        }
+        System.out.printf("%-20s %.6f", "Bias", w[0][0]);
+        System.out.println();
+      } else {
+        throw new RuntimeException("We can process only regression models!");
+      }
+    }
+  }
+
+  private Instances preprocess(Instances instances) throws Exception {
+    final ReplaceMissingValues replaceMissingValues = new ReplaceMissingValues();
+    replaceMissingValues.setInputFormat(instances);
+    instances = Filter.useFilter(instances, replaceMissingValues);
+
+    final RemoveUseless removeUseless = new RemoveUseless();
+    removeUseless.setInputFormat(instances);
+    instances = Filter.useFilter(instances, removeUseless);
+
+    return instances;
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(PrintRegression.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/Shell.java b/src/main/java/ai/preferred/regression/Shell.java
new file mode 100644
index 0000000..436d4cc
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/Shell.java
@@ -0,0 +1,146 @@
+package ai.preferred.regression;
+
+import ai.preferred.regression.reset.DataFiles;
+import com.google.common.io.Files;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.nio.charset.StandardCharsets;
+import java.util.Objects;
+
+public class Shell {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(Shell.class);
+
+  public static void reset() {
+    final File tempDir = new File("temp");
+    mkdir(tempDir);
+    for (final File file : Objects.requireNonNull(tempDir.listFiles())) {
+      if (!file.getName().startsWith(".") && !file.delete()) {
+        LOGGER.error("Unable to delete: {}", file);
+      }
+    }
+    mkdir(tempDir);
+
+    File dataDir = new File("data");
+    mkdir(dataDir);
+    write(new File(dataDir, "icecream.csv"), DataFiles.ICECREAM_CSV);
+    write(new File(dataDir, "icecream_raw.csv"), DataFiles.ICECREAM_RAW_CSV);
+    write(new File(dataDir, "amazon.csv"), DataFiles.AMAZON_CSV);
+    write(new File(dataDir, "camera.csv"), DataFiles.CAMERA_CSV);
+    write(new File(dataDir, "amazon_extended.csv"), DataFiles.AMAZON_EXTENDED);
+  }
+
+  private static void write(File file, String content) {
+    try (final PrintWriter writer = new PrintWriter(file, "UTF-8")) {
+      writer.write(content);
+    } catch (FileNotFoundException | UnsupportedEncodingException e) {
+      LOGGER.error("Unable to reset file {}: {}", file, e);
+    }
+  }
+
+  private static void mkdir(File tempDir) { // creates the directory (with parents) only when it is missing
+    if (!tempDir.exists() && !tempDir.mkdirs()) {
+      LOGGER.error("Unable to mkdir: {}", tempDir); // pass the path so the {} placeholder is actually filled
+    }
+  }
+
+  public static void help(Class<?> clazz) {
+    try {
+      final Method method = clazz.getMethod("main", String[].class);
+      method.invoke(null, (Object) null);
+    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
+      LOGGER.error("Unable to execute {}: {}", clazz, e);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public static void copyFile(String src, String dst) {
+    try {
+      Files.copy(new File(src), new File(dst));
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public static void run(Class<?> clazz, String... args) {
+    try {
+      final Method method = clazz.getMethod("main", String[].class);
+      method.invoke(null, (Object) args);
+    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
+      LOGGER.error("Unable to execute {}: {}", clazz, e);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public static void run(Class<?> clazz, String args) { // splits a whitespace-separated argument string
+    run(clazz, args.trim().isEmpty() ? new String[0] : args.trim().split("\\s+")); // "".split() would yield [""]
+  }
+
+  private static Class<?> pe(String name) {
+    final String className = "ai.preferred.regression.pe." + name;
+    try {
+      return Class.forName(className);
+    } catch (ClassNotFoundException e) {
+      LOGGER.info("Could not find PE: {}", className);
+      return null;
+    }
+  }
+
+  private static Class<?> command(String name) {
+    final String className = "ai.preferred.regression." + name;
+    try {
+      return Class.forName(className);
+    } catch (ClassNotFoundException e) {
+      LOGGER.info("Could not find command: {}", className);
+      return null;
+    }
+  }
+
+  public static void exec(String filename) { // runs every non-blank line of the script as "<command> [args...]"
+    try (final BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), StandardCharsets.UTF_8))) {
+      String line;
+      while (null != (line = reader.readLine())) {
+        line = line.trim();
+        if (line.isEmpty()) {
+          continue;
+        }
+        final String[] command = line.split("\\s+"); // line is trimmed, so there are no empty tokens
+        final String name = command[0];
+        final String[] args = java.util.Arrays.copyOfRange(command, 1, command.length); // empty for a bare command
+
+        Class<?> clazz = pe(name); // look in the "pe" package first, then fall back to the root package
+        if (clazz == null) {
+          clazz = command(name);
+        }
+
+        if (clazz == null) {
+          LOGGER.error("Unable to execute command: {}", name);
+          return; // an unknown command aborts the rest of the script
+        }
+
+        run(clazz, args);
+      }
+    } catch (FileNotFoundException e) {
+      LOGGER.error("Unable to find input file: {}", filename);
+    } catch (IOException e) {
+      LOGGER.error("Execution error: ", e);
+    }
+  }
+
+  public static void main(String[] args) {
+    for (final String arg : args) {
+      exec(arg);
+    }
+  }
+
+  private Shell() {
+    throw new AssertionError();
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/TrainLinearRegression.java b/src/main/java/ai/preferred/regression/TrainLinearRegression.java
new file mode 100644
index 0000000..bfd5bf1
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/TrainLinearRegression.java
@@ -0,0 +1,54 @@
+package ai.preferred.regression;
+
+import ai.preferred.regression.io.ARFFDataReader;
+import org.kohsuke.args4j.Option;
+import weka.classifiers.evaluation.Evaluation;
+import weka.core.Instances;
+import weka.core.SerializationHelper;
+
+import java.io.File;
+import java.io.FileOutputStream;
+
+public class TrainLinearRegression extends Command {
+
+  @Option(name = "-i", aliases = {"--train"}, usage = "the path to the training data in CSV format", required = true)
+  private File input;
+
+  @Option(name = "-m", aliases = {"--model"}, usage = "the output path to the model file", required = true)
+  private File model;
+
+  @Option(name = "-h", aliases = {"--header"}, usage = "specifies if the input CSV files have headers")
+  private boolean header = true;
+
+  @Option(name = "-r", aliases = {"--ridge"}, usage = "the ridge parameter")
+  private double ridge = 1.0;
+
+  @Option(name = "-v", aliases = {"--verbose"}, usage = "verbosity level (-1 - disable, 0 - short, 1 - default)")
+  private int verbose = 1;
+
+  @Override
+  protected void exec() throws Exception {
+    final ARFFDataReader reader = new ARFFDataReader(input, false, header);
+    final Instances data = reader.read(input);
+    final weka.classifiers.functions.LinearRegression classifier = new weka.classifiers.functions.LinearRegression();
+    classifier.setRidge(ridge);
+    classifier.buildClassifier(data);
+
+    final Evaluation eval = new Evaluation(data);
+    eval.evaluateModel(classifier, data);
+    if (verbose <= -1) {
+      // output disabled
+    } else if (verbose == 0) {
+      System.out.println(eval.rootMeanSquaredError());
+    } else {
+      System.out.println("RMSE[TRAINING] = " + eval.rootMeanSquaredError());
+    }
+
+    SerializationHelper.write(new FileOutputStream(model), classifier);
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(TrainLinearRegression.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/TrainLogisticRegression.java b/src/main/java/ai/preferred/regression/TrainLogisticRegression.java
new file mode 100644
index 0000000..fb8f4d3
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/TrainLogisticRegression.java
@@ -0,0 +1,62 @@
+package ai.preferred.regression;
+
+import ai.preferred.regression.io.ARFFDataReader;
+import org.kohsuke.args4j.Option;
+import weka.classifiers.evaluation.Evaluation;
+import weka.core.Instances;
+import weka.core.SerializationHelper;
+
+import java.io.File;
+import java.io.FileOutputStream;
+
+public class TrainLogisticRegression extends Command {
+
+  @Option(name = "-i", aliases = {"--train"}, usage = "the path to the training data in CSV format", required = true)
+  private File input;
+
+  @Option(name = "-m", aliases = {"--model"}, usage = "the output path to the model file", required = true)
+  private File model;
+
+  @Option(name = "-h", aliases = {"--header"}, usage = "specifies if the input CSV files have headers")
+  private boolean header = true;
+
+  @Option(name = "-r", aliases = {"--ridge"}, usage = "the ridge parameter")
+  private double ridge = 1.0;
+
+  @Option(name = "-v", aliases = {"--verbose"}, usage = "verbosity level (-1 - disable, 0 - short, 1 - default, 2 - detailed)")
+  private int verbose = 1;
+
+  @Override
+  protected void exec() throws Exception {
+    final ARFFDataReader reader = new ARFFDataReader(input, true, header);
+    final Instances data = reader.read(input);
+    final weka.classifiers.functions.Logistic classifier = new weka.classifiers.functions.Logistic();
+    classifier.setRidge(ridge);
+    classifier.buildClassifier(data);
+
+    final Evaluation eval = new Evaluation(data);
+    eval.evaluateModel(classifier, data);
+    if (verbose <= -1) {
+      // output disabled
+    } else if (verbose == 0) {
+      System.out.println(eval.pctCorrect());
+    } else if (verbose == 1) {
+      System.out.println("ACCURACY[TRAINING] = " + eval.pctCorrect());
+    } else {
+      System.out.println();
+      System.out.println("CLASS\tPRECISION\tRECALL\tF-MEASURE");
+      for (int i = 0; i < data.classAttribute().numValues(); i++) {
+        System.out.printf("%s\t%f\t%f\t%f", data.classAttribute().value(i), eval.precision(i), eval.recall(i), eval.fMeasure(i));
+        System.out.println();
+      }
+    }
+
+    SerializationHelper.write(new FileOutputStream(model), classifier);
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(TrainLogisticRegression.class, args);
+  }
+
+
+}
diff --git a/src/main/java/ai/preferred/regression/WekaUtils.java b/src/main/java/ai/preferred/regression/WekaUtils.java
new file mode 100644
index 0000000..8f891d5
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/WekaUtils.java
@@ -0,0 +1,25 @@
+package ai.preferred.regression;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.functions.LinearRegression;
+import weka.classifiers.functions.Logistic;
+
+public class WekaUtils {
+
+  static boolean isLogisticClassifier(Classifier classifier) {
+    boolean nominal;
+    if (classifier instanceof Logistic) {
+      nominal = true;
+    } else if (classifier instanceof LinearRegression) {
+      nominal = false;
+    } else {
+      throw new IllegalStateException("The model is neither LogisticRegression nor LinearRegression!");
+    }
+    return nominal;
+  }
+
+  private WekaUtils() {
+    throw new AssertionError();
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E00_IceCream.java b/src/main/java/ai/preferred/regression/exercise/E00_IceCream.java
new file mode 100644
index 0000000..ff12e33
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E00_IceCream.java
@@ -0,0 +1,23 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.PlotData;
+import ai.preferred.regression.Shell;
+
+public class E00_IceCream {
+
+  /**
+   * DATA: data/icecream.csv
+   * <p>
+   * TODO:
+   * Run this class to plot the input data, take a look at it!
+   * You can open the data file in Excel or Google Spreadsheet.
+   * <p>
+   * CHECK: Is it possible to approximate this data with a linear function?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.run(PlotData.class, "-i data/icecream.csv -n IceCream");
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E01_MyFirstRegressionWithIceCream.java b/src/main/java/ai/preferred/regression/exercise/E01_MyFirstRegressionWithIceCream.java
new file mode 100644
index 0000000..ced11b0
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E01_MyFirstRegressionWithIceCream.java
@@ -0,0 +1,24 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.PlotLinearRegression;
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.TrainLinearRegression;
+
+public class E01_MyFirstRegressionWithIceCream {
+
+  /**
+   * DATA: data/icecream.csv
+   * <p>
+   * TODO:
+   * Train a linear regression on 'icecream.csv'. Plot the regression line.
+   * <p>
+   * CHECK: What is the value of RMSE[TRAINING] for this dataset?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.run(TrainLinearRegression.class, "-i data/icecream.csv -m temp/icecream.model");
+    Shell.run(PlotLinearRegression.class, "-i data/icecream.csv -m temp/icecream.model");
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E02_ReadingRegression.java b/src/main/java/ai/preferred/regression/exercise/E02_ReadingRegression.java
new file mode 100644
index 0000000..57ebe33
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E02_ReadingRegression.java
@@ -0,0 +1,24 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.PrintRegression;
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.TrainLinearRegression;
+
+public class E02_ReadingRegression {
+
+  /**
+   * DATA: data/icecream.csv
+   * <p>
+   * TODO:
+   * You can print the regression weights, to understand it a bit better!
+   * <p>
+   * CHECK: What is the value of the regression when Temperature is 0?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.run(TrainLinearRegression.class, "-i data/icecream.csv -m temp/icecream.model");
+    Shell.run(PrintRegression.class, "-i data/icecream.csv -m temp/icecream.model");
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E03_RawIceCream.java b/src/main/java/ai/preferred/regression/exercise/E03_RawIceCream.java
new file mode 100644
index 0000000..2c73422
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E03_RawIceCream.java
@@ -0,0 +1,25 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.pe.RemoveColumn;
+import ai.preferred.regression.pe.SwapColumns;
+
+public class E03_RawIceCream {
+
+  /**
+   * DATA: data/icecream_raw.csv ; data/icecream.csv
+   * <p>
+   * TODO:
+   * Often, data come in a format which is not suitable for analysis or for building a regression.
+   * Convert 'icecream_raw.csv' to make it look like 'icecream.csv'.
+   * <p>
+   * CHECK: Should you use RemoveColumn or SwapColumns as the first step?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.help(RemoveColumn.class);
+    Shell.help(SwapColumns.class);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E04_RegressionForTemperature.java b/src/main/java/ai/preferred/regression/exercise/E04_RegressionForTemperature.java
new file mode 100644
index 0000000..d4477f4
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E04_RegressionForTemperature.java
@@ -0,0 +1,20 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+
+public class E04_RegressionForTemperature {
+
+  /**
+   * DATA: data/icecream_raw.csv
+   * <p>
+   * TODO:
+   * Train and plot a regression predicting temperature based on consumption.
+   * Plot it!
+   * <p>
+   * CHECK: What is the difference between consumption-regression and temperature-regression?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E05_TryX2Only.java b/src/main/java/ai/preferred/regression/exercise/E05_TryX2Only.java
new file mode 100644
index 0000000..144553f
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E05_TryX2Only.java
@@ -0,0 +1,23 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.pe.AddX2;
+
+public class E05_TryX2Only {
+
+  /**
+   * DATA: data/icecream.csv
+   * <p>
+   * TODO:
+   * Add column Temperature^2 and train linear regression to predict consumption based only on Temperature^2 feature.
+   * Plot the trained regression!
+   * <p>
+   * CHECK: Is RMSE[TRAINING] different from 'E01_MyFirstRegressionWithIceCream'?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.help(AddX2.class);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E06_TryX1AndX2AndX3.java b/src/main/java/ai/preferred/regression/exercise/E06_TryX1AndX2AndX3.java
new file mode 100644
index 0000000..b200f69
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E06_TryX1AndX2AndX3.java
@@ -0,0 +1,22 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.pe.AddX3;
+
+public class E06_TryX1AndX2AndX3 {
+
+  /**
+   * DATA: data/icecream.csv
+   * <p>
+   * TODO:
+   * Add columns Temperature^2 and Temperature^3 and train linear regression using all the parameters!
+   * <p>
+   * CHECK: Check RMSE[TRAINING] again, is it any different?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+    // TODO: implement AddX3.class, hint: take a look at the AddX2 class
+    Shell.help(AddX3.class);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E07_AmazonText.java b/src/main/java/ai/preferred/regression/exercise/E07_AmazonText.java
new file mode 100644
index 0000000..23a8921
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E07_AmazonText.java
@@ -0,0 +1,23 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.pe.EncodeTextAsFrequency;
+
+public class E07_AmazonText {
+
+  /**
+   * DATA: data/amazon.csv
+   * <p>
+   * TODO:
+   * Take a look at 'amazon.csv'; one of the columns contains text.
+   * Convert it into word frequencies using EncodeTextAsFrequency.class.
+   * <p>
+   * CHECK: How many columns does the new dataset have after conversion?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.help(EncodeTextAsFrequency.class);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E08_AmazonCheap.java b/src/main/java/ai/preferred/regression/exercise/E08_AmazonCheap.java
new file mode 100644
index 0000000..c50d09f
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E08_AmazonCheap.java
@@ -0,0 +1,22 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.pe.ProjectColumns;
+
+public class E08_AmazonCheap {
+
+  /**
+   * DATA: data/amazon.csv
+   * <p>
+   * TODO:
+   * Train a linear regression for rating prediction based on word "cheap" only! Plot it!
+   * <p>
+   * CHECK: What are the regression parameters? Is word "cheap" a good predictor?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.help(ProjectColumns.class);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E09_AmazonExpensive.java b/src/main/java/ai/preferred/regression/exercise/E09_AmazonExpensive.java
new file mode 100644
index 0000000..c416c0a
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E09_AmazonExpensive.java
@@ -0,0 +1,19 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+
+public class E09_AmazonExpensive {
+
+  /**
+   * DATA: data/amazon.csv
+   * <p>
+   * TODO:
+   * Train linear regression for rating prediction based on word "expensive" only. Plot it!
+   * <p>
+   * CHECK: What are the regression parameters? Is word "expensive" a good predictor?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E10_AmazonYourOwnWord.java b/src/main/java/ai/preferred/regression/exercise/E10_AmazonYourOwnWord.java
new file mode 100644
index 0000000..21dbb27
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E10_AmazonYourOwnWord.java
@@ -0,0 +1,20 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+
+public class E10_AmazonYourOwnWord {
+
+  /**
+   * DATA: data/amazon.csv
+   * <p>
+   * TODO:
+   * Train a linear regression for rating prediction based on your own word. Plot it!
+   * We will discuss it!
+   * <p>
+   * CHECK: What are the regression parameters? Is your word a good predictor?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E11_CameraCategories.java b/src/main/java/ai/preferred/regression/exercise/E11_CameraCategories.java
new file mode 100644
index 0000000..e857ba6
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E11_CameraCategories.java
@@ -0,0 +1,23 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.pe.EncodeValueAsOneHot;
+
+/**
+ * DATA: data/camera.csv
+ * <p>
+ * TODO:
+ * Take a look at 'camera.csv'. It has a lot of categorical data, which is to be
+ * processed and represented as 0-1 values.
+ * <p>
+ * CHECK: How many columns does the dataset have after processing?
+ */
+public class E11_CameraCategories {
+
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.help(EncodeValueAsOneHot.class);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E12_CameraWithAutoFocus.java b/src/main/java/ai/preferred/regression/exercise/E12_CameraWithAutoFocus.java
new file mode 100644
index 0000000..a512889
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E12_CameraWithAutoFocus.java
@@ -0,0 +1,23 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.pe.SelectEquals;
+
+public class E12_CameraWithAutoFocus {
+
+  /**
+   * DATA: data/camera.csv
+   * <p>
+   * TODO:
+   * We are interested in the subset of 'camera.csv', the cameras with auto focus.
+   * Select this subset and train a linear regression to predict price based on camera type.
+   * <p>
+   * CHECK: How many rows does the dataset have after processing?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.help(SelectEquals.class);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E13_ShuffleAndPartitionIceCream.java b/src/main/java/ai/preferred/regression/exercise/E13_ShuffleAndPartitionIceCream.java
new file mode 100644
index 0000000..59f8e1d
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E13_ShuffleAndPartitionIceCream.java
@@ -0,0 +1,26 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.pe.Partition;
+import ai.preferred.regression.pe.Shuffle;
+
+public class E13_ShuffleAndPartitionIceCream {
+
+  /**
+   * DATA: data/icecream.csv
+   * <p>
+   * TODO:
+   * We are back to 'icecream.csv'.
+   * Shuffle and partition the data in proportion 80/20, 80% is for training data and 20% is for testing data.
+   * Plot the data splits.
+   * <p>
+   * CHECK: How many rows are there in the training and testing datasets?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.help(Shuffle.class);
+    Shell.help(Partition.class);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E14_TrainTest.java b/src/main/java/ai/preferred/regression/exercise/E14_TrainTest.java
new file mode 100644
index 0000000..dfe9dda
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E14_TrainTest.java
@@ -0,0 +1,23 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.EvaluateRegression;
+import ai.preferred.regression.Shell;
+
+public class E14_TrainTest {
+
+  /**
+   * DATA: data/icecream.csv
+   * <p>
+   * TODO:
+   * Shuffle and partition the data in proportion 60/40, 60% is for the training data and 40% is for the testing data.
+   * Train a regression on the training data and evaluate it on the testing data.
+   * <p>
+   * CHECK: Is RMSE (on testing) > RMSE[TRAINING] or RMSE (on testing) < RMSE[TRAINING]?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.help(EvaluateRegression.class);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E15_TestX123.java b/src/main/java/ai/preferred/regression/exercise/E15_TestX123.java
new file mode 100644
index 0000000..d883f4a
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E15_TestX123.java
@@ -0,0 +1,21 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+
+public class E15_TestX123 {
+
+  /**
+   * DATA: data/icecream.csv
+   * <p>
+   * TODO:
+   * Let's continue with our previous split.
+   * Add more features to the dataset: Temperature^2 and Temperature^3.
+   * Train a regression model on the training data and evaluate it on the testing data.
+   * <p>
+   * CHECK: Is RMSE (with more features) > RMSE (with only one feature)?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E16_AmazonTrainTest.java b/src/main/java/ai/preferred/regression/exercise/E16_AmazonTrainTest.java
new file mode 100644
index 0000000..3173e7c
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E16_AmazonTrainTest.java
@@ -0,0 +1,21 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+
+public class E16_AmazonTrainTest {
+
+  /**
+   * DATA: data/amazon.csv
+   * <p>
+   * TODO:
+   * Let's go to the amazon data: 'amazon.csv'.
+   * Split the data in proportion 80/20.
+   * Build a regression on the training split and evaluate it on the testing.
+   * <p>
+   * CHECK: Is RMSE (on testing) > RMSE[TRAINING] or RMSE (on testing) < RMSE[TRAINING]?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E17_AmazonLogistic.java b/src/main/java/ai/preferred/regression/exercise/E17_AmazonLogistic.java
new file mode 100644
index 0000000..d6717a0
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E17_AmazonLogistic.java
@@ -0,0 +1,22 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.TrainLogisticRegression;
+
+public class E17_AmazonLogistic {
+
+  /**
+   * DATA: data/amazon.csv
+   * <p>
+   * TODO:
+   * Build and evaluate a logistic regression model. Data split: 80/20.
+   * <p>
+   * CHECK: Is ACCURACY (on testing) > ACCURACY[TRAINING] or ACCURACY (on testing) < ACCURACY[TRAINING]?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.help(TrainLogisticRegression.class);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E18_LogisticRidgeRegression.java b/src/main/java/ai/preferred/regression/exercise/E18_LogisticRidgeRegression.java
new file mode 100644
index 0000000..9cf5e30
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E18_LogisticRidgeRegression.java
@@ -0,0 +1,23 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.TrainLogisticRegression;
+
+
+public class E18_LogisticRidgeRegression {
+
+  /**
+   * DATA: data/amazon.csv
+   * <p>
+   * TODO:
+   * Build and evaluate a logistic regression model with ridge = {0.1, 1.0, 10.0}. Data split: 80/20.
+   * <p>
+   * CHECK: Which ridge parameter gives the best ACCURACY (on testing)?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.help(TrainLogisticRegression.class);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E19_LinearRidgeRegression.java b/src/main/java/ai/preferred/regression/exercise/E19_LinearRidgeRegression.java
new file mode 100644
index 0000000..5051965
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E19_LinearRidgeRegression.java
@@ -0,0 +1,22 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+import ai.preferred.regression.TrainLinearRegression;
+
+public class E19_LinearRidgeRegression {
+
+  /**
+   * DATA: data/amazon.csv
+   * <p>
+   * TODO:
+   * Build and evaluate one linear regression model per ridge value in {0.1, 1.0, 10.0}. Data split: 80/20.
+   * <p>
+   * CHECK: Which ridge value gives the best RMSE (on testing)?
+   */
+  public static void main(String[] args) {
+    Shell.reset();
+
+    Shell.help(TrainLinearRegression.class); // NOTE(review): presumably prints the usage of the command; confirm in Shell
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/exercise/E20_GrandFinale.java b/src/main/java/ai/preferred/regression/exercise/E20_GrandFinale.java
new file mode 100644
index 0000000..97e85fc
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/exercise/E20_GrandFinale.java
@@ -0,0 +1,19 @@
+package ai.preferred.regression.exercise;
+
+import ai.preferred.regression.Shell;
+
+public class E20_GrandFinale {
+
+  /**
+   * DATA: data/amazon_extended.csv
+   * <p>
+   * TODO:
+   * Build and evaluate a regression model for rating prediction. Data split: 80/20.
+   * <p>
+   * CHECK: What is the best ACCURACY (on testing) you can get?
+   */
+  public static void main(String[] args) {
+    Shell.reset(); // the exercise pipeline is expected to be added after this call
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/io/ARFFDataReader.java b/src/main/java/ai/preferred/regression/io/ARFFDataReader.java
new file mode 100644
index 0000000..b00b98f
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/io/ARFFDataReader.java
@@ -0,0 +1,158 @@
+package ai.preferred.regression.io;
+
+import weka.core.*;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * Reads CSV files into Weka {@link Instances}, using column 0 as the class attribute.
+ * <p>
+ * The attribute list (the "signature") is derived once from a signature file in the constructor
+ * and is reused for every file passed to {@link #read(File)}.
+ */
+public class ARFFDataReader {
+
+  /**
+   * Parses the value at column {@code col} of {@code record} as a double.
+   *
+   * @param row the row index; used only to build the error message
+   * @throws IOException if the value is absent or is not a valid number
+   */
+  private static double parseDouble(ArrayList<String> record, int row, int col) throws IOException {
+    try {
+      return Double.parseDouble(record.get(col));
+    } catch (NumberFormatException | NullPointerException | IndexOutOfBoundsException e) {
+      // Chain the cause so that the underlying parse failure stays visible in stack traces.
+      throw new IOException("A number expected! (row = " + row + "; col = " + col + ")", e);
+    }
+  }
+
+  private final boolean nominal;
+  private final boolean parseHeader;
+  private final ArrayList<Attribute> signature;
+
+  /**
+   * Derives the signature from {@code signatureFile} and checks that every numeric column parses.
+   *
+   * @param signatureFile the CSV file the attributes are derived from
+   * @param nominal       if true, column 0 becomes a nominal attribute whose values are the sorted
+   *                      distinct values of that column; otherwise column 0 must be numeric too
+   * @param parseHeader   if true, the first record supplies the attribute names; otherwise the
+   *                      attributes are named "Y", "X1", "X2", ...
+   * @throws IOException if the file cannot be read, a value that must be numeric is not, or there
+   *                     is nothing to derive the signature from
+   */
+  public ARFFDataReader(File signatureFile, boolean nominal, boolean parseHeader) throws IOException {
+    this.nominal = nominal;
+    this.parseHeader = parseHeader;
+    try (final CSVInputData data = new CSVInputData(signatureFile, parseHeader)) {
+      final ArrayList<Attribute> signature = new ArrayList<>();
+      if (nominal) {
+        final Set<String> attributeValueSet = new TreeSet<>();
+        ArrayList<String> firstRecord = null;
+        int row = parseHeader ? 1 : 0;
+        for (final ArrayList<String> record : data) {
+          if (firstRecord == null) {
+            firstRecord = record;
+          }
+          attributeValueSet.add(record.get(0));
+          for (int col = 1; col < record.size(); col++) {
+            parseDouble(record, row, col);
+          }
+          row++;
+        }
+        if (firstRecord == null) {
+          throw new IOException("There is no records in the CSV file!");
+        }
+        if (data.hasHeader()) {
+          final ArrayList<String> header = data.getHeader();
+          signature.add(new Attribute(header.get(0), new ArrayList<>(attributeValueSet)));
+          for (int i = 1; i < header.size(); i++) {
+            signature.add(new Attribute(header.get(i)));
+          }
+        } else {
+          signature.add(new Attribute("Y", new ArrayList<>(attributeValueSet)));
+          for (int i = 1; i < firstRecord.size(); i++) {
+            signature.add(new Attribute("X" + i));
+          }
+        }
+      } else {
+        int row = parseHeader ? 1 : 0;
+        if (data.hasHeader()) {
+          final ArrayList<String> header = data.getHeader();
+          signature.add(new Attribute(header.get(0)));
+          for (int i = 1; i < header.size(); i++) {
+            signature.add(new Attribute(header.get(i)));
+          }
+        }
+        for (final ArrayList<String> record : data) {
+          if (signature.isEmpty()) {
+            signature.add(new Attribute("Y"));
+            for (int i = 1; i < record.size(); i++) {
+              signature.add(new Attribute("X" + i));
+            }
+          }
+          for (int col = 0; col < record.size(); col++) {
+            parseDouble(record, row, col);
+          }
+          // Advance the counter so that a parse error reports the row it occurred in.
+          row++;
+        }
+        if (signature.isEmpty()) {
+          throw new IOException("There is no records in the CSV file!");
+        }
+      }
+      this.signature = signature;
+    }
+  }
+
+  /** Returns a copy of the attribute list derived from the signature file. */
+  public ArrayList<Attribute> getSignature() {
+    return new ArrayList<>(signature);
+  }
+
+  /**
+   * Reads {@code file} into a data set that uses this reader's signature, with attribute 0 as class.
+   *
+   * @throws IOException if the file cannot be read, a numeric value cannot be parsed, a record has
+   *                     more columns than the signature, or (nominal mode) a class value does not
+   *                     occur in the signature file
+   */
+  public Instances read(File file) throws IOException {
+    final Instances instances = new Instances("DATA", signature, 100);
+    instances.setClassIndex(0);
+    try (final CSVInputData data = new CSVInputData(file, parseHeader)) {
+      int row = parseHeader ? 1 : 0;
+      for (final ArrayList<String> record : data) {
+        if (record.size() > instances.numAttributes()) {
+          // Report surplus columns here instead of failing on an attribute index out of range.
+          throw new IOException("Too many columns! (row = " + row + "; expected = "
+              + instances.numAttributes() + "; found = " + record.size() + ")");
+        }
+        final Instance instance = new DenseInstance(instances.numAttributes());
+        for (int i = 1; i < record.size(); i++) {
+          instance.setValue(i, parseDouble(record, row, i));
+        }
+        if (nominal) {
+          final int valueIndex = signature.get(0).indexOfValue(record.get(0));
+          if (valueIndex < 0) {
+            // indexOfValue signals an unknown label with a negative index; storing it would
+            // produce an instance with an invalid class value.
+            throw new IOException("Unknown class value! (row = " + row + "; value = " + record.get(0) + ")");
+          }
+          instance.setValue(0, valueIndex);
+        } else {
+          instance.setValue(0, parseDouble(record, row, 0));
+        }
+        instances.add(new SparseInstance(instance));
+        row++;
+      }
+    }
+    return instances;
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/io/CSVInputData.java b/src/main/java/ai/preferred/regression/io/CSVInputData.java
new file mode 100644
index 0000000..9383515
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/io/CSVInputData.java
@@ -0,0 +1,184 @@
+package ai.preferred.regression.io;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+/**
+ * A re-iterable view over the records of a CSV file, optionally treating the first record as header.
+ * <p>
+ * Every call to {@link #iterator()} or {@link #getRecords()} re-parses the file from the start.
+ * Parsers opened by iterators that were not run to the end are released by {@link #close()}.
+ */
+public class CSVInputData implements Iterable<ArrayList<String>>, AutoCloseable {
+
+  private final ArrayList<String> header;
+  private final boolean parseHeader;
+  private final File file;
+  // Iterators whose parser has not been closed yet; see close().
+  private final ArrayList<Iter> openIterators = new ArrayList<>();
+
+  /**
+   * @param file        the CSV file to read (UTF-8, Excel CSV format)
+   * @param parseHeader if true, the first record is read eagerly and exposed via {@link #getHeader()}
+   * @throws IOException if {@code parseHeader} is set and the header cannot be read
+   */
+  public CSVInputData(File file, boolean parseHeader) throws IOException {
+    this.file = file;
+    this.parseHeader = parseHeader;
+    this.header = parseHeader ? parseHeader() : null;
+  }
+
+  /** Reads the first record of the file; the parser is closed even if reading fails. */
+  private ArrayList<String> parseHeader() throws IOException {
+    try (final CSVParser parser = newParser()) {
+      final Iterator<CSVRecord> iterator = parser.iterator();
+      if (!iterator.hasNext()) {
+        throw new IOException("The header record is not found!");
+      }
+      return Lists.newArrayList(iterator.next());
+    }
+  }
+
+  public boolean hasHeader() {
+    return header != null;
+  }
+
+  /**
+   * Returns a copy of the header record.
+   *
+   * @throws UnsupportedOperationException if this instance was created without header parsing
+   */
+  public ArrayList<String> getHeader() {
+    if (header == null) {
+      throw new UnsupportedOperationException("This CSV file has no header!");
+    }
+    return new ArrayList<>(header);
+  }
+
+  /** Loads all data records (the header, if any, is skipped) into memory. */
+  public ArrayList<ArrayList<String>> getRecords() throws IOException {
+    try (final CSVParser parser = newParser()) {
+      final Iterator<CSVRecord> iterator = parser.iterator();
+      final ArrayList<ArrayList<String>> data = new ArrayList<>();
+      skipHeaderIfExists(iterator);
+      while (iterator.hasNext()) {
+        data.add(Lists.newArrayList(iterator.next()));
+      }
+      return data;
+    }
+  }
+
+  private void skipHeaderIfExists(Iterator<CSVRecord> iterator) throws IOException {
+    if (parseHeader) {
+      if (!iterator.hasNext()) {
+        throw new IOException("The header record is not found!");
+      }
+      iterator.next();
+    }
+  }
+
+  /**
+   * Returns an iterator over the data records, backed by a freshly opened parser.
+   *
+   * @throws IllegalStateException if the file cannot be opened or its header cannot be skipped
+   */
+  @Override
+  public Iterator<ArrayList<String>> iterator() {
+    try {
+      return new Iter();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
+  private CSVParser newParser() throws IOException {
+    return CSVParser.parse(file, StandardCharsets.UTF_8, CSVFormat.EXCEL);
+  }
+
+  /**
+   * Closes the parsers of all iterators that are still open, e.g. because a loop was left early.
+   *
+   * @throws IllegalStateException if a parser fails to close
+   */
+  @Override
+  public void close() {
+    IOException failure = null;
+    // Iterate over a copy: Iter.close() removes the iterator from openIterators.
+    for (final Iter iter : new ArrayList<>(openIterators)) {
+      try {
+        iter.close();
+      } catch (IOException e) {
+        if (failure == null) {
+          failure = e;
+        } else {
+          failure.addSuppressed(e);
+        }
+      }
+    }
+    openIterators.clear();
+    if (failure != null) {
+      throw new IllegalStateException(failure);
+    }
+  }
+
+  private class Iter implements Iterator<ArrayList<String>>, Closeable {
+
+    private final CSVParser parser;
+    private final Iterator<CSVRecord> innerIter;
+
+    Iter() throws IOException {
+      parser = CSVInputData.this.newParser();
+      innerIter = parser.iterator();
+      try {
+        skipHeaderIfExists(innerIter);
+      } catch (IOException | RuntimeException e) {
+        // Do not leak the parser if the header cannot be skipped.
+        parser.close();
+        throw e;
+      }
+      openIterators.add(this);
+    }
+
+    @Override
+    public boolean hasNext() {
+      final boolean hasNext = innerIter.hasNext();
+      if (!hasNext) {
+        try {
+          close();
+        } catch (IOException e) {
+          throw new IllegalStateException(e);
+        }
+      }
+      return hasNext;
+    }
+
+    @Override
+    public ArrayList<String> next() {
+      return Lists.newArrayList(innerIter.next());
+    }
+
+    @Override
+    public void close() throws IOException {
+      // Deregister first so that the iterator is dropped even if closing the parser fails.
+      openIterators.remove(this);
+      parser.close();
+    }
+
+    @Override
+    protected void finalize() throws Throwable {
+      // Last-resort cleanup for callers that never close the enclosing CSVInputData. Only the
+      // parser is touched here: openIterators must not be modified from the finalizer thread.
+      parser.close();
+    }
+
+  }
+}
diff --git a/src/main/java/ai/preferred/regression/io/CSVUtils.java b/src/main/java/ai/preferred/regression/io/CSVUtils.java
new file mode 100644
index 0000000..60abd40
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/io/CSVUtils.java
@@ -0,0 +1,44 @@
+package ai.preferred.regression.io;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVPrinter;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Static factories for the CSV reader and writer used by the processing elements.
+ */
+public class CSVUtils {
+
+  private CSVUtils() {
+    throw new AssertionError();
+  }
+
+  /** Opens {@code file} for reading; {@code header} tells whether its first record is a header. */
+  public static CSVInputData reader(File file, boolean header) throws IOException {
+    return new CSVInputData(file, header);
+  }
+
+  /** Creates an Excel-format, UTF-8 CSV printer that overwrites {@code file}. */
+  public static CSVPrinter printer(File file) throws IOException {
+    final FileOutputStream stream = new FileOutputStream(file, false);
+    final OutputStreamWriter writer = new OutputStreamWriter(stream, StandardCharsets.UTF_8);
+    return new CSVPrinter(writer, CSVFormat.EXCEL);
+  }
+
+  /** Converts every element to its {@link String#valueOf(Object)} representation. */
+  @SafeVarargs
+  public static <T> String[] toStringArray(T... values) {
+    final String[] result = new String[values.length];
+    int position = 0;
+    for (final T value : values) {
+      result[position++] = String.valueOf(value);
+    }
+    return result;
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/pe/AddX2.java b/src/main/java/ai/preferred/regression/pe/AddX2.java
new file mode 100644
index 0000000..18fd1bf
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/AddX2.java
@@ -0,0 +1,46 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * Appends a column holding the square of a numeric input column.
+ */
+public class AddX2 extends ProcessingElement {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(AddX2.class);
+
+  @Option(name = "-c", aliases = {"--column"}, usage = "the index of the column", required = true)
+  private int column;
+
+  public static void main(String[] args) {
+    parseArgsAndRun(AddX2.class, args);
+  }
+
+  /**
+   * Copies every record to {@code printer} with the squared value of the selected column appended;
+   * the header, if present, gains a matching "(name)^2" column.
+   */
+  @Override
+  protected void process(CSVInputData data, CSVPrinter printer) throws IOException {
+    if (data.hasHeader()) {
+      final ArrayList<String> names = data.getHeader();
+      final String squaredName = "(" + names.get(column) + ")^2";
+      names.add(squaredName);
+      printer.printRecord(names);
+    }
+
+    for (final ArrayList<String> row : data) {
+      final double x = Double.parseDouble(row.get(column));
+      row.add(String.valueOf(x * x));
+      printer.printRecord(row);
+    }
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/pe/AddX3.java b/src/main/java/ai/preferred/regression/pe/AddX3.java
new file mode 100644
index 0000000..7847392
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/AddX3.java
@@ -0,0 +1,41 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+public class AddX3 extends ProcessingElement {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(AddX3.class);
+
+  @Option(name = "-c", aliases = {"--column"}, usage = "the index of the column", required = true)
+  private int column; // not read anywhere yet; the TODOs below are expected to use it
+
+  @Override
+  protected void process(CSVInputData data, CSVPrinter printer) throws IOException {
+    if (data.hasHeader()) {
+      ArrayList<String> header = data.getHeader();
+      // TODO: transform the header here (until then it is written out unchanged).
+      // FOR EXAMPLE:
+      // header.add("NEW_COLUMN");
+      printer.printRecord(header);
+    }
+
+    for (final ArrayList<String> record : data) {
+      // TODO: transform each record here (until then it is written out unchanged).
+      // FOR EXAMPLE:
+      // record.add("VALUE");
+      printer.printRecord(record);
+    }
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(AddX3.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/pe/Dummy.java b/src/main/java/ai/preferred/regression/pe/Dummy.java
new file mode 100644
index 0000000..755ae22
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/Dummy.java
@@ -0,0 +1,42 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+public class Dummy extends ProcessingElement {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(Dummy.class);
+
+  // TODO: add your options! ("-z" is a placeholder that is not read anywhere in this class.)
+  @Option(name = "-z", aliases = {"--option-z"})
+  private boolean option = false;
+
+  @Override
+  protected void process(CSVInputData data, CSVPrinter printer) throws IOException {
+    if (data.hasHeader()) {
+      ArrayList<String> header = data.getHeader();
+      // TODO: transform the header here (until then it is written out unchanged).
+      // FOR EXAMPLE:
+      // header.add("NEW_COLUMN");
+      printer.printRecord(header);
+    }
+
+    for (ArrayList<String> record : data) {
+      // TODO: transform each record here (until then it is written out unchanged).
+      // FOR EXAMPLE:
+      // record.add("VALUE");
+      printer.printRecord(record);
+    }
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(Dummy.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/pe/EncodeTextAsFrequency.java b/src/main/java/ai/preferred/regression/pe/EncodeTextAsFrequency.java
new file mode 100644
index 0000000..29975d4
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/EncodeTextAsFrequency.java
@@ -0,0 +1,136 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import ai.preferred.regression.io.CSVUtils;
+import ai.preferred.regression.pe.data.Vocabulary;
+import com.google.common.collect.HashMultiset;
+import com.google.common.collect.ImmutableMultiset;
+import com.google.common.collect.Multiset;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Locale;
+import java.util.regex.Pattern;
+
+/**
+ * Replaces a text column by one word-count column per word of a vocabulary built from that column.
+ * <p>
+ * The input is read twice: once to collect the most frequent words, once to encode the records.
+ */
+public class EncodeTextAsFrequency extends ProcessingElement {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(EncodeTextAsFrequency.class);
+
+  @Option(name = "-c", aliases = {"--column"}, usage = "the index of the input column", required = true)
+  private int column;
+
+  @Option(name = "-s", aliases = {"--separator"}, usage = "specifies regular expression for splitting text into words")
+  private String separator = "\\W+";
+
+  @Option(name = "-n", aliases = {"--number-of-words"}, usage = "the maximum number of words to keep")
+  private int numberOfWords = 1000;
+
+  @Option(name = "-p", aliases = {"--prefix"}, usage = "the prefix of the new columns")
+  private String prefix = "WORD:";
+
+  private static <T> Comparator<Multiset.Entry<T>> getDecreasingCountComparator() {
+    return (entry1, entry2) -> Integer.compare(entry2.getCount(), entry1.getCount());
+  }
+
+  /** Lower-cases every word using locale-independent rules. */
+  private static String[] toLowerCase(String[] words) {
+    final String[] result = new String[words.length];
+    for (int i = 0; i < words.length; i++) {
+      // Locale.ROOT keeps the vocabulary independent of the JVM's default locale
+      // (e.g. "I" would otherwise become a dotless "ı" under a Turkish locale).
+      result[i] = words[i].toLowerCase(Locale.ROOT);
+    }
+    return result;
+  }
+
+  /** Drops the words that are empty or consist of whitespace only. */
+  private static String[] trimEmpty(String[] words) {
+    final ArrayList<String> result = new ArrayList<>();
+    for (final String word : words) {
+      if (!word.trim().isEmpty()) {
+        result.add(word);
+      }
+    }
+    return result.toArray(new String[0]);
+  }
+
+  /** Splits {@code text} with {@code tokenizer} and counts the lower-cased, non-empty words. */
+  private static Multiset<String> toBagOfWords(String text, Pattern tokenizer) {
+    final String[] words = trimEmpty(tokenizer.split(text));
+    return ImmutableMultiset.copyOf(toLowerCase(words));
+  }
+
+  /**
+   * Counts the words of the selected column over all records and keeps the most frequent ones.
+   *
+   * @param numberOfWords the maximum number of words to retain
+   */
+  private Vocabulary buildVocabulary(CSVInputData reader, Pattern tokenizer, int numberOfWords) {
+    final Multiset<String> vocabulary = HashMultiset.create();
+
+    for (final ArrayList<String> values : reader) {
+      vocabulary.addAll(toBagOfWords(values.get(column), tokenizer));
+    }
+
+    final ArrayList<Multiset.Entry<String>> highestCountFirst = new ArrayList<>(vocabulary.entrySet());
+    highestCountFirst.sort(getDecreasingCountComparator());
+
+    final int retained = Math.min(highestCountFirst.size(), numberOfWords);
+    final ArrayList<String> wordsToRetain = new ArrayList<>(retained);
+    for (final Multiset.Entry<String> e : highestCountFirst.subList(0, retained)) {
+      wordsToRetain.add(e.getElement());
+    }
+
+    return new Vocabulary(wordsToRetain);
+  }
+
+  @Override
+  protected void process(CSVInputData data, CSVPrinter printer) throws IOException {
+    // Compile the separator once instead of once per record.
+    final Pattern tokenizer = Pattern.compile(separator);
+    final Vocabulary vocabulary = buildVocabulary(data, tokenizer, numberOfWords);
+
+    if (data.hasHeader()) {
+      final ArrayList<String> header = data.getHeader();
+      header.remove(column);
+      for (final String h : vocabulary.getVocabularyList()) {
+        header.add(prefix + h);
+      }
+      printer.printRecord(header);
+    }
+
+    for (final ArrayList<String> record : data) {
+      final Multiset<String> bagOfWords = toBagOfWords(record.get(column), tokenizer);
+      final Integer[] vDocument = new Integer[vocabulary.size()];
+      Arrays.fill(vDocument, 0);
+      for (final Multiset.Entry<String> entry : bagOfWords.entrySet()) {
+        final int index = vocabulary.getIndex(entry.getElement());
+        if (index == -1) {
+          // The word did not make it into the vocabulary.
+          continue;
+        }
+        vDocument[index] = entry.getCount();
+      }
+      record.remove(column);
+      Collections.addAll(record, CSVUtils.toStringArray(vDocument));
+      printer.printRecord(record);
+    }
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(EncodeTextAsFrequency.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/pe/EncodeValueAsOneHot.java b/src/main/java/ai/preferred/regression/pe/EncodeValueAsOneHot.java
new file mode 100644
index 0000000..c70ec3d
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/EncodeValueAsOneHot.java
@@ -0,0 +1,67 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import ai.preferred.regression.io.CSVUtils;
+import ai.preferred.regression.pe.data.Vocabulary;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Replaces a categorical column by one 0/1 indicator column per distinct value of that column.
+ * <p>
+ * The input is read twice: once to collect the distinct values, once to encode the records.
+ */
+public class EncodeValueAsOneHot extends ProcessingElement {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(EncodeValueAsOneHot.class);
+
+  @Option(name = "-c", aliases = {"--column"}, usage = "the index of the input column", required = true)
+  private int column;
+
+  @Option(name = "-p", aliases = {"--prefix"}, usage = "the prefix of the new columns")
+  private String prefix = "VALUE:";
+
+  public static void main(String[] args) {
+    parseArgsAndRun(EncodeValueAsOneHot.class, args);
+  }
+
+  /** Collects the distinct values of the selected column. */
+  private Vocabulary buildVocabulary(CSVInputData reader) {
+    final Set<String> distinctValues = new HashSet<>();
+    final Iterator<ArrayList<String>> rows = reader.iterator();
+    while (rows.hasNext()) {
+      distinctValues.add(rows.next().get(column));
+    }
+    return new Vocabulary(distinctValues);
+  }
+
+  @Override
+  protected void process(CSVInputData data, CSVPrinter printer) throws IOException {
+    final Vocabulary vocabulary = buildVocabulary(data);
+
+    if (data.hasHeader()) {
+      final ArrayList<String> names = data.getHeader();
+      names.remove(column);
+      for (final String value : vocabulary.getVocabularyList()) {
+        names.add(prefix + value);
+      }
+      printer.printRecord(names);
+    }
+
+    for (final ArrayList<String> row : data) {
+      // Start from an all-zero vector and flag the position of this row's value.
+      final Integer[] encoded = new Integer[vocabulary.size()];
+      Arrays.fill(encoded, 0);
+      encoded[vocabulary.getIndex(row.get(column))] = 1;
+      row.remove(column);
+      Collections.addAll(row, CSVUtils.toStringArray(encoded));
+      printer.printRecord(row);
+    }
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/pe/Partition.java b/src/main/java/ai/preferred/regression/pe/Partition.java
new file mode 100644
index 0000000..4e7e31b
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/Partition.java
@@ -0,0 +1,61 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * Writes either the leading part of the records or the remaining part to the output.
+ * <p>
+ * Running it twice with the same proportion, once without and once with {@code --exclude},
+ * yields two disjoint parts that together contain every record.
+ */
+public class Partition extends ProcessingElement {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(Partition.class);
+
+  @Option(name = "-p", aliases = {"--proportion"}, usage = "the proportion of data to be selected or excluded (ranges from 0.0 to 1.0)")
+  private double percent = 0.8;
+
+  @Option(name = "-e", aliases = {"--exclude"}, usage = "takes the other half of the selection if specified")
+  private boolean exclude = false;
+
+  /**
+   * Copies the header (if any) and then the first {@code round(percent * n)} records or, when
+   * {@code exclude} is set, all records after those.
+   *
+   * @throws IllegalArgumentException if the proportion is outside the range 0.0 to 1.0
+   */
+  @Override
+  protected void process(CSVInputData reader, CSVPrinter printer) throws IOException {
+    // Reject invalid proportions up front (the negated form also catches NaN); otherwise the
+    // split index would fall outside the record list and subList would fail after the header
+    // has already been written.
+    if (!(percent >= 0.0 && percent <= 1.0)) {
+      throw new IllegalArgumentException("The proportion must be between 0.0 and 1.0: " + percent);
+    }
+
+    if (reader.hasHeader()) {
+      printer.printRecord(reader.getHeader());
+    }
+
+    final ArrayList<ArrayList<String>> data = reader.getRecords();
+    final int n = (int) Math.round(percent * data.size());
+
+    if (exclude) {
+      printer.printRecords(data.subList(n, data.size()));
+    } else {
+      printer.printRecords(data.subList(0, n));
+    }
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(Partition.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/pe/ProcessingElement.java b/src/main/java/ai/preferred/regression/pe/ProcessingElement.java
new file mode 100644
index 0000000..1478193
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/ProcessingElement.java
@@ -0,0 +1,127 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import ai.preferred.regression.io.CSVUtils;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Base class of the command-line CSV transformations ("processing elements").
+ * <p>
+ * A subclass declares its own args4j options, implements {@link #process} and calls
+ * {@link #parseArgsAndRun} from its {@code main} method.
+ */
+public abstract class ProcessingElement {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(ProcessingElement.class);
+
+  @Option(name = "-i", aliases = {"--input"}, usage = "the path to the input CSV file", required = true)
+  private File input;
+
+  @Option(name = "-o", aliases = {"--output"}, usage = "the path to the output CSV file", required = true)
+  private File output;
+
+  @Option(name = "-h", aliases = {"--header"}, usage = "specifies if the input CSV files have headers")
+  private boolean header = true;
+
+  public ProcessingElement() {
+  }
+
+  /**
+   * Transforms the records of {@code data} and writes the result to {@code printer}.
+   * Any exception thrown here is logged by {@link #parseArgsAndRun} and terminates the JVM.
+   */
+  protected abstract void process(CSVInputData data, CSVPrinter printer) throws Exception;
+
+  /**
+   * Tells whether both paths denote the same file once relative segments and links are resolved.
+   */
+  private static boolean isSameFile(File first, File second) {
+    try {
+      return first.getCanonicalFile().equals(second.getCanonicalFile());
+    } catch (IOException e) {
+      // The canonical form is unavailable; fall back to comparing absolute paths.
+      return first.getAbsoluteFile().equals(second.getAbsoluteFile());
+    }
+  }
+
+  /**
+   * Instantiates {@code clazz}, binds {@code args} to its options and runs it on the given files.
+   * <p>
+   * Passing {@code null} as {@code args} only prints the usage of the processing element. On any
+   * failure (bad arguments, identical input and output, I/O or processing error) the JVM is
+   * terminated with exit code 1.
+   */
+  protected static void parseArgsAndRun(Class<? extends ProcessingElement> clazz, String[] args) {
+    ProcessingElement processingElement = null;
+    try {
+      // Class.newInstance() is deprecated and leaks checked constructor exceptions.
+      processingElement = clazz.getDeclaredConstructor().newInstance();
+    } catch (ReflectiveOperationException e) {
+      System.err.println("Please check if there is the public default constructor for the class: " + clazz.getCanonicalName());
+      System.exit(1);
+    }
+
+    if (args == null) {
+      System.out.println("=========== HELP ===========");
+      System.out.println();
+      System.out.println("Processing Element: " + clazz.getSimpleName() + ".class");
+      System.out.println();
+      System.out.println("Shell.run(" + clazz.getSimpleName() + ".class, \"\");");
+      final CmdLineParser parser = new CmdLineParser(processingElement);
+      System.out.println();
+      parser.printUsage(System.out);
+      System.out.println();
+      System.out.println("============================");
+      System.out.println();
+      System.out.println();
+      return;
+    }
+
+    final CmdLineParser parser = new CmdLineParser(processingElement);
+    try {
+      parser.parseArgument(args);
+    } catch (CmdLineException e) {
+      System.err.println("ProcessingElement: " + clazz.getCanonicalName());
+      System.err.println(e.getMessage());
+      System.err.println();
+      parser.printUsage(System.err);
+      System.exit(1);
+    }
+
+    if (isSameFile(processingElement.input, processingElement.output)) {
+      LOGGER.error("The input and output files point to the same location: {}", processingElement.input);
+      System.exit(1);
+    }
+
+    // Open the input first: the printer truncates the output file as soon as it is created, so a
+    // missing or unreadable input must be detected before that happens.
+    boolean failed = false;
+    try (final CSVInputData reader = CSVUtils.reader(processingElement.input, processingElement.header);
+         final CSVPrinter printer = CSVUtils.printer(processingElement.output)) {
+      try {
+        processingElement.process(reader, printer);
+      } catch (Exception e) {
+        LOGGER.error("Unexpected error: ", e);
+        failed = true;
+      }
+    } catch (IOException e) {
+      LOGGER.error("Unable to process files: ", e);
+      failed = true;
+    }
+
+    // Exit only after both resources have been closed; System.exit() inside the try block would
+    // skip the implicit close() calls.
+    if (failed) {
+      System.exit(1);
+    }
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/pe/ProjectColumns.java b/src/main/java/ai/preferred/regression/pe/ProjectColumns.java
new file mode 100644
index 0000000..c376795
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/ProjectColumns.java
@@ -0,0 +1,75 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import org.kohsuke.args4j.spi.StringArrayOptionHandler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+/**
+ * Keeps only the named columns of a CSV file; requires a header to resolve the names.
+ * <p>
+ * The retained columns keep the order they have in the input, regardless of the order in which
+ * they are listed on the command line.
+ */
+public class ProjectColumns extends ProcessingElement {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(ProjectColumns.class);
+
+  @Option(name = "-c", aliases = {"--columns"}, usage = "the column names separated by spaces", handler = StringArrayOptionHandler.class, required = true)
+  private String[] columns = new String[0];
+
+  /**
+   * Resolves column names to their positions in {@code header}.
+   * Names that do not occur in the header are skipped with a warning.
+   */
+  private static SortedSet<Integer> indicesOf(ArrayList<String> header, String[] columns) {
+    final SortedSet<Integer> indices = new TreeSet<>();
+    for (final String name : columns) {
+      final int index = header.indexOf(name);
+      if (index > -1) {
+        indices.add(index);
+      } else {
+        // Skipping keeps the run going, but a misspelled name should not go unnoticed.
+        LOGGER.warn("The column is not found in the header and will be ignored: {}", name);
+      }
+    }
+    return indices;
+  }
+
+  /** Returns the elements of {@code list} at the given positions, in ascending position order. */
+  private static <T> ArrayList<T> projectIndices(ArrayList<T> list, SortedSet<Integer> indices) {
+    final ArrayList<T> projection = new ArrayList<>(indices.size());
+    for (int index : indices) {
+      projection.add(list.get(index));
+    }
+    return projection;
+  }
+
+  @Override
+  protected void process(CSVInputData data, CSVPrinter printer) throws IOException {
+    if (!data.hasHeader()) {
+      throw new IllegalArgumentException("ProjectColumns requires CSV with header!");
+    }
+
+    final ArrayList<String> header = data.getHeader();
+    final SortedSet<Integer> indices = indicesOf(header, columns);
+    printer.printRecord(projectIndices(header, indices));
+
+    for (final ArrayList<String> record : data) {
+      printer.printRecord(projectIndices(record, indices));
+    }
+
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(ProjectColumns.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/pe/RemoveColumn.java b/src/main/java/ai/preferred/regression/pe/RemoveColumn.java
new file mode 100644
index 0000000..1b14bea
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/RemoveColumn.java
@@ -0,0 +1,41 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * Drops the column at a given index from the header and from every record.
+ */
+public class RemoveColumn extends ProcessingElement {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(RemoveColumn.class);
+
+  @Option(name = "-c", aliases = {"--column"}, usage = "the index of the column to be dropped", required = true)
+  private int column;
+
+  @Override
+  protected void process(CSVInputData data, CSVPrinter printer) throws IOException {
+    if (data.hasHeader()) {
+      final ArrayList<String> header = data.getHeader();
+      // column is an int, so this resolves to remove(int index), not remove(Object).
+      header.remove(column);
+      printer.printRecord(header);
+    }
+
+    for (final ArrayList<String> record : data) {
+      record.remove(column);
+      printer.printRecord(record);
+    }
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(RemoveColumn.class, args);
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/pe/SelectEquals.java b/src/main/java/ai/preferred/regression/pe/SelectEquals.java
new file mode 100644
index 0000000..21aa719
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/SelectEquals.java
@@ -0,0 +1,44 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * Keeps only the records whose value in a given column equals a given string.
+ */
+public class SelectEquals extends ProcessingElement {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(SelectEquals.class);
+
+  @Option(name = "-c", aliases = {"--column"}, usage = "the index of the input column", required = true)
+  private int column;
+
+  @Option(name = "-e", aliases = {"--equals"}, usage = "the value to be verified", required = true)
+  private String value;
+
+  public static void main(String[] args) {
+    parseArgsAndRun(SelectEquals.class, args);
+  }
+
+  /** Copies the header (if any) and every record whose selected column equals {@code value}. */
+  @Override
+  protected void process(CSVInputData data, CSVPrinter printer) throws IOException {
+    if (data.hasHeader()) {
+      printer.printRecord(data.getHeader());
+    }
+
+    for (final ArrayList<String> row : data) {
+      if (!value.equals(row.get(column))) {
+        continue;
+      }
+      printer.printRecord(row);
+    }
+  }
+
+}
diff --git a/src/main/java/ai/preferred/regression/pe/Shuffle.java b/src/main/java/ai/preferred/regression/pe/Shuffle.java
new file mode 100644
index 0000000..cafc95c
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/Shuffle.java
@@ -0,0 +1,36 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Random;
+
+/** Processing element that prints the input records in a pseudo-random order derived from {@code seed}. */
+public class Shuffle extends ProcessingElement {
+  private static final Logger LOGGER = LoggerFactory.getLogger(Shuffle.class);
+
+  @Option(name = "-s", aliases = {"--seed"}, usage = "random seed")
+  private long seed = 1;
+
+  /** Prints the header, if any, first; then shuffles the list returned by getRecords() and prints it. */
+  @Override
+  protected void process(CSVInputData reader, CSVPrinter printer) throws IOException {
+    if (reader.hasHeader()) {
+      printer.printRecord(reader.getHeader());
+    }
+    final Random rng = new Random(seed); // same seed, same permutation
+    final ArrayList<ArrayList<String>> rows = reader.getRecords();
+    Collections.shuffle(rows, rng);
+    printer.printRecords(rows);
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(Shuffle.class, args);
+  }
+}
diff --git a/src/main/java/ai/preferred/regression/pe/SwapColumns.java b/src/main/java/ai/preferred/regression/pe/SwapColumns.java
new file mode 100644
index 0000000..8e0b693
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/SwapColumns.java
@@ -0,0 +1,41 @@
+package ai.preferred.regression.pe;
+
+import ai.preferred.regression.io.CSVInputData;
+import org.apache.commons.csv.CSVPrinter;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+
+/** Processing element that exchanges two columns in the header (if any) and in every record. */
+public class SwapColumns extends ProcessingElement {
+  private static final Logger LOGGER = LoggerFactory.getLogger(SwapColumns.class);
+  @Option(name = "-x", aliases = {"--column-x"}, usage = "the index of one column to be swapped", required = true)
+  private int column1;
+
+  @Option(name = "-y", aliases = {"--column-y"}, usage = "the index of the other column to be swapped", required = true)
+  private int column2;
+
+  @Override
+  protected void process(CSVInputData data, CSVPrinter printer) throws IOException {
+    if (data.hasHeader()) {
+      emitSwapped(data.getHeader(), printer);
+    }
+    for (final ArrayList<String> row : data) {
+      emitSwapped(row, printer);
+    }
+  }
+
+  /** Swaps the cells at {@code column1} and {@code column2} of {@code row} in place, then prints the row. */
+  private void emitSwapped(ArrayList<String> row, CSVPrinter printer) throws IOException {
+    Collections.swap(row, column1, column2);
+    printer.printRecord(row);
+  }
+
+  public static void main(String[] args) {
+    parseArgsAndRun(SwapColumns.class, args);
+  }
+}
diff --git a/src/main/java/ai/preferred/regression/pe/data/Vocabulary.java b/src/main/java/ai/preferred/regression/pe/data/Vocabulary.java
new file mode 100644
index 0000000..ad621b6
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/pe/data/Vocabulary.java
@@ -0,0 +1,50 @@
+package ai.preferred.regression.pe.data;
+
+import java.util.*;
+
+/** Sorted, duplicate-free word list with a word-to-index lookup. */
+public class Vocabulary {
+  private final ArrayList<String> vocabularyList;
+  private final Map<String, Integer> vocabularyMap;
+
+  /** Builds the vocabulary from {@code vocabulary}; a repeated word is stored once so list and map stay in step. */
+  public Vocabulary(Collection<String> vocabulary) {
+    vocabularyList = new ArrayList<>(new TreeSet<>(vocabulary)); // natural order, repeats dropped
+    vocabularyMap = new HashMap<>(vocabularyList.size());
+    for (final String w : vocabularyList) {
+      vocabularyMap.put(w, vocabularyMap.size());
+    }
+  }
+
+  /** Returns a read-only view of the words in index order. */
+  public List<String> getVocabularyList() {
+    return Collections.unmodifiableList(vocabularyList);
+  }
+
+  public String[] getVocabularyArray() {
+    return vocabularyList.toArray(new String[0]);
+  }
+
+  /** Returns the index of {@code w}. NOTE(review): an unknown word yields 0, the same as the first word - confirm. */
+  public int getIndex(String w) {
+    final Integer index = vocabularyMap.get(w);
+    return index == null ? 0 : index;
+  }
+
+  /** Returns the word stored at {@code index}; throws IllegalArgumentException when the index is out of range. */
+  public String getWord(int index) {
+    if (index < 0 || index >= vocabularyList.size()) {
+      throw new IllegalArgumentException("No such index in the vocabulary: " + index);
+    }
+    return vocabularyList.get(index);
+  }
+
+  public int size() {
+    return vocabularyList.size();
+  }
+
+  @Override
+  public String toString() {
+    return "Vocabulary{" + vocabularyList + '}';
+  }
+}
diff --git a/src/main/java/ai/preferred/regression/plot/XYChart.java b/src/main/java/ai/preferred/regression/plot/XYChart.java
new file mode 100644
index 0000000..d993b2b
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/plot/XYChart.java
@@ -0,0 +1,41 @@
+package ai.preferred.regression.plot;
+
+import org.jfree.chart.ChartFactory;
+import org.jfree.chart.ChartPanel;
+import org.jfree.chart.JFreeChart;
+import org.jfree.chart.plot.XYPlot;
+import org.jfree.chart.renderer.xy.XYLineAndShapeRenderer;
+import org.jfree.data.xy.XYDataset;
+import org.jfree.data.xy.XYSeries;
+import org.jfree.data.xy.XYSeriesCollection;
+
+import javax.swing.*;
+import java.awt.*;
+
+/** Swing frame that shows two XY series on one chart: one drawn as points, the other as a line. */
+public class XYChart extends JFrame {
+  private static final long serialVersionUID = 1L;
+
+  /** Adds {@code data} as series 0 and {@code line} as series 1 and installs the chart as the content pane. */
+  public XYChart(String chartTitle, XYSeries data, XYSeries line) {
+    super("Linear Regression Plotter");
+    final XYSeriesCollection dataset = new XYSeriesCollection();
+    dataset.addSeries(data);
+    dataset.addSeries(line);
+    final ChartPanel chartPanel = new ChartPanel(createChart(dataset, chartTitle));
+    chartPanel.setPreferredSize(new Dimension(640, 480));
+    setContentPane(chartPanel);
+  }
+
+  /** Creates the chart; series 0 is rendered with shapes only, series 1 with lines only. */
+  private JFreeChart createChart(XYDataset dataset, String title) {
+    final XYLineAndShapeRenderer seriesRenderer = new XYLineAndShapeRenderer();
+    seriesRenderer.setSeriesShapesVisible(0, true);
+    seriesRenderer.setSeriesLinesVisible(0, false);
+    seriesRenderer.setSeriesShapesVisible(1, false);
+    seriesRenderer.setSeriesLinesVisible(1, true);
+    final JFreeChart chart = ChartFactory.createXYLineChart(title, "X", "Y", dataset);
+    chart.getXYPlot().setRenderer(seriesRenderer);
+    return chart;
+  }
+}
\ No newline at end of file
diff --git a/src/main/java/ai/preferred/regression/reset/DataFiles.java b/src/main/java/ai/preferred/regression/reset/DataFiles.java
new file mode 100644
index 0000000..83164cd
--- /dev/null
+++ b/src/main/java/ai/preferred/regression/reset/DataFiles.java
@@ -0,0 +1,207 @@
+package ai.preferred.regression.reset;
+
+/** CSV text of the sample data sets as string constants; presumably mirrors the files under data/ - confirm. */
+public class DataFiles {
+  /** Ice-cream data, 30 rows: Consumption,Temperature. */
+  public static final String ICECREAM_CSV = "Consumption,Temperature\n" +
+      "0.386,5.00\n" +
+      "0.374,13.33\n" +
+      "0.393,17.22\n" +
+      "0.425,20.00\n" +
+      "0.406,20.56\n" +
+      "0.344,18.33\n" +
+      "0.327,16.11\n" +
+      "0.288,8.33\n" +
+      "0.269,0.00\n" +
+      "0.256,-4.44\n" +
+      "0.286,-2.22\n" +
+      "0.298,-3.33\n" +
+      "0.329,0.00\n" +
+      "0.318,4.44\n" +
+      "0.381,12.78\n" +
+      "0.381,17.22\n" +
+      "0.47,22.22\n" +
+      "0.443,22.22\n" +
+      "0.386,19.44\n" +
+      "0.342,15.56\n" +
+      "0.319,6.67\n" +
+      "0.307,4.44\n" +
+      "0.284,0.00\n" +
+      "0.326,-2.78\n" +
+      "0.309,-2.22\n" +
+      "0.359,0.56\n" +
+      "0.376,5.00\n" +
+      "0.416,11.11\n" +
+      "0.437,17.78\n" +
+      "0.548,21.67\n";
+  /** Ice-cream data with a leading Id column, 30 rows: Id,Temperature,Consumption. */
+  public static final String ICECREAM_RAW_CSV = "Id,Temperature,Consumption\n" +
+      "1,5.00,0.386\n" +
+      "2,13.33,0.374\n" +
+      "3,17.22,0.393\n" +
+      "4,20.00,0.425\n" +
+      "5,20.56,0.406\n" +
+      "6,18.33,0.344\n" +
+      "7,16.11,0.327\n" +
+      "8,8.33,0.288\n" +
+      "9,0.00,0.269\n" +
+      "10,-4.44,0.256\n" +
+      "11,-2.22,0.286\n" +
+      "12,-3.33,0.298\n" +
+      "13,0.00,0.329\n" +
+      "14,4.44,0.318\n" +
+      "15,12.78,0.381\n" +
+      "16,17.22,0.381\n" +
+      "17,22.22,0.47\n" +
+      "18,22.22,0.443\n" +
+      "19,19.44,0.386\n" +
+      "20,15.56,0.342\n" +
+      "21,6.67,0.319\n" +
+      "22,4.44,0.307\n" +
+      "23,0.00,0.284\n" +
+      "24,-2.78,0.326\n" +
+      "25,-2.22,0.309\n" +
+      "26,0.56,0.359\n" +
+      "27,5.00,0.376\n" +
+      "28,11.11,0.416\n" +
+      "29,17.78,0.437\n" +
+      "30,21.67,0.548\n";
+  /** Product reviews, 60 rows: Id,Rating,Text; texts that contain commas are double-quoted. */
+  public static final String AMAZON_CSV = "Id,Rating,Text\n" +
+      "1,5,I only spent less than ten on these so they're good for what I paid for\n" +
+      "2,5,I'm in love with these glasses.\n" +
+      "3,5,Stylish. My kid loved them\n" +
+      "4,5,They came in great condition.\n" +
+      "5,5,These are really wonderful!\n" +
+      "6,5,these are GREAT quality\n" +
+      "7,5,She LOVES them!\n" +
+      "8,5,Love these.\n" +
+      "9,5,The quality is pretty good also.\n" +
+      "10,5,EXCELLENT PRODUCT\n" +
+      "11,5,I love them. Exactly what i wanted.\n" +
+      "12,5,Son love them\n" +
+      "13,5,He says they give him that style.\n" +
+      "14,5,Great value!!!\n" +
+      "15,5,Very complimentary!\n" +
+      "16,5,\"Cute, great quality, good fit.\"\n" +
+      "17,5,I love these glasses!!\n" +
+      "18,5,they fit perfectly.\n" +
+      "19,5,They look expensive and the fit is perfect\n" +
+      "20,5,Sturdy and good looking for a great price\n" +
+      "21,5,Very stylish! Great accessory to compliment an outfit\n" +
+      "22,5,Thanks so much my grandson enjoy them.\n" +
+      "23,5,Daughter loves them.\n" +
+      "24,5,Makes me look smarter in my tinder profile !\n" +
+      "25,1,the side arms keep breaking\n" +
+      "26,1,just look soooo cheap!\n" +
+      "27,1,Not my style.\n" +
+      "28,1,Mine arrived broken!! Not worth sending back.\n" +
+      "29,1,Dollar store quality.\n" +
+      "30,1,Not like picture.\n" +
+      "31,1,We're cheap and broke right away. \n" +
+      "32,1,\"These are so cheap looking, they are unwearable.\"\n" +
+      "33,1,Very Very VERY Round ! Not at all vintage .\n" +
+      "34,1,It's a peace of garbage. Feels so cheap and plastic.\n" +
+      "35,1,feel flimsy like it would break i returned it the next day\n" +
+      "36,1,lens have too much glare\n" +
+      "37,1,they look cheaply made and plastic\n" +
+      "38,1,Very cheap looking\n" +
+      "39,1,make me headache\n" +
+      "40,1,Feel apart after a week of getting them prescribed.\n" +
+      "41,1,Really cheap looking.\n" +
+      "42,1,Glasses are crooked and not made correctly.\n" +
+      "43,1,poorly made... broke after three days\n" +
+      "44,1,Sunglasses were very small.\n" +
+      "45,1,Look fake and cheap\n" +
+      "46,1,lens fell out on first day.\n" +
+      "47,1,Not really like it!\n" +
+      "48,1,Delivered broken.\n" +
+      "49,1,\"Overall, trash.\"\n" +
+      "50,1,\"Were broken when I opened the box, very disappointed\"\n" +
+      "51,1,Horrible lens fell out 2nd day!\n" +
+      "52,1,I don't like them.\n" +
+      "53,1,These hoes broke too I want my money\n" +
+      "54,1,Broke within the 3 days\n" +
+      "55,1,Little small but still good\n" +
+      "56,1,Super small\n" +
+      "57,1,Horrible desing\n" +
+      "58,1,It's broke\n" +
+      "59,1,Crooked and cheaply made.\n" +
+      "60,1,Poor quality\n";
+  /** Camera data, 12 rows: Id,Price (USD),Type,Focus. */
+  public static final String CAMERA_CSV = "Id,Price (USD),Type,Focus\n" +
+      "1,949,MIRRORLESS,MANUAL\n" +
+      "2,99,DSLR,BOTH\n" +
+      "3,90,DSLR,BOTH\n" +
+      "4,80,DSLR,AUTO\n" +
+      "5,20,COMPACT,MANUAL\n" +
+      "6,50,COMPACT,AUTO\n" +
+      "7,49,COMPACT,AUTO\n" +
+      "8,30,COMPACT,AUTO\n" +
+      "9,800,MIRRORLESS,AUTO\n" +
+      "10,789,MIRRORLESS,MANUAL\n" +
+      "11,35,COMPACT,AUTO\n" +
+      "12,789,MIRRORLESS,BOTH\n";
+  /** Product reviews with two extra columns, 60 rows: Id,Rating,Text,Verified Purchase,Helpful. */
+  public static final String AMAZON_EXTENDED = "Id,Rating,Text,Verified Purchase,Helpful\n" +
+      "1,5,I only spent less than ten on these so they're good for what I paid for,YES,11\n" +
+      "2,5,I'm in love with these glasses.,YES,2\n" +
+      "3,5,Stylish. My kid loved them,YES,2\n" +
+      "4,5,They came in great condition.,NO,0\n" +
+      "5,5,These are really wonderful!,YES,0\n" +
+      "6,5,these are GREAT quality,YES,0\n" +
+      "7,5,She LOVES them!,YES,0\n" +
+      "8,5,Love these.,YES,0\n" +
+      "9,5,The quality is pretty good also.,YES,3\n" +
+      "10,5,EXCELLENT PRODUCT,YES,0\n" +
+      "11,5,I love them. Exactly what i wanted.,NO,0\n" +
+      "12,5,Son love them,YES,0\n" +
+      "13,5,He says they give him that style.,YES,0\n" +
+      "14,5,Great value!!!,YES,0\n" +
+      "15,5,Very complimentary!,YES,0\n" +
+      "16,5,\"Cute, great quality, good fit.\",YES,1\n" +
+      "17,5,I love these glasses!!,YES,0\n" +
+      "18,5,they fit perfectly.,YES,0\n" +
+      "19,5,They look expensive and the fit is perfect,NO,0\n" +
+      "20,5,Sturdy and good looking for a great price,YES,0\n" +
+      "21,5,Very stylish! Great accessory to compliment an outfit,YES,0\n" +
+      "22,5,Thanks so much my grandson enjoy them.,YES,0\n" +
+      "23,5,Daughter loves them.,YES,0\n" +
+      "24,5,Makes me look smarter in my tinder profile !,YES,0\n" +
+      "25,1,the side arms keep breaking,YES,0\n" +
+      "26,1,just look soooo cheap!,NO,0\n" +
+      "27,1,Not my style.,NO,0\n" +
+      "28,1,Mine arrived broken!! Not worth sending back.,YES,0\n" +
+      "29,1,Dollar store quality.,YES,5\n" +
+      "30,1,Not like picture.,NO,0\n" +
+      "31,1,We're cheap and broke right away.\u00A0,YES,0\n" + // escape = U+00A0 (no-break space); a stray U+00C2 from a mis-encoding was removed here
+      "32,1,\"These are so cheap looking, they are unwearable.\",YES,0\n" +
+      "33,1,Very Very VERY Round ! Not at all vintage .,YES,0\n" +
+      "34,1,It's a peace of garbage. Feels so cheap and plastic.,YES,0\n" +
+      "35,1,feel flimsy like it would break i returned it the next day,NO,0\n" +
+      "36,1,lens have too much glare,YES,0\n" +
+      "37,1,they look cheaply made and plastic,YES,0\n" +
+      "38,1,Very cheap looking,NO,0\n" +
+      "39,1,make me headache,YES,0\n" +
+      "40,1,Feel apart after a week of getting them prescribed.,NO,0\n" +
+      "41,1,Really cheap looking.,YES,0\n" +
+      "42,1,Glasses are crooked and not made correctly.,YES,7\n" +
+      "43,1,poorly made... broke after three days,YES,2\n" +
+      "44,1,Sunglasses were very small.,NO,0\n" +
+      "45,1,Look fake and cheap,YES,0\n" +
+      "46,1,lens fell out on first day.,NO,0\n" +
+      "47,1,Not really like it!,YES,2\n" +
+      "48,1,Delivered broken.,YES,0\n" +
+      "49,1,\"Overall, trash.\",YES,0\n" +
+      "50,1,\"Were broken when I opened the box, very disappointed\",NO,0\n" +
+      "51,1,Horrible lens fell out 2nd day!,YES,5\n" +
+      "52,1,I don't like them.,NO,0\n" +
+      "53,1,These hoes broke too I want my money,NO,0\n" +
+      "54,1,Broke within the 3 days,NO,0\n" +
+      "55,1,Little small but still good,NO,4\n" +
+      "56,1,Super small,NO,0\n" +
+      "57,1,Horrible desing,NO,0\n" +
+      "58,1,It's broke,NO,0\n" +
+      "59,1,Crooked and cheaply made.,NO,0\n" +
+      "60,1,Poor quality,YES,0\n";
+}
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
new file mode 100644
index 0000000..8ee929f
--- /dev/null
+++ b/src/main/resources/log4j.properties
@@ -0,0 +1,22 @@
+log4j.rootLogger=ERROR, STDOUT
+log4j.logger.ai.preferred.venom=INFO, STDOUT
+log4j.logger.ai.preferred.venom.proxy=INFO, STDOUT
+log4j.logger.ai.preferred.venom.storage=INFO, STDOUT
+log4j.logger.ai.preferred.minerva=INFO, STDOUT
+log4j.logger.ai.preferred.crawler=DEBUG, STDOUT
+
+log4j.additivity.ai.preferred.venom=false
+log4j.additivity.ai.preferred.venom.proxy=false
+log4j.additivity.ai.preferred.venom.storage=false
+log4j.additivity.ai.preferred.minerva=false
+log4j.additivity.ai.preferred.crawler=false
+
+log4j.appender.STDOUT=org.apache.log4j.ConsoleAppender
+log4j.appender.STDOUT.Target=System.out
+log4j.appender.STDOUT.layout=org.apache.log4j.EnhancedPatternLayout
+log4j.appender.STDOUT.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n%throwable
+log4j.appender.FILE=org.apache.log4j.RollingFileAppender
+log4j.appender.FILE.File=log.txt
+log4j.appender.FILE.Append=true
+log4j.appender.FILE.layout=org.apache.log4j.EnhancedPatternLayout
+log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n%throwable
\ No newline at end of file
diff --git a/src/test/java/.gitkeep b/src/test/java/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/src/test/resources/.gitkeep b/src/test/resources/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/temp/.gitkeep b/temp/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/utils/assemble.py b/utils/assemble.py
new file mode 100644
index 0000000..40ece2c
--- /dev/null
+++ b/utils/assemble.py
@@ -0,0 +1,38 @@
+import os.path
+import os
+import re
+
+CODE_PATTERN = re.compile(r'.+main.+?{(.+)}.+}', re.S)  # re.S: '.' also matches newlines; group(1) is the text main() prints as the method body
+
+def begin_section(name):
+  m = 23 - int(len(name) / 2)
+  padding = '=' * m
+  result = padding + ' ' + name + ' ' + padding
+  if len(result) % 2 == 0:
+    result += '='
+  print(result)
+
+def end_section():  # prints the rule that closes a section
+  print('=' * 49)  # a line of 49 '=' characters
+
+def main():
+  exercise_dir = '../src/main/java/ai/preferred/regression/exercise/'
+  for fn in os.listdir(exercise_dir):
+    if fn.startswith('E') and not fn.startswith('E20'):
+      name = fn.split('.')[0]
+      begin_section(name)
+      with open(os.path.join(exercise_dir, fn), 'r', encoding='utf8') as f:
+        m = CODE_PATTERN.search(f.read())
+        if m:
+          print()
+          print('public static void main(String[] args) {')
+          print(m.group(1).strip(' '))
+          print('}')
+          print()
+      end_section()
+      print()
+      print()
+
+if __name__ == '__main__':  # run only when executed as a script, not when imported
+  main()
+