-
Notifications
You must be signed in to change notification settings - Fork 0
/
CSVProcess.java
233 lines (197 loc) · 5.96 KB
/
CSVProcess.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
public class CSVProcess {
/*
* Read file in to list, create the database and table, insert into table,
* create the csv with bad records, write statistics to log file
*/
public static void main(String[] args) {
if(args.length == 1)
processCSV(args[0]);
else
System.out.println("Error: Incorrect number of arguments");
}
private static void processCSV(String file) {
//statistics for log file
int recieved = 0;
int successful = 0;
int failed = 0;
String warning = null;
BufferedReader bfr = null;
String row[] = null;
ArrayList<String[]> goodRows = new ArrayList<String[]>();
ArrayList<String[]> badRows = new ArrayList<String[]>();
try {
bfr = new BufferedReader(new FileReader(file));
CSVReader cr = new CSVReaderBuilder(bfr).withSkipLines(1).build();
while((row = cr.readNext()) != null) {
recieved++;
if(row.length == 10) {
goodRows.add(row);
successful++;
} else {
badRows.add(row);
failed++;
}
}
} catch (Exception e) {
e.printStackTrace();
}
Connection connection = getConnection(file);
createTable(connection);
insertRecords(connection, goodRows);
if(!checkRowCount(connection, successful))
warning = "There might be missing rows in the database";
makeCSV(file, badRows);
logToFile(recieved, successful, failed, warning, file);
try {
if(!connection.isClosed())
connection.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
private static void logToFile(int recieved, int successful, int failed, String warning, String filename) {
filename = filename.substring(0, filename.lastIndexOf('.'));
filename = filename + ".log";
BufferedWriter bfw = null;
try {
bfw = new BufferedWriter(new FileWriter(filename));
bfw.flush();
bfw.write("recieved: " + recieved);
bfw.newLine();
bfw.write("successful: " + successful);
bfw.newLine();
bfw.write("failed: " + failed);
if(warning != null) {
bfw.newLine();
bfw.write(warning);
}
} catch (Exception e) {
e.printStackTrace();
}finally {
try {
bfw.close();
}catch (IOException e){}
}
}
private static void makeCSV(String filename, ArrayList<String[]> records) {
filename = filename.substring(0, filename.lastIndexOf('.'));
filename = filename + "-bad.csv";
BufferedWriter bfw = null;
StringBuilder sb = null;
try {
bfw = new BufferedWriter(new FileWriter(filename));
bfw.flush();
for(String[] row: records) {
sb = new StringBuilder();
for(String data: row) {
sb.append(data);
sb.append(",");
}
bfw.write(sb.toString());
bfw.newLine();
}
} catch(Exception e) {
e.printStackTrace();
} finally {
try {
bfw.close();
}catch (IOException e){}
}
}
private static void createTable(Connection connection) {
String dropTableSQL = "drop table if exists csv_data;";
String vacuumSQL = "VACUUM;";
String createTableSQL = "create table csv_data (\n"
+ " A text,\n"
+ " B text,\n"
+ " C text,\n"
+ " D text,\n"
+ " E text,\n"
+ " F text,\n"
+ " G text,\n"
+ " H text,\n"
+ " I text,\n"
+ " J text\n"
+ ");";
try {
Statement statement = connection.createStatement();
statement.execute(dropTableSQL);
statement.execute(vacuumSQL);
statement.execute(createTableSQL);
} catch (Exception e) {
e.printStackTrace();
}
}
private static void insertRecords(Connection connection, ArrayList<String[]> records) {
//optimized the insert by turning off autocommit and executing insert in batches
String insertSQL = "insert into csv_data values (?,?,?,?,?,?,?,?,?,?);";
final int batchSize = 1000;
int count = 0;
try {
PreparedStatement statement = connection.prepareStatement(insertSQL);
for(String[] row: records) {
statement.setString(1, row[0]);
statement.setString(2, row[1]);
statement.setString(3, row[2]);
statement.setString(4, row[3]);
statement.setString(5, row[4]);
statement.setString(6, row[5]);
statement.setString(7, row[6]);
statement.setString(8, row[7]);
statement.setString(9, row[8]);
statement.setString(10, row[9]);
statement.addBatch();
if(++count % batchSize == 0){
connection.setAutoCommit(false);
statement.executeBatch();
connection.setAutoCommit(true);
}
}
connection.setAutoCommit(false);
statement.executeBatch();
connection.setAutoCommit(true);
} catch (Exception e) {
e.printStackTrace();
}
}
private static boolean checkRowCount(Connection connection, int successful) {
boolean success = false;
String selectSQL = "select count(*) as numOfRows from csv_data;";
try {
Statement statement = connection.createStatement();
ResultSet rs = statement.executeQuery(selectSQL);
int count = rs.getInt("numOfRows");
if(count == successful)
success = true;
} catch (Exception e) {
e.printStackTrace();
}
return success;
}
public static Connection getConnection(String filename) {
Connection ctn = null;
filename = filename.substring(0, filename.lastIndexOf('.'));
String url = "jdbc:sqlite:" + filename + ".db";
try {
Class.forName("org.sqlite.JDBC");
ctn = DriverManager.getConnection(url);
} catch (Exception e) {
e.printStackTrace();
}
return ctn;
}
}