Skip to content

Commit 5920e9d

Browse files
committed
perf: CsvUtil.java工具类添加获取文件编码接口。
1 parent e7ff9f9 commit 5920e9d

File tree

1 file changed

+62
-2
lines changed
  • springboot-aop/src/main/java/com/study/module/util/office

1 file changed

+62
-2
lines changed

springboot-aop/src/main/java/com/study/module/util/office/CsvUtil.java

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,11 +135,11 @@ public static Map<String, Object> readCsv(String csvFilePath, boolean isHasHeade
135135
try {
136136
// 中文乱码问题:源文件的编码格式与程序设置的读取格式不一致所致(调整csv文件为UTF-8集合)
137137
DataInputStream in = new DataInputStream(new FileInputStream(csvFilePath));
138-
BufferedReader reader = new BufferedReader(new InputStreamReader(in, "gbk"));
138+
BufferedReader reader = new BufferedReader(new InputStreamReader(in, getCodeString(csvFilePath)));
139139
//第一行信息,为标题信息,不用,如果需要,注释掉
140140
// reader.readLine();
141141
if (isHasHeader) {
142-
result.put("header", reader.readLine());
142+
result.put("header", Arrays.asList(splitCSV(reader.readLine())));
143143
}
144144
String line;
145145
List<List<String>> dataList = new ArrayList<>();
@@ -189,6 +189,66 @@ public static Map<String, Object> readCsv(MultipartFile multipartFile, Boolean i
189189
return result;
190190
}
191191

192+
/**
193+
* 获得文件编码(https://www.php.cn/java/base/439367.html)
194+
*
195+
* @param fileName 文件名称(例如:D:/good.csv)
196+
* @return 编码格式
197+
* @throws Exception
198+
*/
199+
public static String getCodeString(String fileName) throws Exception {
200+
BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileName));
201+
int p = (bin.read() << 8) + bin.read();
202+
bin.close();
203+
String code;
204+
switch (p) {
205+
case 0xefbb:
206+
code = "UTF-8";
207+
break;
208+
case 0xfffe:
209+
code = "Unicode";
210+
break;
211+
case 0xfeff:
212+
code = "UTF-16BE";
213+
break;
214+
default:
215+
System.out.println("默认值");
216+
code = "GBK"; // 跟编辑器显示有关
217+
}
218+
return code;
219+
}
220+
221+
/**
222+
* 获取流对应的编码类型
223+
*
224+
* @param bis 文件缓冲流
225+
* @return 编码格式
226+
* @throws Exception
227+
*/
228+
private static String getCharSet(BufferedInputStream bis) throws Exception {
229+
String charSet = null;
230+
byte[] buffer = new byte[3];
231+
//因流读取后再读取可能会缺少内容,此处需要先读,然后再还原
232+
bis.mark(bis.available() + 1);
233+
bis.read(buffer);
234+
bis.reset();
235+
String s = Integer.toHexString(buffer[0] & 0xFF) + Integer.toHexString(buffer[1] & 0xFF) + Integer.toHexString(buffer[2] & 0xFF);
236+
switch (s) {
237+
//GBK,GB2312对应均为d5cbba,统一当成GB2312解析
238+
case "d5cbba":
239+
charSet = "GB2312";
240+
break;
241+
case "efbbbf":
242+
charSet = "UTF-8";
243+
break;
244+
default:
245+
charSet = "GBK";
246+
break;
247+
}
248+
return charSet;
249+
250+
}
251+
192252
public static void testSplitCSV() {
193253
String src1 = "\"fh,zg\",sdf,\"asfs,\",\",dsdf\",\"aadf\"\"\",\"\"\"hdfg\",\"fgh\"\"dgnh\",hgfg'dfh,\"asdfa\"\"\"\"\",\"\"\"\"\"fgjhg\",\"gfhg\"\"\"\"hb\"";
194254
try {

0 commit comments

Comments
 (0)