Skip to content

Commit 1527a5e

Browse files
committed
[SystemZ][z/OS] Add the functions needed for handling EBCDIC I/O
This patch adds the basic functions needed for controlling auto conversion on z/OS. Auto conversion to ASCII is enabled on untagged input files by assuming that all untagged files are EBCDIC encoded. Output files are auto converted to EBCDIC IBM-1047. This change also enables conversion for stdin/stdout/stderr. For more information on how fcntl controls the codepage, see https://www.ibm.com/docs/en/zos/2.4.0?topic=descriptions-fcntl-bpx1fct-bpx4fct-control-open-file-descriptors Reviewed By: anirudhp Differential Revision: https://reviews.llvm.org/D100483
1 parent 1b24f35 commit 1527a5e

File tree

6 files changed

+207
-0
lines changed

6 files changed

+207
-0
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//===- AutoConvert.h - Auto conversion between ASCII/EBCDIC -----*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file contains functions used for auto conversion between
10+
// ASCII/EBCDIC codepages specific to z/OS.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLVM_SUPPORT_AUTOCONVERT_H
15+
#define LLVM_SUPPORT_AUTOCONVERT_H
16+
17+
#ifdef __MVS__
18+
#define CCSID_IBM_1047 1047
19+
#define CCSID_UTF_8 1208
20+
#include <system_error>
21+
22+
namespace llvm {
23+
24+
/// Disable the z/OS enhanced ASCII auto-conversion for the file
25+
/// descriptor.
/// \param FD the file descriptor to operate on.
26+
std::error_code disableAutoConversion(int FD);
27+
28+
/// Query the z/OS enhanced ASCII auto-conversion status of a file
29+
/// descriptor and force the conversion if the file is not tagged with a
30+
/// codepage.
/// \param FD the file descriptor to operate on.
31+
std::error_code enableAutoConversion(int FD);
32+
33+
/// Set the tag information for a file descriptor.
/// \param FD the file descriptor to tag.
/// \param CCSID the coded character set identifier to record in the tag.
/// \param Text whether the file is tagged as text; when true, the
/// implementation asserts that \p CCSID is neither FT_UNTAGGED nor FT_BINARY.
34+
std::error_code setFileTag(int FD, int CCSID, bool Text);
35+
36+
} // namespace llvm
37+
38+
#endif // __MVS__
39+
40+
#endif // LLVM_SUPPORT_AUTOCONVERT_H

llvm/lib/Support/AutoConvert.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
//===- AutoConvert.cpp - Auto conversion between ASCII/EBCDIC -------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file contains functions used for auto conversion between
10+
// ASCII/EBCDIC codepages specific to z/OS.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifdef __MVS__
15+
16+
#include "llvm/Support/AutoConvert.h"
#include <cassert>
#include <cerrno>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
19+
20+
std::error_code llvm::disableAutoConversion(int FD) {
21+
static const struct f_cnvrt Convert = {
22+
SETCVTOFF, // cvtcmd
23+
0, // pccsid
24+
(short)FT_BINARY, // fccsid
25+
};
26+
return fcntl(FD, F_CONTROL_CVT, &Convert);
27+
}
28+
29+
std::error_code llvm::enableAutoConversion(int FD) {
30+
struct f_cnvrt Query = {
31+
QUERYCVT, // cvtcmd
32+
0, // pccsid
33+
0, // fccsid
34+
};
35+
36+
if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
37+
return -1;
38+
39+
Query.cvtcmd = SETCVTALL;
40+
Query.pccsid =
41+
(FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO)
42+
? 0
43+
: CCSID_UTF_8;
44+
// Assume untagged files to be IBM-1047 encoded.
45+
Query.fccsid = (Query.fccsid == FT_UNTAGGED) ? CCSID_IBM_1047 : Query.fccsid;
46+
return fcntl(FD, F_CONTROL_CVT, &Query);
47+
}
48+
49+
std::error_code llvm::setFileTag(int FD, int CCSID, bool Text) {
50+
assert((!Text || (CCSID != FT_UNTAGGED && CCSID != FT_BINARY)) &&
51+
"FT_UNTAGGED and FT_BINARY are not allowed for text files");
52+
struct file_tag Tag;
53+
Tag.ft_ccsid = CCSID;
54+
Tag.ft_txtflag = Text;
55+
Tag.ft_deferred = 0;
56+
Tag.ft_rsvflags = 0;
57+
58+
return fcntl(FD, F_SETTAG, &Tag);
59+
}
60+
61+
#endif // __MVS__

llvm/lib/Support/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ add_llvm_component_library(LLVMSupport
9393
ARMAttributeParser.cpp
9494
ARMWinEH.cpp
9595
Allocator.cpp
96+
AutoConvert.cpp
9697
BinaryStreamError.cpp
9798
BinaryStreamReader.cpp
9899
BinaryStreamRef.cpp

llvm/lib/Support/MemoryBuffer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/Support/MemoryBuffer.h"
1414
#include "llvm/ADT/SmallString.h"
1515
#include "llvm/Config/config.h"
16+
#include "llvm/Support/AutoConvert.h"
1617
#include "llvm/Support/Errc.h"
1718
#include "llvm/Support/Errno.h"
1819
#include "llvm/Support/FileSystem.h"
@@ -467,6 +468,12 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
467468
return std::move(Result);
468469
}
469470

471+
#ifdef __MVS__
472+
// Set codepage auto-conversion for z/OS.
473+
if (auto EC = llvm::enableAutoConversion(FD))
474+
return EC;
475+
#endif
476+
470477
auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
471478
if (!Buf) {
472479
// Failed to create a buffer. The only way it can fail is if

llvm/lib/Support/Unix/Path.inc

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ extern char **environ;
5050
#elif defined(__DragonFly__)
5151
#include <sys/mount.h>
5252
#elif defined(__MVS__)
53+
#include "llvm/Support/AutoConvert.h"
5354
#include <sys/ps.h>
5455
#endif
5556

@@ -959,8 +960,13 @@ static int nativeOpenFlags(CreationDisposition Disp, OpenFlags Flags,
959960
// Nothing special, just don't add O_CREAT and we get these semantics.
960961
}
961962

963+
// Using append mode with z/OS UTF-8 auto-conversion results in EINVAL when
964+
// calling write(). Instead we need to use lseek() to set offset to EOF after
965+
// open().
966+
#ifndef __MVS__
962967
if (Flags & OF_Append)
963968
Result |= O_APPEND;
969+
#endif
964970

965971
#ifdef O_CLOEXEC
966972
if (!(Flags & OF_ChildInherit))
@@ -989,6 +995,88 @@ std::error_code openFile(const Twine &Name, int &ResultFD,
989995
assert(r == 0 && "fcntl(F_SETFD, FD_CLOEXEC) failed");
990996
}
991997
#endif
998+
999+
#ifdef __MVS__
1000+
/* Reason about auto-conversion and file tags. Setting the file tag only
1001+
* applies if file is opened in write mode:
1002+
*
1003+
* Text file:
1004+
* File exists File created
1005+
* CD_CreateNew n/a conv: on
1006+
* tag: set 1047
1007+
* CD_CreateAlways conv: auto conv: on
1008+
* tag: auto 1047 tag: set 1047
1009+
* CD_OpenAlways conv: auto conv: on
1010+
* tag: auto 1047 tag: set 1047
1011+
* CD_OpenExisting conv: auto n/a
1012+
* tag: unchanged
1013+
*
1014+
* Binary file:
1015+
* File exists File created
1016+
* CD_CreateNew n/a conv: off
1017+
* tag: set binary
1018+
* CD_CreateAlways conv: off conv: off
1019+
* tag: auto binary tag: set binary
1020+
* CD_OpenAlways conv: off conv: off
1021+
* tag: auto binary tag: set binary
1022+
* CD_OpenExisting conv: off n/a
1023+
* tag: unchanged
1024+
*
1025+
* Actions:
1026+
* conv: off -> auto-conversion is turned off
1027+
* conv: on -> auto-conversion is turned on
1028+
* conv: auto -> auto-conversion is turned on if the file is untagged
1029+
* tag: set 1047 -> set the file tag to text encoded in 1047
1030+
* tag: set binary -> set the file tag to binary
1031+
* tag: auto 1047 -> set file tag to 1047 if not set
1032+
* tag: auto binary -> set file tag to binary if not set
1033+
* tag: unchanged -> do not care about the file tag
1034+
*
1035+
* It is not possible to distinguish between the cases "file exists" and
1036+
* "file created". In the latter case, the file tag is not set and the file
1037+
* size is zero. The decision table boils down to:
1038+
*
1039+
* the file tag is set if
1040+
* - the file is opened for writing
1041+
* - the create disposition is not equal to CD_OpenExisting
1042+
* - the file tag is not set
1043+
* - the file size is zero
1044+
*
1045+
* This only applies if the file is a regular file. E.g. enabling
1046+
* auto-conversion for reading from /dev/null results in error EINVAL when
1047+
* calling read().
1048+
*
1049+
* Using append mode with z/OS UTF-8 auto-conversion results in EINVAL when
1050+
* calling write(). Instead we need to use lseek() to set offset to EOF after
1051+
* open().
1052+
*/
1053+
if ((Flags & OF_Append) && lseek(ResultFD, 0, SEEK_END) == -1)
1054+
return std::error_code(errno, std::generic_category());
1055+
struct stat Stat;
1056+
if (fstat(ResultFD, &Stat) == -1)
1057+
return std::error_code(errno, std::generic_category());
1058+
if (S_ISREG(Stat.st_mode)) {
1059+
bool DoSetTag = (Access & FA_Write) && (Disp != CD_OpenExisting) &&
1060+
!Stat.st_tag.ft_txtflag && !Stat.st_tag.ft_ccsid &&
1061+
Stat.st_size == 0;
1062+
if (Flags & OF_Text) {
1063+
if (auto EC = llvm::enableAutoConversion(ResultFD))
1064+
return EC;
1065+
if (DoSetTag) {
1066+
if (auto EC = llvm::setFileTag(ResultFD, CCSID_IBM_1047, true))
1067+
return EC;
1068+
}
1069+
} else {
1070+
if (auto EC = llvm::disableAutoConversion(ResultFD))
1071+
return EC;
1072+
if (DoSetTag) {
1073+
if (auto EC = llvm::setFileTag(ResultFD, FT_BINARY, false))
1074+
return EC;
1075+
}
1076+
}
1077+
}
1078+
#endif
1079+
9921080
return std::error_code();
9931081
}
9941082

llvm/test/Support/encoding.ll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
; Checks if llc can deal with different char encodings.
2+
; This is only required for z/OS.
3+
;
4+
; UNSUPPORTED: !s390x-none-zos
5+
;
6+
; RUN: cat %s >%t && chtag -tc ISO8859-1 %t && llc %t -o - >/dev/null
7+
; RUN: iconv -f ISO8859-1 -t IBM-1047 <%s >%t && chtag -tc IBM-1047 %t && llc %t -o - >/dev/null
8+
; RUN: iconv -f ISO8859-1 -t IBM-1047 <%s >%t && chtag -r %t && llc %t -o - >/dev/null
9+
10+
@g_105 = external dso_local global i8, align 2

0 commit comments

Comments
 (0)