libchardet - Mozilla's Universal Charset Detector C/C++ API
C++ C Makefile Other
Switch branches/tags
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Failed to load latest commit information.
include fixed missing update version to 1.0.6 Mar 7, 2017
m4 update copyright Mar 7, 2017
man update copyright Mar 7, 2017
patch Adding Id, Rev, Date, Author and Revision property to all files May 1, 2011
project
src
test build test with builded object and header in source directory May 17, 2016
tools fixed #1 separate model directory May 4, 2016
.gitignore add tags May 4, 2016
.travis.yml add travis-ci configuration May 17, 2016
Changelog fixed #12 No include guard Mar 7, 2017
INSTALL update distribution support May 16, 2016
LICENSE Mozilla's Universal Charset Detector C/C++ API Feb 21, 2009
Makefile.am missing dist project Feb 14, 2014
Makefile.in fixed #1 separate model directory May 4, 2016
README.md
acconfig.h fixed typo (CHARDEC_CONFIG_H to CHARDET_CONFIG_H) Aug 20, 2016
autogen add removing '*~' files Aug 20, 2016
chardet-config.h.in fixed #1 separate model directory May 4, 2016
chardet-config.in
chardet.pc.in fixed duplicated path on chardet.pc Feb 13, 2014
configure fixed #10 autogen failure because AM_PROG_AR with automake 1.11.1 Aug 19, 2016
configure.ac
libchardet.spec.in change url to github project Aug 19, 2016

README.md

libchardet - Mozilla's Universal Charset Detector C/C++ API

Build Status GitHub license GitHub last release GitHub closed issues GitHub closed pull requests

License

Copyright (c) 2017 JoungKyun.Kim http://oops.org All rights reserved.

This program is under MPL 1.1 or LGPL 2.1

Description

libchardet is based on the Mozilla Universal Charset Detector library, and detects the character set used to encode data.

The original code was written by Netscape Communications Corporation. The techniques used by universalchardet are described at <http://www-archive.mozilla.org/projects/intl/UniversalCharsetDetection.html>.

libchardet also refers to John Gardiner Myers's Encode-Detect-1.01 Perl module, and adds a C wrapper API and a library build environment based on libtool.

Since 1.0.5, libchardet incorporates the single-byte charset detection confidence algorithm of uchardet and its new language models (Arabic, Danish, Esperanto, German, Spanish, Turkish, Vietnamese).

Installation

See also the INSTALL document.

Sample Codes

See also the test directory of the source code.

       #include <stdio.h>   /* fprintf, printf */
       #include <string.h>  /* strlen */
       #include <chardet.h>

       /*
        * One-shot detection sample.
        *
        * Allocates a result object with detect_obj_init(), runs detection on a
        * fixed string, prints the detected encoding and confidence, and releases
        * the result object with detect_obj_free().
        *
        * Returns 0 on success, or the CHARDET_* status code of the step that
        * failed.
        */
       int main (void) {
            DetectObj *obj;
            char * str = "안녕하세요";

            if ( (obj = detect_obj_init ()) == NULL ) {
                 fprintf (stderr, "Memory Allocation failed\n");
                 return CHARDET_MEM_ALLOCATED_FAIL;
            }

       #ifndef CHARDET_BINARY_SAFE
            // before 1.0.5. This API is deprecated on 1.0.5
            switch (detect (str, &obj))
       #else
            // from 1.0.5: the buffer length is passed explicitly
            switch (detect_r (str, strlen (str), &obj))
       #endif
            {
                 case CHARDET_OUT_OF_MEMORY :
                      fprintf (stderr, "On handle processing, occured out of memory\n");
                      detect_obj_free (&obj);
                      return CHARDET_OUT_OF_MEMORY;
                 case CHARDET_NULL_OBJECT :
                      // NOTE(review): presumably obj is NULL on this path, so
                      // there is nothing to free -- confirm against the library.
                      fprintf (stderr,
                                "2st argument of chardet() is must memory allocation "
                                "with detect_obj_init API\n");
                      return CHARDET_NULL_OBJECT;
            }

            // Any other status falls through to here and is treated as success.
            printf ("encoding: %s, confidence: %f\n", obj->encoding, obj->confidence);
            detect_obj_free (&obj);

            return 0;
       }

Or, when detecting repeatedly in a loop with a reusable handle:

       #include <stdio.h>   /* fprintf, printf */
       #include <string.h>  /* strlen */
       #include <chardet.h>

       /*
        * Looping detection sample.
        *
        * Creates one detector handle with detect_init() and reuses it across
        * iterations: each pass resets the handle, allocates a fresh result
        * object, runs detection, prints the result and frees the result object.
        * The handle is released with detect_destroy() on every exit path,
        * including the error returns inside the loop.
        *
        * Returns 0 on success, or the CHARDET_* status code of the step that
        * failed.
        */
       int main (void) {
            Detect    * d;
            DetectObj * obj;
            char * str = "안녕하세요";

            if ( (d = detect_init ()) == NULL ) {
                 fprintf (stderr, "chardet handle initialize failed\n");
                 return CHARDET_MEM_ALLOCATED_FAIL;
            }

            while ( 1 ) {
                // Reset the handle before each detection pass.
                detect_reset (&d);

                if ( (obj = detect_obj_init ()) == NULL ) {
                     fprintf (stderr, "Memory Allocation failed\n");
                     detect_destroy (&d);
                     return CHARDET_MEM_ALLOCATED_FAIL;
                }

       #ifndef CHARDET_BINARY_SAFE
                // before 1.0.5. This API is deprecated on 1.0.5
                switch (detect_handledata (&d, str, &obj))
       #else
                // from 1.0.5: the buffer length is passed explicitly
                switch (detect_handledata_r (&d, str, strlen (str), &obj))
       #endif
                {
                     case CHARDET_OUT_OF_MEMORY :
                          fprintf (stderr, "On handle processing, occured out of memory\n");
                          detect_obj_free (&obj);
                          detect_destroy (&d);
                          return CHARDET_OUT_OF_MEMORY;
                     case CHARDET_NULL_OBJECT :
                          // NOTE(review): presumably obj is NULL on this path, so
                          // only the handle needs releasing -- confirm against
                          // the library.
                          fprintf (stderr,
                                    "2st argument of chardet() is must memory allocation "
                                    "with detect_obj_init API\n");
                          detect_destroy (&d);
                          return CHARDET_NULL_OBJECT;
                }

                // Any other status falls through to here and is treated as success.
                printf ("encoding: %s, confidence: %f\n", obj->encoding, obj->confidence);
                detect_obj_free (&obj);

                // Placeholder exit condition: a real program would loop while
                // more input remains.
                if ( 1 )
                    break;
            }
            detect_destroy (&d);

            return 0;
       }

APIs