Skip to content

Commit

Permalink
format
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Aug 14, 2018
1 parent d1d5d62 commit 5c8bc8b
Show file tree
Hide file tree
Showing 26 changed files with 70 additions and 120 deletions.
7 changes: 3 additions & 4 deletions src/bin/to_kana.rs
Expand Up @@ -3,20 +3,19 @@ extern crate wana_kana;
use std::env;
use std::slice::SliceConcatExt;

use std::io::{self, Read};
use std::io::prelude::*;
use std::io::{self, Read};

fn main() {
let args: String = env::args().skip(1).collect::<Vec<String>>().join(" ");
if args.len() > 0 {
println!("{}", wana_kana::to_kana::to_kana(&args));
}else{
} else {
let mut buffer = String::new();
io::stdin().read_to_string(&mut buffer).unwrap();
if buffer.len() > 0 {
// println!("{}", wana_kana::to_kana::to_kana(&buffer));
io::stdout().write(wana_kana::to_kana::to_kana(&buffer).as_bytes()).unwrap();
}
}

}
}
7 changes: 3 additions & 4 deletions src/bin/to_romaji.rs
Expand Up @@ -3,20 +3,19 @@ extern crate wana_kana;
use std::env;
use std::slice::SliceConcatExt;

use std::io::{self, Read};
use std::io::prelude::*;
use std::io::{self, Read};

fn main() {
let args: String = env::args().skip(1).collect::<Vec<String>>().join(" ");
if args.len() > 0 {
println!("{}", wana_kana::to_romaji::to_romaji(&args));
}else{
} else {
let mut buffer = String::new();
io::stdin().read_to_string(&mut buffer).unwrap();
if buffer.len() > 0 {
// println!("{}", wana_kana::to_romaji::to_romaji(&buffer));
io::stdout().write(wana_kana::to_romaji::to_romaji(&buffer).as_bytes()).unwrap();
}
}

}
}
1 change: 0 additions & 1 deletion src/constants.rs
Expand Up @@ -681,4 +681,3 @@ lazy_static! {
}

pub const FOUR_CHAR_EDGECASES: &'static [&'static str] = &["lts", "chy", "shy"];

1 change: 0 additions & 1 deletion src/is_hiragana.rs
@@ -1,4 +1,3 @@

//! Test if all chars of `input` are [Hiragana](https://en.wikipedia.org/wiki/Hiragana)
//!
//! # Examples
Expand Down
2 changes: 0 additions & 2 deletions src/is_japanese.rs
@@ -1,4 +1,3 @@

//! Test if `input` only includes [Kanji](https://en.wikipedia.org/wiki/Kanji), [Kana](https://en.wikipedia.org/wiki/Kana), zenkaku punctuation, Japanese symbols and numbers.
//!
//! # Examples
Expand All @@ -21,4 +20,3 @@ pub fn is_japanese(input: &str) -> bool {
}
return input.chars().all(is_char_japanese);
}

2 changes: 0 additions & 2 deletions src/is_kana.rs
@@ -1,4 +1,3 @@

//! Test if all chars of `input` are [Kana](https://en.wikipedia.org/wiki/Kana) ([Katakana](https://en.wikipedia.org/wiki/Katakana) and/or [Hiragana](https://en.wikipedia.org/wiki/Hiragana))
//!
//! # Examples
Expand All @@ -19,4 +18,3 @@ pub fn is_kana(input: &str) -> bool {
}
return input.chars().all(is_char_kana);
}

1 change: 0 additions & 1 deletion src/is_kanji.rs
Expand Up @@ -12,7 +12,6 @@
//! assert_eq!(contains_kanji("🐸"), false);
//! ```


use utils::is_char_kanji::*;

pub fn is_kanji(input: &str) -> bool {
Expand Down
1 change: 0 additions & 1 deletion src/is_katakana.rs
Expand Up @@ -9,7 +9,6 @@
//! assert_eq!(is_katakana("あア"), false);
//! ```


use utils::is_char_katakana::*;

pub fn is_katakana(input: &str) -> bool {
Expand Down
7 changes: 3 additions & 4 deletions src/is_mixed.rs
Expand Up @@ -11,12 +11,11 @@
//! assert_eq!(is_mixed_pass_kanji("あア", true), false);
//! ```

use is_kanji::*;
use is_hiragana::is_hiragana;
use is_kanji::*;
use is_katakana::is_katakana;
use is_romaji::is_romaji;


/// Convenience wrapper around `is_mixed_pass_kanji` that always calls it
/// with `pass_kanji` set to `true`.
pub fn is_mixed(input: &str) -> bool {
    let pass_kanji = true;
    is_mixed_pass_kanji(input, pass_kanji)
}
Expand All @@ -27,6 +26,6 @@ pub fn is_mixed_pass_kanji(input: &str, pass_kanji: bool) -> bool {
has_kanji = input.chars().any(|c| is_kanji(&c.to_string()));
}
return (input.chars().any(|c| is_hiragana(&c.to_string())) || input.chars().any(|c| is_katakana(&c.to_string())))
&& input.chars().any(|c| is_romaji(&c.to_string())) && !has_kanji;
&& input.chars().any(|c| is_romaji(&c.to_string()))
&& !has_kanji;
}

15 changes: 7 additions & 8 deletions src/lib.rs
Expand Up @@ -19,25 +19,24 @@ extern crate lazy_static;
extern crate fnv;
extern crate regex;

pub mod is_kanji;
pub mod is_hiragana;
pub mod is_japanese;
pub mod is_kana;
pub mod is_kanji;
pub mod is_katakana;
pub mod is_romaji;
pub mod is_japanese;
pub mod is_hiragana;
pub mod is_mixed;
pub mod is_romaji;

pub mod to_hiragana;
pub mod to_kana;
pub mod to_katakana;
pub mod to_hiragana;
pub mod to_romaji;

pub mod strip_okurigana;
pub mod tokenize;

mod utils;
mod options;
mod constants;
mod options;
mod utils;

pub use options::Options;

11 changes: 3 additions & 8 deletions src/strip_okurigana.rs
@@ -1,4 +1,3 @@

//! Strips trailing [Okurigana](https://en.wikipedia.org/wiki/Okurigana) if `input` is a mix of [Kanji](https://en.wikipedia.org/wiki/Kanji) and [Kana](https://en.wikipedia.org/wiki/Kana)
//!
//! # Examples
Expand All @@ -12,11 +11,11 @@
//! assert_eq!(strip_okurigana_all("お祝い", true), "祝");
//! ```

use utils::is_char_kana::*;
use utils::is_char_punctuation::*;
use is_japanese::*;
use is_kana::*;
use is_kanji::*;
use utils::is_char_kana::*;
use utils::is_char_punctuation::*;

pub fn strip_okurigana(input: &str) -> String {
strip_okurigana_all(input, false)
Expand All @@ -27,11 +26,7 @@ pub fn strip_okurigana_all(input: &str, all: bool) -> String {
}

if all {
return input
.chars()
.filter(|char| !is_char_kana(*char))
.into_iter()
.collect();
return input.chars().filter(|char| !is_char_kana(*char)).into_iter().collect();
}

// strip trailing only
Expand Down
8 changes: 3 additions & 5 deletions src/to_hiragana.rs
Expand Up @@ -9,12 +9,11 @@
//! assert_eq!(to_hiragana("wi"), "うぃ"); assert_eq!(to_hiragana_with_opt("wi", Options {use_obsolete_kana: true, ..Default::default() } ), "ゐ");
//! ```

use utils::katakana_to_hiragana::*;
use utils::romaji_to_hiragana::romaji_to_hiragana;
use is_romaji::*;
use is_mixed::*;
use is_romaji::*;
use options::Options;

use utils::katakana_to_hiragana::*;
use utils::romaji_to_hiragana::romaji_to_hiragana;

pub fn to_hiragana(input: &str) -> String {
to_hiragana_with_opt(input, Options::default())
Expand All @@ -33,4 +32,3 @@ pub fn to_hiragana_with_opt(input: &str, options: Options) -> String {
}
return katakana_to_hiragana(input);
}

42 changes: 12 additions & 30 deletions src/to_kana.rs
Expand Up @@ -17,18 +17,16 @@

use constants::{FOUR_CHAR_EDGECASES, FROM_ROMAJI, UPPERCASE_END, UPPERCASE_START};

use utils::is_char_in_range::*;
use utils::is_char_upper_case::*;
use utils::get_chunk::get_chunk;
use utils::is_char_consonant::*;
use utils::is_char_vowel::*;
use utils::hiragana_to_katakana::*;
use is_kana::*;
use options::Options;
use std;
use std::borrow::Cow;


use utils::get_chunk::get_chunk;
use utils::hiragana_to_katakana::*;
use utils::is_char_consonant::*;
use utils::is_char_in_range::*;
use utils::is_char_upper_case::*;
use utils::is_char_vowel::*;

pub fn to_kana(input: &str) -> String {
to_kana_with_opt(input, Options::default())
Expand Down Expand Up @@ -84,11 +82,8 @@ pub fn to_kana_with_opt(input: &str, options: Options) -> String {
}
}
// Handle edge case of n followed by n and vowel
if chunk_lc
.chars()
.nth(1)
.map(|c| is_char_consonant(c, false))
.unwrap_or(false) && chunk_lc.chars().nth(2).map(is_char_vowel).unwrap_or(false)
if chunk_lc.chars().nth(1).map(|c| is_char_consonant(c, false)).unwrap_or(false)
&& chunk_lc.chars().nth(2).map(is_char_vowel).unwrap_or(false)
{
chunk_size = 1;
chunk = Cow::from(get_chunk(input, cursor, cursor + chunk_size));
Expand Down Expand Up @@ -146,29 +141,17 @@ pub fn to_kana_with_opt(input: &str, options: Options) -> String {
.nth(cursor + 1)
.map(|c| c.to_string().to_lowercase() == "y")
.unwrap_or(false)
&& input
.chars()
.nth(cursor + 2)
.map(|c| !is_char_vowel(c))
.unwrap_or(true) || cursor == len - 1
|| input
.chars()
.nth(cursor + 1)
.map(|c| is_kana(&c.to_string()))
.unwrap_or(false)
&& input.chars().nth(cursor + 2).map(|c| !is_char_vowel(c)).unwrap_or(true)
|| cursor == len - 1
|| input.chars().nth(cursor + 1).map(|c| is_kana(&c.to_string())).unwrap_or(false)
{
// Don't transliterate this yet.
kana_char = Cow::from(chunk.chars().nth(0).unwrap().to_string());
}
}

// Use katakana if first letter in chunk is uppercase
if chunk
.chars()
.nth(0)
.map(|c| is_char_upper_case(c))
.unwrap_or(false)
{
if chunk.chars().nth(0).map(|c| is_char_upper_case(c)).unwrap_or(false) {
kana_char = Cow::from(hiragana_to_katakana(&kana_char));
}

Expand All @@ -178,4 +161,3 @@ pub fn to_kana_with_opt(input: &str, options: Options) -> String {
}
return kana;
}

4 changes: 2 additions & 2 deletions src/to_katakana.rs
Expand Up @@ -23,11 +23,11 @@
//! assert_eq!(to_katakana_with_opt("wi", Options {use_obsolete_kana: true, ..Default::default() }),"ヰ");
//! ```

use is_romaji::*;
use is_mixed::*;
use is_romaji::*;
use options::Options;
use utils::hiragana_to_katakana::*;
use utils::romaji_to_hiragana::*;
use options::Options;

pub fn to_katakana(input: &str) -> String {
to_katakana_with_opt(input, Options::default())
Expand Down
5 changes: 2 additions & 3 deletions src/to_romaji.rs
Expand Up @@ -10,12 +10,12 @@

use constants::TO_ROMAJI;

use utils::get_chunk::*;
use utils::katakana_to_hiragana::*;
use is_katakana::*;
use options::Options;
use std;
use std::borrow::Cow;
use utils::get_chunk::*;
use utils::katakana_to_hiragana::*;

pub fn to_romaji(kana: &str) -> String {
to_romaji_with_opt(kana, Options::default())
Expand Down Expand Up @@ -71,4 +71,3 @@ pub fn to_romaji_with_opt(kana: &str, options: Options) -> String {
}
roma
}

7 changes: 2 additions & 5 deletions src/tokenize.rs
Expand Up @@ -15,12 +15,11 @@
//! assert_eq!(tokenize("what the...私は「悲しい」。"), vec!["what the...", "私", "は", "「", "悲", "しい", "」。", ] );
//! ```


use itertools::Itertools;
use utils::is_char_hiragana::*;
use utils::is_char_japanese_punctuation::*;
use utils::is_char_kanji::*;
use utils::is_char_hiragana::*;
use utils::is_char_katakana::*;
use itertools::Itertools;

fn get_type(input: char) -> &'static str {
match input {
Expand All @@ -32,12 +31,10 @@ fn get_type(input: char) -> &'static str {
}
}


/// Splits `input` into tokens, one per run of consecutive characters for
/// which `get_type` returns the same value.
///
/// Tokens are returned in input order; each one is the concatenation of the
/// characters in its run.
pub fn tokenize(input: &str) -> Vec<String> {
    input
        .chars()
        .group_by(|elt| get_type(*elt))
        .into_iter()
        .map(|(_, run)| run.collect::<String>())
        .collect()
}

15 changes: 9 additions & 6 deletions src/utils/convert_fullwidth_chars_to_ascii.rs
Expand Up @@ -4,10 +4,9 @@
///
/// `text` Full Width roman letters
///

use constants::{UPPERCASE_START};
use utils::is_char_in_range::*;
use constants::UPPERCASE_START;
use std;
use utils::is_char_in_range::*;

pub const LOWERCASE_START: u32 = 0x61;
//pub const LOWERCASE_END: u32 = 0x7A;
Expand All @@ -23,14 +22,18 @@ pub const UPPERCASE_FULLWIDTH_END: u32 = 0xFF3A;

#[allow(dead_code)]
pub fn convert_fullwidth_chars_to_ascii(text: &str) -> String {
let ascii_chars = text.chars().map(|char|{
let ascii_chars = text.chars().map(|char| {
let code = char as u32;
let lower = is_char_in_range(char, LOWERCASE_FULLWIDTH_START, LOWERCASE_FULLWIDTH_END);
let upper = is_char_in_range(char, UPPERCASE_FULLWIDTH_START, UPPERCASE_FULLWIDTH_END);
if lower {
std::char::from_u32((code - LOWERCASE_FULLWIDTH_START) + LOWERCASE_START).unwrap().to_string();
std::char::from_u32((code - LOWERCASE_FULLWIDTH_START) + LOWERCASE_START)
.unwrap()
.to_string();
} else if upper {
std::char::from_u32((code - UPPERCASE_FULLWIDTH_START) + UPPERCASE_START).unwrap().to_string();
std::char::from_u32((code - UPPERCASE_FULLWIDTH_START) + UPPERCASE_START)
.unwrap()
.to_string();
}
return char;
});
Expand Down
6 changes: 1 addition & 5 deletions src/utils/get_chunk.rs
@@ -1,10 +1,6 @@
/// Returns a substring based on character position start/end values.
///
/// `start` and `end` count `char`s, not bytes, and describe the half-open
/// range `start..end`. Positions at or beyond the end of `text` are clamped
/// to the end of the string, and an empty or inverted range
/// (`start >= end`) yields `""` instead of panicking.
pub fn get_chunk(text: &str, start: usize, end: usize) -> &str {
    if start >= end {
        return "";
    }
    // Translate a character position into a byte offset. A position past the
    // last character maps to `text.len()`, so an out-of-range `start` gives an
    // empty slice rather than restarting from the beginning of `text`.
    let byte_offset = |pos: usize| text.char_indices().nth(pos).map_or(text.len(), |(idx, _)| idx);
    &text[byte_offset(start)..byte_offset(end)]
}

0 comments on commit 5c8bc8b

Please sign in to comment.