Update dependencies #111

Merged 21 commits on Oct 20, 2023
1,155 changes: 536 additions & 619 deletions Cargo.lock

Large diffs are not rendered by default.

39 changes: 20 additions & 19 deletions Cargo.toml
@@ -16,58 +16,57 @@ async-channel = "1.8.0"
async-stream = "0.3.3"
async-trait = "0.1.57"
axum = "0.6.9"
axum-extra = {version = "0.6.0", features = ["spa"]}
axum-extra = {version = "0.8.0"}
axum-macros = "0.3.4"
base64 = "0.13.0"
base64 = "0.21.4"
bincode = "1.3.3"
bitvec = "1.0.1"
bytemuck = {version = "1.13.1", features = ["derive"]}
byteorder = "1.4.3"
bzip2 = "0.4.4"
chitchat = "0.5.0"
chrono = {version = "0.4.23", features = ["serde"]}
clap = {version = "3.1.18", features = ["derive"]}
criterion = "0.3.6"
clap = {version = "4.4.6", features = ["derive"]}
criterion = "0.5.1"
crossbeam-channel = "0.5.6"
csv = "1.1.6"
dashmap = {version = "5.4.0", features = ["rayon"]}
encoding_rs = "0.8.31"
eventsource-stream = "0.2.3"
fend-core = "1.2.2"
flate2 = "1.0.23"
flate2 = "1.0.28"
fst = {version = "0.4.7", features = ["levenshtein"]}
futures = "0.3.21"
half = {version = "2.2.1", features = ["serde"]}
hashbrown = {version = "0.14.0", features = ["serde", "rkyv"]}
http = "0.2.8"
image = "0.24.3"
indicatif = {version = "0.16.2", features = ["rayon"]}
indicatif = {version = "0.17.7", features = ["rayon"]}
insta = "1.31"
itertools = "0.10.3"
lalrpop = {version = "0.19.8", features = ["lexer"]}
lalrpop-util = {version = "0.19.8", features = ["lexer"]}
itertools = "0.11.0"
lalrpop = {version = "0.20.0", features = ["lexer"]}
lalrpop-util = {version = "0.20.0", features = ["lexer"]}
libc = "0.2.142"
logos = "0.12.1"
logos = "0.13.0"
lz-str = "0.2.1"
lz4_flex = "0.11.1"
maplit = "1.0.2"
md5 = "0.7.0"
memmap = "0.7.0"
memmap2 = "0.5.10"
memmap2 = "0.9.0"
mime = "0.3.17"
min-max-heap = "1.3.0"
num_cpus = "1.15.0"
once_cell = "1.13.1"
parse_wiki_text = "0.1.5"
proptest = "1.2.0"
proptest-derive = "0.3.0"
proptest-derive = "0.4.0"
publicsuffix = "2.2.3"
quick-xml = "0.23.0"
quick-xml = "0.30.0"
rand = "0.8.5"
rayon = "1.5.3"
regex = "1.6.0"
reqwest = {version = "0.11.16", features = ["blocking", "stream", "json"]}
reqwest-eventsource = "0.4.0"
ring = "0.17.3"
rio_api = "0.8.4"
rio_turtle = "0.8.4"
@@ -86,7 +85,7 @@ rusqlite = {version = "0.29.0", features = [
rust-s3 = {version = "0.33.0", features = ["blocking", "tokio"]}
rust-stemmers = "1.2.0"
safetensors = "0.3.1"
scylla = "0.7.0"
scylla = { git = "https://github.com/scylladb/scylla-rust-driver", rev = "82c1c99f0ff86509f9dd1e649ecdaddc5a3660cf" }
serde = {version = "1.0.137", features = ["rc", "derive"]}
serde_json = "1.0.81"
serde_urlencoded = "0.7.1"
@@ -97,14 +96,16 @@ tikv-jemallocator = "0.5"
tokenizers = "0.13.2"
tokio = {version = "1.23.1", features = ["full"]}
tokio-stream = "0.1.11"
toml = "0.5.9"
torch-sys = "0.13.0"
toml = "0.8.2"
tower-http = {version = "0.4.0", features = ["compression-gzip", "cors"]}
tracing = "0.1.34"
tracing-subscriber = {version = "0.3.11", features = ["env-filter"]}
url = {version = "2.4.0", features = ["serde"]}
urlencoding = "2.1.2"
utoipa = {version = "3.4.4", features = ["axum_extras"]}
utoipa-swagger-ui = {version = "3.1.4", features = ["axum"]}
utoipa = {version = "4.0.0", features = ["axum_extras"]}
utoipa-swagger-ui = {version = "4.0.0", features = ["axum"]}
uuid = "1.1.2"
whatlang = "0.16.0"

[profile.test.package]
flate2.opt-level = 3
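
The clap bump from 3.1 to 4.4 is a major-version change: clap 4's derive API prefers #[command(...)] on the struct and #[arg(...)] on fields in place of clap 3's #[clap(...)]. Below is a minimal sketch of that style; the struct and flags are hypothetical and not taken from this repository.

use clap::Parser;

// Hypothetical CLI definition, only to illustrate the clap 4 derive style.
#[derive(Parser, Debug)]
#[command(version, about = "Example CLI")]
struct Args {
    /// Path to an input file (illustrative flag, not from this repo).
    #[arg(long)]
    input: String,

    /// Enable verbose output (illustrative flag).
    #[arg(short, long)]
    verbose: bool,
}

fn main() {
    let args = Args::parse();
    println!("input = {}, verbose = {}", args.input, args.verbose);
}
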
4 changes: 1 addition & 3 deletions core/Cargo.toml
@@ -12,7 +12,7 @@ version = "0.1.0"
cors = []
default = ["cors", "with_alice", "libtorch"]
dev = ["cors"]
libtorch = ["dep:tch", "dep:torch-sys"]
libtorch = ["dep:tch"]
prod = ["cors", "libtorch"]
with_alice = ["libtorch"]

@@ -74,7 +74,6 @@ rand = {workspace = true}
rayon = {workspace = true}
regex = {workspace = true}
reqwest = {workspace = true}
reqwest-eventsource = {workspace = true}
ring = {workspace = true}
rio_api = {workspace = true}
rio_turtle = {workspace = true}
@@ -95,7 +94,6 @@ tokenizers = {workspace = true}
tokio = {workspace = true}
tokio-stream = {workspace = true}
toml = {workspace = true}
torch-sys = {workspace = true, optional = true}
tower-http = {workspace = true}
tracing = {workspace = true}
tracing-subscriber = {workspace = true}
7 changes: 5 additions & 2 deletions core/examples/alice.rs
@@ -1,7 +1,8 @@
use std::io::Write;

use base64::Engine;
use stract::alice::{
ExecutionState, {AcceleratorConfig, Alice},
ExecutionState, BASE64_ENGINE, {AcceleratorConfig, Alice},
};

#[tokio::main]
@@ -12,7 +13,9 @@ async fn main() {
tracing::subscriber::set_global_default(subscriber).unwrap();

// dont use this key in production
let key = base64::decode("URyJQTjwUjTq6FSRoZGdbUdTIvqs/QxkPacQio8Lhxc=").unwrap();
let key = BASE64_ENGINE
.decode("URyJQTjwUjTq6FSRoZGdbUdTIvqs/QxkPacQio8Lhxc=")
.unwrap();

let model = Alice::open(
"data/alice",
10 changes: 6 additions & 4 deletions core/src/alice/mod.rs
@@ -36,6 +36,8 @@ use aes_gcm::{
AeadCore, Aes256Gcm, Key, KeyInit, Nonce,
};
use anyhow::anyhow;
pub use base64::prelude::BASE64_STANDARD as BASE64_ENGINE;
use base64::Engine;
use flate2::{bufread::GzDecoder, write::GzEncoder, Compression};
use half::bf16;
use itertools::Itertools;
@@ -407,13 +409,13 @@ pub struct EncodedEncryptedState(String)
impl EncodedEncryptedState {
pub fn encode(state: EncryptedState) -> Self {
let bytes = bincode::serialize(&state).unwrap();
let encoded = base64::encode(bytes);
let encoded = BASE64_ENGINE.encode(bytes);

Self(encoded)
}

pub fn decode(self) -> Result<EncryptedState> {
let bytes = base64::decode(self.0)?;
let bytes = BASE64_ENGINE.decode(self.0)?;
let state = bincode::deserialize(&bytes)?;

Ok(state)
@@ -477,11 +479,11 @@ pub fn compress_state(state: Vec<Vec<f32>>) -> CompressedState {
let compressed = encoder.finish().unwrap();

// base64 encode the compressed state
CompressedState(base64::encode(compressed))
CompressedState(BASE64_ENGINE.encode(compressed))
}

pub fn decompress_state(state: CompressedState) -> Result<Vec<Vec<f32>>> {
let state = base64::decode(state.0)?;
let state = BASE64_ENGINE.decode(state.0)?;

let mut decoder = GzDecoder::new(&state[..]);

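
The base64 0.13 → 0.21 bump removes the free base64::encode/base64::decode functions used before; encoding now goes through an Engine such as the standard-alphabet BASE64_STANDARD, which this module re-exports as BASE64_ENGINE. A minimal standalone sketch of the new API (not an excerpt from this file):

use base64::{prelude::BASE64_STANDARD, Engine};

fn roundtrip(bytes: &[u8]) -> Vec<u8> {
    // Engine::encode replaces the old base64::encode free function.
    let encoded: String = BASE64_STANDARD.encode(bytes);
    // Engine::decode replaces base64::decode and still returns a Result.
    BASE64_STANDARD.decode(encoded).expect("valid base64")
}

fn main() {
    assert_eq!(roundtrip(b"hello"), b"hello".to_vec());
}
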
21 changes: 9 additions & 12 deletions core/src/crawler/worker.rs
@@ -547,7 +547,6 @@ impl WorkerThread {

// parse xml
let mut reader = quick_xml::Reader::from_str(&body);
let mut buf = Vec::new();

let mut urls = vec![];

@@ -556,28 +555,28 @@
let mut in_loc = false;

loop {
match reader.read_event(&mut buf) {
match reader.read_event() {
Ok(Event::Start(ref e)) => {
if e.name() == b"sitemap" {
if e.name().as_ref() == b"sitemap" {
in_sitemap = true;
} else if e.name() == b"url" {
} else if e.name().as_ref() == b"url" {
in_url = true;
} else if e.name() == b"loc" {
} else if e.name().as_ref() == b"loc" {
in_loc = true;
}
}
Ok(Event::End(ref e)) => {
if e.name() == b"sitemap" {
if e.name().as_ref() == b"sitemap" {
in_sitemap = false;
} else if e.name() == b"url" {
} else if e.name().as_ref() == b"url" {
in_url = false;
} else if e.name() == b"loc" {
} else if e.name().as_ref() == b"loc" {
in_loc = false;
}
}
Ok(Event::Text(e)) => {
if in_sitemap && in_loc {
if let Ok(url) = Url::parse(&e.unescape_and_decode(&reader).unwrap()) {
if let Ok(url) = Url::parse(&e.unescape().unwrap()) {
urls.append(
&mut self.urls_from_sitemap(url, depth + 1, max_depth).await,
);
@@ -587,7 +586,7 @@
.await;
}
} else if in_url && in_loc {
if let Ok(url) = Url::parse(&e.unescape_and_decode(&reader).unwrap()) {
if let Ok(url) = Url::parse(&e.unescape().unwrap()) {
urls.push(url);
}
}
@@ -599,8 +598,6 @@
}
_ => (),
}

buf.clear();
}

urls
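
quick-xml 0.23 → 0.30 changes the reader API this sitemap loop relies on: read_event() no longer takes an external buffer when reading from a &str, element names are QName values (hence the .as_ref() comparisons), and unescape() replaces unescape_and_decode(&reader). A small self-contained sketch of the pattern, assuming a hard-coded sitemap string:

use quick_xml::events::Event;
use quick_xml::Reader;

fn main() {
    let xml = r#"<urlset><url><loc>https://example.com/</loc></url></urlset>"#;
    let mut reader = Reader::from_str(xml);
    let mut in_loc = false;

    loop {
        // read_event() borrows from the underlying &str, so no external
        // buffer is needed (unlike read_event_into with a byte reader).
        match reader.read_event() {
            Ok(Event::Start(ref e)) if e.name().as_ref() == b"loc" => in_loc = true,
            Ok(Event::End(ref e)) if e.name().as_ref() == b"loc" => in_loc = false,
            Ok(Event::Text(e)) if in_loc => {
                // unescape() replaces the old unescape_and_decode(&reader).
                println!("loc: {}", e.unescape().unwrap());
            }
            Ok(Event::Eof) => break,
            Err(err) => panic!("xml error: {err}"),
            _ => (),
        }
    }
}
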
5 changes: 3 additions & 2 deletions core/src/entity_index/mod.rs
@@ -22,6 +22,7 @@ use std::{
time::Duration,
};

use base64::{prelude::BASE64_STANDARD as BASE64_ENGINE, Engine};
use serde::{Deserialize, Serialize};
use tantivy::{
collector::TopDocs,
@@ -412,7 +413,7 @@ impl EntityIndex {
Vec::new()
};

let image_id = base64::encode(&title);
let image_id = BASE64_ENGINE.encode(&title);
let image_id = if self.retrieve_image(&image_id).is_some() {
Some(image_id)
} else {
@@ -444,7 +445,7 @@ }
}

pub fn retrieve_image(&self, key: &str) -> Option<Image> {
let key = base64::decode(key).ok()?;
let key = BASE64_ENGINE.decode(key).ok()?;
let key = String::from_utf8(key).ok()?;

self.image_store.get(&key)
7 changes: 4 additions & 3 deletions core/src/entrypoint/alice.rs
@@ -26,13 +26,14 @@ use axum::{
routing::{get, post},
Router,
};
use base64::Engine;
use tokio::sync::Mutex;
use tokio_stream::Stream;
use tokio_stream::StreamExt as _;
use tracing::info;

use crate::{
alice::{Alice, EncodedEncryptedState, EncryptedState},
alice::{Alice, EncodedEncryptedState, EncryptedState, BASE64_ENGINE},
config::AliceLocalConfig,
distributed::{
cluster::Cluster,
@@ -169,7 +170,7 @@

pub async fn run(config: AliceLocalConfig) -> Result<(), anyhow::Error> {
let addr: SocketAddr = config.host;
let key = base64::decode(config.encryption_key)?;
let key = BASE64_ENGINE.decode(config.encryption_key)?;

info!("starting alice");
let alice = Alice::open(
@@ -201,5 +202,5 @@

pub fn generate_key() {
let key = Aes256Gcm::generate_key(OsRng);
println!("{}", base64::encode(key.as_slice()));
println!("{}", BASE64_ENGINE.encode(key.as_slice()));
}
2 changes: 2 additions & 0 deletions core/src/entrypoint/autosuggest_scrape.rs
@@ -60,6 +60,7 @@ fn suggestions(query: &str, gl: &str) -> Result<Vec<String>> {
Ok(suggestions)
}

#[derive(Clone)]
pub enum Gl {
Us,
}
@@ -111,6 +112,7 @@ pub fn run<P: AsRef<Path>>(
pb.set_style(
ProgressStyle::default_bar()
.template("{spinner:.green} [{elapsed_precise}] [{wide_bar}] {pos:>7}/{len:7} ({eta})")
.unwrap()
.progress_chars("#>-"),
);

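
The added .unwrap() reflects an indicatif 0.16 → 0.17 change: ProgressStyle::template() now parses the template eagerly and returns a Result instead of a ProgressStyle. A minimal standalone sketch of the same pattern, using a throwaway progress bar rather than the one from this file:

use indicatif::{ProgressBar, ProgressStyle};

fn main() {
    let pb = ProgressBar::new(100);
    pb.set_style(
        ProgressStyle::default_bar()
            .template("{spinner:.green} [{elapsed_precise}] [{wide_bar}] {pos:>7}/{len:7} ({eta})")
            .unwrap() // template parse errors surface here in 0.17
            .progress_chars("#>-"),
    );
    for _ in 0..100 {
        pb.inc(1);
    }
    pb.finish();
}
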
34 changes: 18 additions & 16 deletions core/src/entrypoint/dmoz_parser.rs
@@ -53,45 +53,47 @@ impl<R: BufRead> Iterator for PageIterator<R> {
let mut inside_topic = false;
let mut inside_desc = false;

while let Ok(event) = self.reader.read_event(&mut self.buf) {
while let Ok(event) = self.reader.read_event_into(&mut self.buf) {
match event {
Event::Start(ref e) if e.name() == b"ExternalPage" => {
Event::Start(ref e) if e.name().as_ref() == b"ExternalPage" => {
let url_attr = e
.attributes()
.filter(std::result::Result::is_ok)
.find(|attr| attr.as_ref().unwrap().key == b"about")
.find(|attr| attr.as_ref().unwrap().key.as_ref() == b"about")
.unwrap()
.unwrap();
let url = self.reader.decode(&url_attr.value).unwrap().to_string();
current_page = Some(Page::new(url));
let url = url_attr.decode_and_unescape_value(&self.reader).unwrap();
current_page = Some(Page::new(url.into_owned()));
inside_desc = false;
inside_topic = false;
}
Event::End(ref e) if e.name() == b"ExternalPage" => break,
Event::Start(ref e) if e.name() == b"topic" && current_page.is_some() => {
Event::End(ref e) if e.name().as_ref() == b"ExternalPage" => break,
Event::Start(ref e) if e.name().as_ref() == b"topic" && current_page.is_some() => {
inside_topic = true;
}
Event::End(ref e) if e.name() == b"topic" && current_page.is_some() => {
Event::End(ref e) if e.name().as_ref() == b"topic" && current_page.is_some() => {
inside_topic = false;
}
Event::Start(ref e) if e.name() == b"d:Description" && current_page.is_some() => {
Event::Start(ref e)
if e.name().as_ref() == b"d:Description" && current_page.is_some() =>
{
inside_desc = true;
}
Event::End(ref e) if e.name() == b"d:Description" && current_page.is_some() => {
Event::End(ref e)
if e.name().as_ref() == b"d:Description" && current_page.is_some() =>
{
inside_desc = false;
}
Event::Text(ref e) => {
if inside_topic {
if let Some(page) = &mut current_page {
let bytes = e.unescaped().unwrap();
let topic = self.reader.decode(&bytes).unwrap();
page.topic.push_str(topic);
let topic = e.unescape().unwrap();
page.topic.push_str(topic.as_ref());
}
} else if inside_desc {
if let Some(page) = &mut current_page {
let bytes = e.unescaped().unwrap();
let desc = self.reader.decode(&bytes).unwrap();
page.description.push_str(desc);
let desc = e.unescape().unwrap();
page.description.push_str(desc.as_ref());
}
}
}
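
For attributes, the dmoz parser now uses Attribute::decode_and_unescape_value(&reader) in place of reader.decode(&attr.value), and attribute keys are QName values compared through .as_ref(). A compact sketch of that lookup on a made-up element (not code from this repository):

use quick_xml::events::Event;
use quick_xml::Reader;

fn main() {
    let xml = r#"<ExternalPage about="https://example.com/page"/>"#;
    let mut reader = Reader::from_str(xml);

    loop {
        match reader.read_event() {
            Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e))
                if e.name().as_ref() == b"ExternalPage" =>
            {
                // Attribute keys are QName in 0.30, so compare via as_ref().
                let about = e
                    .attributes()
                    .filter_map(Result::ok)
                    .find(|attr| attr.key.as_ref() == b"about")
                    .expect("about attribute present");
                // decode_and_unescape_value() replaces reader.decode(&attr.value).
                let url = about.decode_and_unescape_value(&reader).unwrap();
                println!("about = {url}");
            }
            Ok(Event::Eof) => break,
            Err(err) => panic!("xml error: {err}"),
            _ => (),
        }
    }
}
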