GitHub - OOZUIMEI/predict

Branches Tags

Name		Name	Last commit message	Last commit date
Latest commit History 379 Commits
.vscode		.vscode
docs		docs
map_editor		map_editor
vmap		vmap
.gitignore		.gitignore
DataGenerator.py		DataGenerator.py
NeuralNet.py		NeuralNet.py
README.rd		README.rd
adain.py		adain.py
apgan.py		apgan.py
apgan_lstm.py		apgan_lstm.py
apnet.py		apnet.py
apnet_china.py		apnet_china.py
aqi.py		aqi.py
arima.py		arima.py
attention_cell.py		attention_cell.py
baseline_cnnlstm.py		baseline_cnnlstm.py
capgan.py		capgan.py
craw_aws.py		craw_aws.py
craw_holiday.py		craw_holiday.py
craw_naver.py		craw_naver.py
craw_seoul_aqi.py		craw_seoul_aqi.py
craw_weather.py		craw_weather.py
crawler_daegu.py		crawler_daegu.py
crawling_all.py		crawling_all.py
crawling_aqicn.py		crawling_aqicn.py
crawling_base.py		crawling_base.py
decoder_cell.py		decoder_cell.py
district_neighbors.py		district_neighbors.py
evaluate.py		evaluate.py
fix_aqi.py		fix_aqi.py
gen_date.py		gen_date.py
get_part_data.py		get_part_data.py
griddata.py		griddata.py
heatmap.py		heatmap.py
load_labels.py		load_labels.py
mask_gan.py		mask_gan.py
mask_gan_2.py		mask_gan_2.py
maskedgan.py		maskedgan.py
model.py		model.py
model_utils.py		model_utils.py
naver.js		naver.js
plot.py		plot.py
plot_loss.py		plot_loss.py
predict_pm25.code-workspace		predict_pm25.code-workspace
preload_data.py		preload_data.py
process_label.py		process_label.py
process_sp_vector.py		process_sp_vector.py
process_vectors.py		process_vectors.py
requirements.txt		requirements.txt
rnn_utils.py		rnn_utils.py
rnn_utils_v2.py		rnn_utils_v2.py
server.py		server.py
spark_engine.py		spark_engine.py
srcns.py		srcns.py
stack_autoencoder.py		stack_autoencoder.py
svr.py		svr.py
test.py		test.py
test_gpu.py		test_gpu.py
test_tf.py		test_tf.py
test_xls.py		test_xls.py
tgan.py		tgan.py
tgan_old.py		tgan_old.py
tganlstm.py		tganlstm.py
tm.py		tm.py
tnet.py		tnet.py
tnetlstm.py		tnetlstm.py
train.py		train.py
train_sp.py		train_sp.py
train_sp_1.py		train_sp_1.py
train_sp_v_01.py		train_sp_v_01.py
transportation_processing.py		transportation_processing.py
udf.py		udf.py
utils.py		utils.py
wdm.scala		wdm.scala
weight_engine.py		weight_engine.py
windmap.py		windmap.py

Repository files navigation

If you use this code, please cite my paper:
@article{DBLP:journals/corr/abs-1804-07891,
  author    = {Tien{-}Cuong Bui and
               Van{-}Duc Le and
               Sang{-}Kyun Cha},
  title     = {A Deep Learning Approach for Air Pollution Forecasting in South Korea
               Using Encoder-Decoder Networks {\&} {LSTM}},
  journal   = {CoRR},
  volume    = {abs/1804.07891},
  year      = {2018},
  url       = {http://arxiv.org/abs/1804.07891},
  archivePrefix = {arXiv},
  eprint    = {1804.07891},
  timestamp = {Mon, 13 Aug 2018 16:48:29 +0200},
  biburl    = {https://dblp.org/rec/bib/journals/corr/abs-1804-07891},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

And:

@article{DBLP:journals/corr/abs-1804-07891,
  author    = {Tien{-}Cuong Bui and
               Van{-}Duc Le and
               Sang{-}Kyun Cha},
  title     = {A Deep Learning Approach for Air Pollution Forecasting in South Korea
               Using Encoder-Decoder Networks {\&} {LSTM}},
  journal   = {CoRR},
  volume    = {abs/1804.07891},
  year      = {2018},
  url       = {http://arxiv.org/abs/1804.07891},
  archivePrefix = {arXiv},
  eprint    = {1804.07891},
  timestamp = {Mon, 13 Aug 2018 16:48:29 +0200},
  biburl    = {https://dblp.org/rec/bib/journals/corr/abs-1804-07891},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}


Regular model training
# train normal:

python train.py -pr vectors/full_256_sep/trainlb2.txt -f vectors/full_256_sep/train_256_p2 -fw "basic"  -l mae -r 10 -e 256 -bs 32 -sl 24 -s 20 -ds 249 -il 1 -ci 0 -rn 1 -p "32_256_24h_20h_full_pm25cn_tf_fromfull"

# train transfer:

python train.py -pr vectors/full_256_sep/trainlb2 -f vectors/full_256_sep/train_256_p2 -fw "basic" -pr1 weights/32_256_24h_20h_full_pm25cndaegu.weights -l mae -r 10 -e 256 -bs 32 -sl 24 -s 20 -ds 249 -il 1 -ci 0 -rn 1 -p "32_256_24h_20h_full_pm25cn_tf_fromfull"

# test:
python test.py -f vectors/full_256_sep/test_256_p2 -pr vectors/full_256_sep/testlb2.txt -wurl weights/32_256_24h_20h_full_pm25cndaegu.weights  -sl 24 -r 10 -e 256 -il 1 -ds 249 -bs 4 -ci 0 -rn 1 -p "32_test_24h_24h_fullcn" -s 24

# train seoul only
python train.py -pr vectors/seoul_data/train_labels.txt -f vectors/seoul_data/train -e 190 -bs 32 -sl 24 -s 20 -ds 184 -rn 2 -p "32_190_seoul_24_20"

Crawling:
# crawl autonomous weather system
    $ nohup python craw_aws.py -t 0 -s "2010-01-01 00:00:00" -e "2011-12-31 00:00:00" -i 1 -si 10 &
    # crawl continuously
    $ python craw_aws.py -f 1 -s "2018-07-19 00:00:00" -i 1

# crawl air pollution 
    $ nohup python craw_seoul_aqi.py -t 0 -s "2010-01-01 00:00:00" -e "2011-12-31 00:00:00" -i 1 -si 10 &
    # continuously
    $ python craw_seoul_aqi.py -f 1 -s "2018-07-20 00:00:00" -i 1

crawl daegu_aqi


# crawl weather
    $ python craw_weather.py -f 1

CNN - LSTM Training


PROCESS SPATIOTEMPORAL
- Convert array to vectors bin
python process_sp_vector.py -u raw/sp_seoul_test -u1 sp_seoul_test

- Convert Data to Vectors of china:
python process_vectors.py -u ~/Documents/datasets/spatio_temporal_ck/raw/sample_china -u1 ~/Documents/datasets/spatio_temporal_ck/sample_china_bin -t 2 -dim 17

- Convert bin to grid SEOUL - CHINA
python process_sp_vector.py -u vectors/spatiotemporal/china_combined/sp_seoul_test_bin -au vectors/spatiotemporal/china_combined/sp_china_test_bin -u1 vectors/spatiotemporal/china_combined/test -au1 vectors/spatiotemporal/china_combined/test_china -t 1
python process_sp_vector.py -u ~/Documents/datasets/spatio_temporal_ck/sample_seoul_bin -u1 ~/Documents/datasets/spatio_temporal_ck/sample_seoul_grid -t 1


# Dataset description
Input data
- the original binary data has shape "data_size x 15" (15 = number of vector features: PM2.5, PM10, ...)
- a grid is a visualized heat map of the air pollution status, i.e. an image with shape 25 x 25
- by applying the conversion function to the original data, we get grid data with shape data_size x 25 x 25 x 15
- this is the same for both the test and train sets; only data_size differs
- before running the model, sequence indices are generated, covering 24 steps of encoding & 24 steps of decoding
- the indices are used to look up the corresponding vectors in the dataset above (ds x 25 x 25 x 15)
- the data fed to the model then has shape 24 x 25 x 25 x 15 for both the encoding and decoding phases
- the decoding data has its first 6 elements (PM2.5, PM10, ...), which can be measured at a certain time, stripped off, keeping only the weather condition features => shape: 24 x 25 x 25 x 9
- don't forget the batch_size dimension: every tensor then has shape batch_size x 24 x 25 x 25 x ...

Output prediction:
- we generate 24 images ahead, so the outputs are tensors whose shape is batch_size x 24 x 25 x 25

When not using grid-type training, we can remove the 25 x 25 dimensions and replace them with 25 (the number of districts)
=> encoding: batch_size x 24 x 25 x 15
=> decoding: batch_size x 24 x 25 x 9
=> output: batch_size x 24 x 25


# Training LSTM-CNN
Training
python train_sp.py -u vectors/sp_china_combined/seoul_1 -au vectors/sp_china_combined/china_1 -w gan_cuda_transcnn -m "CNN_LSTM" 
Testing
python train_sp.py -u vectors/sp_china_combined/sp_seoul_test_grid -au vectors/sp_china_combined/sp_china_test_bin -w weights/gan_cuda.weights -rs 1 -t 1 -m "CNN_LSTM"


# GAN Training and Testing
Train GAN 
python train_sp.py -u vectors/sp_china_combined/seoul_1 -au vectors/sp_china_combined/china_1 -w gan_cuda_transcnn -e 15 -ds 9
Test GAN
python train_sp.py -u vectors/sp_china_combined/sp_seoul_test_grid -au vectors/sp_china_combined/sp_china_test_bin -w weights/gan_cuda.weights -rs 1 -t 1 -e 15 -ds 9

# CAPGAN - GAN training and testing with regular data (not grid)
python train_sp.py -u vectors/sp_china_combined/sp_seoul_train_bin -au vectors/sp_china_combined/sp_china_train_bin -w cap 
Test GAN
python train_sp.py -u vectors/sp_china_combined/sp_seoul_test_grid -au vectors/sp_china_combined/sp_china_test_bin -w weights/*.weights -rs 1 -t 1


# LSTM 
Training
python train_sp.py -u vectors/sp_china_combined/sp_seoul_train_bin -au vectors/sp_china_combined/sp_china_train_bin  -w lstm_only -m "CNN_LSTM" -cnn 0 -dt "dis"
Testing
python train_sp.py -u vectors/sp_china_combined/sp_seoul_test_bin -au vectors/sp_china_combined/sp_china_test_bin  -w weights/lstm_only.weights -m "CNN_LSTM" -cnn 0 -dt "dis" -t 1

# LSTM with grid
Training 
python train_sp.py -u vectors/sp_china_combined/seoul_1 -au vectors/sp_china_combined/china_1  -w lstm_only_grid -m "CNN_LSTM" -cnn 0 -l mse
Testing
python train_sp.py -u vectors/sp_china_combined/sp_seoul_test_grid -au vectors/sp_china_combined/sp_china_test_bin  -w weights/lstm_only_grid.weights -m "CNN_LSTM" -l mse -cnn 0 -t 1


# Neural nets
Training:
python train_sp.py -u vectors/sp_china_combined/sp_seoul_train_bin -w neural_nets -m "NN"
Testing
python train_sp.py -u vectors/sp_china_combined/sp_seoul_test_bin -w weights/neural_nets.weights -m "NN" -t 1


# ADAIN with districts datasets
Training
python train_sp.py -u vectors/sp_china_combined/sp_seoul_train_bin  -w adain_dropout -m "ADAIN"
Testing 
python train_sp.py -u vectors/sp_china_combined/sp_seoul_test_bin  -w weights/adain_dropout.weights -m "ADAIN" -t 1 -r 1


#SAE
python train_sp.py -u vectors/sp_china_combined/sp_seoul_train_bin -w sae -m "SAE" -p 1 -dl 8 


# start visualization server
1. ssh docker server
ng serve --port 3000 --host 0.0.0.0
2. Crawling realtime data
nohup python crawling_all.py -f 1 -s "2018-12-16 00:00:00" &
nohup python craw_aws.py -f 1 -s "2018-12-16 00:00:00" &
nohup python crawling_aqicn.py -f 1 -s "2018-12-16 00:00:00" &
3. start prediction server
python server.py


# start tensorboard
tensorboard --logdir path_to_summaries_folder