-
Notifications
You must be signed in to change notification settings - Fork 233
/
astc_toplevel.cpp
2497 lines (2210 loc) · 73 KB
/
astc_toplevel.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2013 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Top level functions - parsing command line, managing conversions,
* etc.
*
* This is also where main() lives.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#ifndef WIN32
#include <sys/time.h>
#include <pthread.h>
#include <unistd.h>
/**
 * Return the current time of day in seconds as a double, combining the
 * whole seconds and the microsecond fraction reported by gettimeofday().
 */
double get_time()
{
	struct timeval now;
	gettimeofday(&now, NULL);
	return (double)now.tv_sec + (double)now.tv_usec * 1.0e-6;
}
/**
 * Remove the named file with unlink() and hand back unlink()'s own
 * return value unchanged.
 */
int astc_codec_unlink(const char *filename)
{
	const int status = unlink(filename);
	return status;
}
#else
// Windows.h defines IGNORE, so we must #undef our own version.
#undef IGNORE
// Define pthread-like functions in terms of Windows threading API
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
typedef HANDLE pthread_t;
typedef int pthread_attr_t;
/**
 * Minimal pthread_create() look-alike built on the Win32 CreateThread() call.
 *
 * thread      receives the handle of the new thread
 * attribs     ignored
 * threadfunc  thread entry point
 * thread_arg  argument passed to threadfunc
 *
 * Returns 0 if the thread was created and -1 if CreateThread() failed
 * (it reports failure by returning a NULL handle).
 */
int pthread_create(pthread_t * thread, const pthread_attr_t * attribs, void *(*threadfunc) (void *), void *thread_arg)
{
	(void)attribs;
	*thread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) threadfunc, thread_arg, 0, NULL);
	return (*thread != NULL) ? 0 : -1;
}
/**
 * Minimal pthread_join() look-alike: block until the given thread has
 * finished, then release its handle.
 *
 * thread  handle obtained from pthread_create()
 * value   ignored; the thread's return value is not retrieved
 *
 * Returns 0 once the thread has been waited for, -1 if the wait failed.
 * The handle is closed only after a successful wait, so that a handle is
 * not leaked for every joined thread.
 */
int pthread_join(pthread_t thread, void **value)
{
	(void)value;
	if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0)
		return -1;
	CloseHandle(thread);
	return 0;
}
double get_time()
{
FILETIME tv;
GetSystemTimeAsFileTime(&tv);
unsigned __int64 ticks = tv.dwHighDateTime;
ticks = (ticks << 32) | tv.dwLowDateTime;
return ((double)ticks) / 1.0e7;
}
// Define an unlink() function in terms of the Win32 DeleteFile function.
int astc_codec_unlink(const char *filename)
{
BOOL res = DeleteFileA(filename);
return (res ? 0 : -1);
}
#endif
#ifdef DEBUG_CAPTURE_NAN
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <fenv.h>
#endif
// Define this to be 1 to allow "illegal" block sizes
#define DEBUG_ALLOW_ILLEGAL_BLOCK_SIZES 0
// Defined in another translation unit.
extern int block_mode_histogram[2048];
#ifdef DEBUG_PRINT_DIAGNOSTICS
// Set by the encoder thread function to 1 while it processes the tile selected
// by diagnostics_tile, and to 0 for every other tile.
int print_diagnostics = 0;
// Index of the single tile to encode when diagnostics are enabled; a negative
// value makes the encoder thread function process every tile.
int diagnostics_tile = -1;
#endif
// When either of the next two flags is nonzero, the encoder thread function
// does not print its progress counter.
int print_tile_errors = 0;
int print_statistics = 0;
// The progress counter is printed only when a thread's processed-block count
// is a multiple of this value.
int progress_counter_divider = 1;
// NOTE(review): not referenced in the visible part of this file; presumably
// set from the -forcehdr_rgb / -forcehdr_rgba options -- confirm.
int rgb_force_use_of_hdr = 0;
int alpha_force_use_of_hdr = 0;
// Timestamps obtained from get_time(). start_time is taken at the beginning of
// main(); end_coding_time is taken in store_astc_file() right after encoding.
static double start_time;
static double end_time;
static double start_coding_time;
static double end_coding_time;
// code to discover the number of logical CPUs available.
#if defined(__APPLE__)
#define _DARWIN_C_SOURCE
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
#if defined(_WIN32) || defined(__CYGWIN__)
#include <windows.h>
#else
#include <unistd.h>
#endif
/**
 * Return the number of logical CPUs usable by this process.
 *
 * Linux:   counts the CPUs in the process affinity mask.
 * Windows: uses dwNumberOfProcessors from GetSystemInfo().
 * macOS:   queries the hw.availcpu sysctl.
 *
 * The result is always at least 1: if the platform query fails, reports
 * zero, or the platform is not recognised, 1 is returned so that callers can
 * use the value directly as a thread count.
 */
unsigned get_number_of_cpus(void)
{
	unsigned n_cpus = 1;
#ifdef __linux__
	cpu_set_t mask;
	CPU_ZERO(&mask);
	if (sched_getaffinity(getpid(), sizeof(mask), &mask) == 0)
	{
		n_cpus = 0;
		for (unsigned i = 0; i < CPU_SETSIZE; ++i)
		{
			if (CPU_ISSET(i, &mask))
				n_cpus++;
		}
	}
#elif defined (_WIN32) || defined(__CYGWIN__)
	SYSTEM_INFO sysinfo;
	GetSystemInfo(&sysinfo);
	n_cpus = sysinfo.dwNumberOfProcessors;
#elif defined(__APPLE__)
	int mib[2] = { CTL_HW, HW_AVAILCPU };
	int available = 0;
	// The length passed to sysctl() must be the real size of the result
	// buffer, otherwise the kernel is told it may write past it.
	size_t length = sizeof(available);
	if (sysctl(mib, 2, &available, &length, NULL, 0) == 0 && available > 0)
		n_cpus = (unsigned)available;
#endif
	// Never report zero CPUs, whichever branch produced the value.
	if (n_cpus == 0)
		n_cpus = 1;
	return n_cpus;
}
/**
 * Report an internal error on standard output, naming the source file and
 * line it was raised from, and terminate the process with exit status 1.
 * This function does not return.
 */
void astc_codec_internal_error(const char *filename, int linenum)
{
	fprintf(stdout, "Internal error: File=%s Line=%d\n", filename, linenum);
	exit(1);
}
#define MAGIC_FILE_CONSTANT 0x5CA1AB13
// Header at the start of an .astc file. load_astc_file() and store_astc_file()
// read and write this struct directly with fread()/fwrite(); every member is a
// single byte, so multi-byte values are stored explicitly, least significant
// byte first.
struct astc_header
{
uint8_t magic[4]; // MAGIC_FILE_CONSTANT, least significant byte first
uint8_t blockdim_x; // block footprint along X, in texels
uint8_t blockdim_y; // block footprint along Y, in texels
uint8_t blockdim_z; // block footprint along Z, in texels (1 for 2D blocks)
uint8_t xsize[3]; // x-size = xsize[0] + 256 * xsize[1] + 65536 * xsize[2]
uint8_t ysize[3]; // x-size, y-size and z-size are given in texels and use the same 24-bit encoding;
uint8_t zsize[3]; // the block count is inferred from the sizes and the block dimensions
};
// When nonzero, the "blocks to process" banner and the running progress
// counter are not printed.
int suppress_progress_counter = 0;
// NOTE(review): not referenced in the visible part of this file; presumably
// selects sRGB handling for the -cs/-ds/-ts modes -- confirm.
int perform_srgb_transform = 0;
/**
 * Load an .astc file and decode every block into a newly allocated image.
 *
 * filename     path of the file to read
 * bitness      passed to allocate_image() for the decoded image
 * decode_mode  passed to decompress_symbolic_block()
 * swz_decode   swizzle passed to write_imageblock()
 *
 * Returns the decoded image. Every failure (file cannot be opened, short
 * read, wrong magic value, unsupported block dimensions, zero or oversized
 * image dimensions, out of memory) prints a message and terminates the
 * process with exit(1).
 */
astc_codec_image *load_astc_file(const char *filename, int bitness, astc_decode_mode decode_mode, swizzlepattern swz_decode)
{
	FILE *f = fopen(filename, "rb");
	if (!f)
	{
		printf("Failed to open file %s\n", filename);
		exit(1);
	}

	astc_header hdr;
	size_t hdr_bytes_read = fread(&hdr, 1, sizeof(astc_header), f);
	if (hdr_bytes_read != sizeof(astc_header))
	{
		fclose(f);
		printf("Failed to read file %s\n", filename);
		exit(1);
	}

	uint32_t magicval = hdr.magic[0] + 256 * (uint32_t) (hdr.magic[1]) + 65536 * (uint32_t) (hdr.magic[2]) + 16777216 * (uint32_t) (hdr.magic[3]);
	if (magicval != MAGIC_FILE_CONSTANT)
	{
		fclose(f);
		printf("File %s not recognized\n", filename);
		exit(1);
	}

	int xdim = hdr.blockdim_x;
	int ydim = hdr.blockdim_y;
	int zdim = hdr.blockdim_z;
	if (xdim < 3 || xdim > 12 || ydim < 3 || ydim > 12 || (zdim < 3 && zdim != 1) || zdim > 12)
	{
		fclose(f);
		printf("File %s not recognized %d %d %d\n", filename, xdim, ydim, zdim);
		exit(1);
	}

	// Image dimensions are stored as 24-bit little-endian values.
	int xsize = hdr.xsize[0] + 256 * hdr.xsize[1] + 65536 * hdr.xsize[2];
	int ysize = hdr.ysize[0] + 256 * hdr.ysize[1] + 65536 * hdr.ysize[2];
	int zsize = hdr.zsize[0] + 256 * hdr.zsize[1] + 65536 * hdr.zsize[2];

	// The header comes from an untrusted file: an image with an empty axis has
	// no blocks to decode and is treated as corrupt.
	if (xsize == 0 || ysize == 0 || zsize == 0)
	{
		fclose(f);
		printf("File %s is corrupt (image dimensions %d x %d x %d)\n", filename, xsize, ysize, zsize);
		exit(1);
	}

	int xblocks = (xsize + xdim - 1) / xdim;
	int yblocks = (ysize + ydim - 1) / ydim;
	int zblocks = (zsize + zdim - 1) / zdim;

	// Compute the payload size in size_t, checking each multiplication, so
	// that large header values cannot wrap around and yield a buffer that is
	// smaller than the data the decode loop below will index.
	const size_t size_limit = (size_t) -1;
	size_t block_count = (size_t) xblocks;
	int size_ok = 1;
	if (block_count > size_limit / (size_t) yblocks)
		size_ok = 0;
	else
		block_count *= (size_t) yblocks;
	if (size_ok)
	{
		if (block_count > size_limit / (size_t) zblocks)
			size_ok = 0;
		else
			block_count *= (size_t) zblocks;
	}
	if (size_ok && block_count > size_limit / 16)
		size_ok = 0;
	if (!size_ok)
	{
		fclose(f);
		printf("File %s is too large to load (image dimensions %d x %d x %d)\n", filename, xsize, ysize, zsize);
		exit(1);
	}

	// Every compressed block occupies 16 bytes.
	size_t bytes_to_read = block_count * 16;
	uint8_t *buffer = (uint8_t *) malloc(bytes_to_read);
	if (!buffer)
	{
		fclose(f);
		printf("Ran out of memory\n");
		exit(1);
	}

	size_t bytes_read = fread(buffer, 1, bytes_to_read, f);
	fclose(f);
	if (bytes_read != bytes_to_read)
	{
		free(buffer);
		printf("Failed to read file %s\n", filename);
		exit(1);
	}

	astc_codec_image *img = allocate_image(bitness, xsize, ysize, zsize, 0);
	initialize_image(img);

	// Blocks are stored in x-fastest, then y, then z order.
	imageblock pb;
	for (int z = 0; z < zblocks; z++)
		for (int y = 0; y < yblocks; y++)
			for (int x = 0; x < xblocks; x++)
			{
				size_t block_index = ((size_t) z * (size_t) yblocks + (size_t) y) * (size_t) xblocks + (size_t) x;
				uint8_t *bp = buffer + block_index * 16;
				physical_compressed_block pcb = *(physical_compressed_block *) bp;
				symbolic_compressed_block scb;
				physical_to_symbolic(xdim, ydim, zdim, pcb, &scb);
				decompress_symbolic_block(decode_mode, xdim, ydim, zdim, x * xdim, y * ydim, z * zdim, &scb, &pb);
				write_imageblock(img, &pb, xdim, ydim, zdim, x * xdim, y * ydim, z * zdim, swz_decode);
			}

	free(buffer);
	return img;
}
// Work descriptor handed to encode_astc_image_threadfunc(). encode_astc_image()
// fills in one descriptor per worker; every field except thread_id holds the
// same value in all descriptors, and the pointers refer to shared objects.
struct encode_astc_image_info
{
int xdim; // block footprint along X, in texels
int ydim; // block footprint along Y, in texels
int zdim; // block footprint along Z, in texels
const error_weighting_params *ewp; // passed through to compress_symbolic_block()
uint8_t *buffer; // destination for 16-byte physical blocks; written only when pack_and_unpack is 0
int *counters; // one processed-block counter per worker, indexed by thread_id
int pack_and_unpack; // nonzero: decode each block again into output_image instead of storing it in buffer
int thread_id; // index of this worker, 0 .. threadcount-1
int threadcount; // total number of workers sharing the image
astc_decode_mode decode_mode; // passed to compress_symbolic_block() and decompress_symbolic_block()
swizzlepattern swz_encode; // swizzle used by fetch_imageblock()
swizzlepattern swz_decode; // swizzle used by write_imageblock() in pack_and_unpack mode
int *threads_completed; // one flag per worker, set to 1 when that worker has finished
const astc_codec_image *input_image; // image to be encoded
astc_codec_image *output_image; // decode target; used only when pack_and_unpack is nonzero
};
// Worker entry point for encoding: vblk points to an encode_astc_image_info.
// The worker walks all blocks of the input image in x-fastest, then y, then z
// order and encodes every threadcount-th block, starting with block number
// thread_id. Each encoded block is either stored as a 16-byte physical block in
// the shared buffer, or (pack_and_unpack mode) decoded again and written to the
// output image. Always returns NULL.
//
// counters[] and threads_completed[] are plain ints shared by all workers and
// are read here without any synchronization; they only drive the progress
// display.
void *encode_astc_image_threadfunc(void *vblk)
{
const encode_astc_image_info *blk = (const encode_astc_image_info *)vblk;
int xdim = blk->xdim;
int ydim = blk->ydim;
int zdim = blk->zdim;
uint8_t *buffer = blk->buffer;
const error_weighting_params *ewp = blk->ewp;
int thread_id = blk->thread_id;
int threadcount = blk->threadcount;
int *counters = blk->counters;
int pack_and_unpack = blk->pack_and_unpack;
astc_decode_mode decode_mode = blk->decode_mode;
swizzlepattern swz_encode = blk->swz_encode;
swizzlepattern swz_decode = blk->swz_decode;
int *threads_completed = blk->threads_completed;
const astc_codec_image *input_image = blk->input_image;
astc_codec_image *output_image = blk->output_image;
imageblock pb;
// ctr counts down the blocks to skip before this worker's next block;
// starting it at thread_id staggers the workers across consecutive blocks.
int ctr = thread_id;
// pctr is the number of blocks this worker has handled so far.
int pctr = 0;
int x, y, z, i;
int xsize = input_image->xsize;
int ysize = input_image->ysize;
int zsize = input_image->zsize;
// Block counts per axis, rounding partial blocks at the image edge up.
int xblocks = (xsize + xdim - 1) / xdim;
int yblocks = (ysize + ydim - 1) / ydim;
int zblocks = (zsize + zdim - 1) / zdim;
int owns_progress_counter = 0;
for (z = 0; z < zblocks; z++)
for (y = 0; y < yblocks; y++)
for (x = 0; x < xblocks; x++)
{
if (ctr == 0)
{
// Byte offset of this block in the output buffer (16 bytes per block).
int offset = ((z * yblocks + y) * xblocks + x) * 16;
uint8_t *bp = buffer + offset;
#ifdef DEBUG_PRINT_DIAGNOSTICS
// With a non-negative diagnostics_tile, only the block whose per-worker
// index equals diagnostics_tile is encoded; all others are skipped.
if (diagnostics_tile < 0 || diagnostics_tile == pctr)
{
print_diagnostics = (diagnostics_tile == pctr) ? 1 : 0;
#endif
fetch_imageblock(input_image, &pb, xdim, ydim, zdim, x * xdim, y * ydim, z * zdim, swz_encode);
symbolic_compressed_block scb;
compress_symbolic_block(input_image, decode_mode, xdim, ydim, zdim, ewp, &pb, &scb);
if (pack_and_unpack)
{
decompress_symbolic_block(decode_mode, xdim, ydim, zdim, x * xdim, y * ydim, z * zdim, &scb, &pb);
write_imageblock(output_image, &pb, xdim, ydim, zdim, x * xdim, y * ydim, z * zdim, swz_decode);
}
else
{
physical_compressed_block pcb;
pcb = symbolic_to_physical(xdim, ydim, zdim, &scb);
*(physical_compressed_block *) bp = pcb;
}
#ifdef DEBUG_PRINT_DIAGNOSTICS
}
#endif
counters[thread_id]++;
// Skip the next threadcount-1 blocks; they belong to the other workers.
ctr = threadcount - 1;
pctr++;
// routine to print the progress counter.
if (suppress_progress_counter == 0 && (pctr % progress_counter_divider) == 0 && print_tile_errors == 0 && print_statistics == 0)
{
int do_print = 1;
// the current thread has the responsibility for printing the progress counter
// if every previous thread has completed. Also, if we have ever received the
// responsibility to print the progress counter, we are going to keep it
// until the thread is completed.
if (!owns_progress_counter)
{
for (i = thread_id - 1; i >= 0; i--)
{
if (threads_completed[i] == 0)
{
do_print = 0;
break;
}
}
}
if (do_print)
{
owns_progress_counter = 1;
// The printed value is the sum of all workers' counters.
int summa = 0;
for (i = 0; i < threadcount; i++)
summa += counters[i];
printf("\r%d", summa);
fflush(stdout);
}
}
}
else
ctr--;
}
// Lets higher-numbered workers take over the progress display.
threads_completed[thread_id] = 1;
return NULL;
}
/**
 * Encode every block of input_image, spreading the work over threadcount
 * workers running encode_astc_image_threadfunc().
 *
 * input_image      image to encode
 * output_image     decode target, used only when pack_and_unpack is nonzero
 * xdim, ydim, zdim block footprint in texels
 * ewp              error weighting parameters passed to the block encoder
 * decode_mode      decode mode passed to the block encoder/decoder
 * swz_encode       swizzle applied when fetching texels from input_image
 * swz_decode       swizzle applied when writing texels to output_image
 * buffer           receives 16 bytes per block when pack_and_unpack is zero
 * pack_and_unpack  nonzero: decode each block into output_image instead of
 *                  storing it in buffer
 * threadcount      number of workers; values below 1 are treated as 1
 *
 * With a single worker the encoding runs on the calling thread. Otherwise one
 * thread is created per worker; if a thread cannot be created, that worker's
 * share of the blocks is encoded on the calling thread so that no block is
 * left unprocessed, and only threads that were actually started are joined.
 */
void encode_astc_image(const astc_codec_image * input_image,
					   astc_codec_image * output_image,
					   int xdim,
					   int ydim,
					   int zdim,
					   const error_weighting_params * ewp, astc_decode_mode decode_mode, swizzlepattern swz_encode, swizzlepattern swz_decode, uint8_t * buffer, int pack_and_unpack, int threadcount)
{
	int i;

	// A non-positive worker count would make the array allocations below
	// invalid and leave the image unencoded.
	if (threadcount < 1)
		threadcount = 1;

	int *counters = new int[threadcount];
	int *threads_completed = new int[threadcount];

	// before entering into the multithreaded routine, ensure that the block size descriptors
	// and the partition table descriptors needed actually exist.
	get_block_size_descriptor(xdim, ydim, zdim);
	get_partition_table(xdim, ydim, zdim, 0);

	encode_astc_image_info *ai = new encode_astc_image_info[threadcount];
	for (i = 0; i < threadcount; i++)
	{
		ai[i].xdim = xdim;
		ai[i].ydim = ydim;
		ai[i].zdim = zdim;
		ai[i].buffer = buffer;
		ai[i].ewp = ewp;
		ai[i].counters = counters;
		ai[i].pack_and_unpack = pack_and_unpack;
		ai[i].thread_id = i;
		ai[i].threadcount = threadcount;
		ai[i].decode_mode = decode_mode;
		ai[i].swz_encode = swz_encode;
		ai[i].swz_decode = swz_decode;
		ai[i].threads_completed = threads_completed;
		ai[i].input_image = input_image;
		ai[i].output_image = output_image;
		counters[i] = 0;
		threads_completed[i] = 0;
	}

	if (threadcount == 1)
		encode_astc_image_threadfunc(&ai[0]);
	else
	{
		pthread_t *threads = new pthread_t[threadcount];
		int *thread_started = new int[threadcount];

		for (i = 0; i < threadcount; i++)
			thread_started[i] = (pthread_create(&(threads[i]), NULL, encode_astc_image_threadfunc, (void *)(&(ai[i]))) == 0);

		// Workers whose thread could not be created run here, after all
		// creatable threads are already busy.
		for (i = 0; i < threadcount; i++)
			if (!thread_started[i])
				encode_astc_image_threadfunc(&ai[i]);

		// Joining a thread that was never created would use an
		// uninitialized handle, so only started threads are joined.
		for (i = 0; i < threadcount; i++)
			if (thread_started[i])
				pthread_join(threads[i], NULL);

		delete[]thread_started;
		delete[]threads;
	}

	delete[]ai;
	delete[]counters;
	delete[]threads_completed;
}
/**
 * Encode input_image and write it to an .astc file.
 *
 * input_image      image to encode
 * filename         path of the file to create
 * xdim, ydim, zdim block footprint in texels
 * ewp              error weighting parameters passed to the encoder
 * decode_mode      decode mode passed to the encoder
 * swz_encode       swizzle applied when fetching texels from input_image
 * threadcount      number of encoder workers
 *
 * The file consists of an astc_header followed by 16 bytes per block. Any
 * failure (image too large for the 24-bit size fields of the header, out of
 * memory, file cannot be created, incomplete write) prints a message and
 * terminates the process with exit(1). end_coding_time is set as soon as the
 * encoding itself has finished.
 */
void store_astc_file(const astc_codec_image * input_image,
					 const char *filename, int xdim, int ydim, int zdim, const error_weighting_params * ewp, astc_decode_mode decode_mode, swizzlepattern swz_encode, int threadcount)
{
	int xsize = input_image->xsize;
	int ysize = input_image->ysize;
	int zsize = input_image->zsize;

	// The header stores each dimension in three bytes; a larger value would
	// be silently truncated and produce a file that does not match its data.
	if (xsize > 0xFFFFFF || ysize > 0xFFFFFF || zsize > 0xFFFFFF)
	{
		printf("Image dimensions %d x %d x %d are too large for file %s\n", xsize, ysize, zsize, filename);
		exit(1);
	}

	int xblocks = (xsize + xdim - 1) / xdim;
	int yblocks = (ysize + ydim - 1) / ydim;
	int zblocks = (zsize + zdim - 1) / zdim;

	// Every compressed block occupies 16 bytes.
	size_t payload_bytes = (size_t) xblocks * (size_t) yblocks * (size_t) zblocks * 16;
	uint8_t *buffer = (uint8_t *) malloc(payload_bytes);
	if (!buffer)
	{
		printf("Ran out of memory\n");
		exit(1);
	}

	if (!suppress_progress_counter)
		printf("%d blocks to process ..\n", xblocks * yblocks * zblocks);

	encode_astc_image(input_image, NULL, xdim, ydim, zdim, ewp, decode_mode, swz_encode, swz_encode, buffer, 0, threadcount);

	end_coding_time = get_time();

	astc_header hdr;
	hdr.magic[0] = MAGIC_FILE_CONSTANT & 0xFF;
	hdr.magic[1] = (MAGIC_FILE_CONSTANT >> 8) & 0xFF;
	hdr.magic[2] = (MAGIC_FILE_CONSTANT >> 16) & 0xFF;
	hdr.magic[3] = (MAGIC_FILE_CONSTANT >> 24) & 0xFF;
	hdr.blockdim_x = xdim;
	hdr.blockdim_y = ydim;
	hdr.blockdim_z = zdim;
	hdr.xsize[0] = xsize & 0xFF;
	hdr.xsize[1] = (xsize >> 8) & 0xFF;
	hdr.xsize[2] = (xsize >> 16) & 0xFF;
	hdr.ysize[0] = ysize & 0xFF;
	hdr.ysize[1] = (ysize >> 8) & 0xFF;
	hdr.ysize[2] = (ysize >> 16) & 0xFF;
	hdr.zsize[0] = zsize & 0xFF;
	hdr.zsize[1] = (zsize >> 8) & 0xFF;
	hdr.zsize[2] = (zsize >> 16) & 0xFF;

	FILE *wf = fopen(filename, "wb");
	if (!wf)
	{
		free(buffer);
		printf("Failed to open file %s\n", filename);
		exit(1);
	}

	// fclose() flushes buffered data, so its result is part of deciding
	// whether the file was written completely.
	size_t hdr_bytes_written = fwrite(&hdr, 1, sizeof(astc_header), wf);
	size_t payload_bytes_written = fwrite(buffer, 1, payload_bytes, wf);
	int close_result = fclose(wf);
	free(buffer);

	if (hdr_bytes_written != sizeof(astc_header) || payload_bytes_written != payload_bytes || close_result != 0)
	{
		printf("Failed to write file %s\n", filename);
		exit(1);
	}
}
/**
 * Encode input_image block by block and immediately decode each block again
 * into a freshly allocated image of the same dimensions, which is returned.
 *
 * xdim, ydim, zdim give the block footprint in texels; ewp, decode_mode,
 * swz_encode, swz_decode and threadcount are passed on to encode_astc_image();
 * bitness is passed to allocate_image() for the result image.
 *
 * Unless the progress counter is suppressed, the total block count is
 * printed before encoding and a newline afterwards.
 */
astc_codec_image *pack_and_unpack_astc_image(const astc_codec_image * input_image,
											 int xdim,
											 int ydim,
											 int zdim,
											 const error_weighting_params * ewp, astc_decode_mode decode_mode, swizzlepattern swz_encode, swizzlepattern swz_decode, int bitness, int threadcount)
{
	const int width = input_image->xsize;
	const int height = input_image->ysize;
	const int depth = input_image->zsize;

	astc_codec_image *decoded = allocate_image(bitness, width, height, depth, 0);

	// Blocks per axis, counting partial blocks at the image edge as whole.
	const int blocks_x = (width + xdim - 1) / xdim;
	const int blocks_y = (height + ydim - 1) / ydim;
	const int blocks_z = (depth + zdim - 1) / zdim;

	const int show_progress = !suppress_progress_counter;
	if (show_progress)
		printf("%d blocks to process...\n", blocks_x * blocks_y * blocks_z);

	// No output buffer is needed: with pack_and_unpack set, results go
	// straight into the decoded image.
	encode_astc_image(input_image, decoded, xdim, ydim, zdim, ewp, decode_mode, swz_encode, swz_decode, NULL, 1, threadcount);

	if (!suppress_progress_counter)
		printf("\n");

	return decoded;
}
/**
 * Pick the 2D block size whose bitrate (128 bits per block divided by the
 * number of texels per block) is closest to target_bitrate.
 *
 * target_bitrate   desired bitrate in bits per texel
 * x, y             receive the chosen block dimensions, with *x >= *y
 * consider_illegal nonzero: consider every combination of the dimensions
 *                  {4,5,6,8,10,12}; zero: consider only the legal sizes
 *                  NxN, the "next size up" MxN pairs (5x4, 6x5, 8x6, 10x8,
 *                  12x10) and 8x5, 10x5, 10x6
 *
 * If several candidates are equally close, the one with the smaller
 * x-to-y aspect ratio wins.
 */
void find_closest_blockdim_2d(float target_bitrate, int *x, int *y, int consider_illegal)
{
	int blockdims[6] = { 4, 5, 6, 8, 10, 12 };
	float best_error = 1000;
	float aspect_of_best = 1;
	int i, j;

	// Y dimension
	for (i = 0; i < 6; i++)
	{
		// X dimension
		for (j = i; j < 6; j++)
		{
			// i indexes the Y dimension and j the X dimension, so the three
			// special cases must test j against X and i against Y:
			//             NxN       MxN           8x5                10x5               10x6
			int is_legal = (j == i) || (j == i + 1) || (j == 3 && i == 1) || (j == 4 && i == 1) || (j == 4 && i == 2);

			if (consider_illegal || is_legal)
			{
				float bitrate = 128.0f / (blockdims[i] * blockdims[j]);
				float bitrate_error = fabs(bitrate - target_bitrate);
				float aspect = (float)blockdims[j] / blockdims[i];
				if (bitrate_error < best_error || (bitrate_error == best_error && aspect < aspect_of_best))
				{
					*x = blockdims[j];
					*y = blockdims[i];
					best_error = bitrate_error;
					aspect_of_best = aspect;
				}
			}
		}
	}
}
/**
 * Pick the 3D block size whose bitrate (128 bits per block divided by the
 * number of texels per block) is closest to target_bitrate.
 *
 * target_bitrate   desired bitrate in bits per texel
 * x, y, z          receive the chosen block dimensions, with *x >= *y >= *z
 * consider_illegal nonzero: consider every combination of the dimensions
 *                  {3,4,5,6}; zero: consider only NxNxN, MxNxN and MxMxN
 *                  where M is the next dimension after N
 *
 * Among equally close candidates, the one with the smaller aspect measure
 * (x/y + y/z + x/z) wins.
 */
void find_closest_blockdim_3d(float target_bitrate, int *x, int *y, int *z, int consider_illegal)
{
	int blockdims[4] = { 3, 4, 5, 6 };
	float smallest_error = 1000;
	float smallest_aspect = 1;

	for (int zi = 0; zi < 4; zi++)
	{
		for (int yi = zi; yi < 4; yi++)
		{
			for (int xi = yi; xi < 4; xi++)
			{
				// Because xi >= yi >= zi, the legal shapes NxNxN, MxNxN and
				// MxMxN are exactly those where the indices rise by at most
				// one step in total.
				int total_steps = (xi - yi) + (yi - zi);
				if (!consider_illegal && total_steps > 1)
					continue;

				const int dim_x = blockdims[xi];
				const int dim_y = blockdims[yi];
				const int dim_z = blockdims[zi];

				float bitrate = 128.0f / (dim_z * dim_y * dim_x);
				float bitrate_error = fabs(bitrate - target_bitrate);
				float aspect = (float)dim_x / dim_y + (float)dim_y / dim_z + (float)dim_x / dim_z;

				int is_closer = bitrate_error < smallest_error;
				int is_tie_with_better_aspect = (bitrate_error == smallest_error) && (aspect < smallest_aspect);
				if (is_closer || is_tie_with_better_aspect)
				{
					*x = dim_x;
					*y = dim_y;
					*z = dim_z;
					smallest_error = bitrate_error;
					smallest_aspect = aspect;
				}
			}
		}
	}
}
void compare_two_files(const char *filename1, const char *filename2, int low_fstop, int high_fstop, int psnrmode)
{
int load_result1;
int load_result2;
astc_codec_image *img1 = astc_codec_load_image(filename1, 0, &load_result1);
if (load_result1 < 0)
{
printf("Failed to load file %s.\n", filename1);
exit(1);
}
astc_codec_image *img2 = astc_codec_load_image(filename2, 0, &load_result2);
if (load_result2 < 0)
{
printf("Failed to load file %s.\n", filename2);
exit(1);
}
int file1_components = load_result1 & 0x7;
int file2_components = load_result2 & 0x7;
int comparison_components = MAX(file1_components, file2_components);
int compare_hdr = 0;
if (load_result1 & 0x80)
compare_hdr = 1;
if (load_result2 & 0x80)
compare_hdr = 1;
compute_error_metrics(compare_hdr, comparison_components, img1, img2, low_fstop, high_fstop, psnrmode);
}
// Overlays a float with a signed and an unsigned 32-bit integer, so that the
// same 32 bits can be accessed either as a float or as an integer.
union if32
{
float f; // the value viewed as a float
int32_t s; // the same bits viewed as a signed 32-bit integer
uint32_t u; // the same bits viewed as an unsigned 32-bit integer
};
// The ASTC codec is written with the assumption that a float threaded through
// the "if32" union will in fact be stored and reloaded as a 32-bit IEEE-754 single-precision
// float, stored with round-to-nearest rounding. This is always the case in an
// IEEE-754 compliant system, however not every system is actually IEEE-754 compliant
// in the first place. As such, we run a quick test to check that this is actually the case
// (e.g. gcc on 32-bit x86 will typically fail unless -msse2 -mfpmath=sse is specified).
// Test input for the check below. It is volatile so that the value has to be
// loaded at run time and the arithmetic cannot be folded at compile time.
volatile float xprec_testval = 2.51f;
// Start-up self-test: verifies that a float sum stored through the if32 union
// is rounded to single precision. Prints a message and terminates the process
// with exit(1) if it is not; returns normally otherwise.
void test_inappropriate_extended_precision(void)
{
if32 p;
// 12582912.0f is 1.5 * 2^23. Single-precision values of that magnitude are
// 1.0 apart, so a sum rounded to single precision is 12582915 and subtracting
// the constant again gives exactly 3.0. A sum that keeps extra precision
// retains the fractional part and the comparison below fails.
p.f = xprec_testval + 12582912.0f;
float q = p.f - 12582912.0f;
if (q != 3.0f)
{
printf("Single-precision test failed; please recompile with proper IEEE-754 support.\n");
exit(1);
}
}
// Debug routine to dump the entire image if requested.
/**
 * Debug helper: print the contents of img->imagedata8 to standard output as
 * hexadecimal words, one text line per image row, with an empty line between
 * slices. The dumped area includes the padding border on the X and Y axes,
 * and on the Z axis as well unless the image has a single slice.
 *
 * NOTE(review): each word is read from &imagedata8[z][y][x] through an
 * unsigned-int pointer; whether x is meant to index texels or bytes depends
 * on the declaration of imagedata8, which is not visible here -- confirm.
 */
void dump_image(astc_codec_image * img)
{
	printf("\n\nDumping image ( %d x %d x %d + %d)...\n\n", img->xsize, img->ysize, img->zsize, img->padding);

	const int border = 2 * img->padding;
	const int depth = (img->zsize != 1) ? (img->zsize + border) : img->zsize;
	const int height = img->ysize + border;
	const int width = img->xsize + border;

	for (int slice = 0; slice < depth; slice++)
	{
		// Separator before every slice except the first.
		if (slice > 0)
			printf("\n\n");

		for (int row = 0; row < height; row++)
		{
			// Line break before every row except the first of a slice.
			if (row > 0)
				printf("\n");

			for (int col = 0; col < width; col++)
				printf(" 0x%08X", *(int unsigned *)&img->imagedata8[slice][row][col]);
		}
	}

	printf("\n\n");
}
int main(int argc, char **argv)
{
int i;
test_inappropriate_extended_precision();
// initialization routines
prepare_angular_tables();
build_quantization_mode_table();
start_time = get_time();
#ifdef DEBUG_CAPTURE_NAN
feenableexcept(FE_DIVBYZERO | FE_INVALID);
#endif
if (argc < 4)
{
printf( "ASTC codec version 1.3\n"
"Copyright (C) 2011-2013 ARM Limited\n"
"All rights reserved. Use of this software is subject to terms of its license.\n\n"
"Usage:\n"
"Compress to texture file:\n"
" %s -c <inputfile> <outputfile> <rate> [options]\n"
"Decompress from texture file:\n"
" %s -d <inputfile> <outputfile> [options]\n"
"Compress, then immediately decompress to image:\n"
" %s -t <inputfile> <outputfile> <rate> [options]\n"
"Compare two files (no compression or decompression):\n"
" %s -compare <file1> <file2> [options]\n"
"\n"
"When encoding/decoding a texture for use with the LDR-SRGB submode,\n"
"use -cs, -ds, -ts instead of -c, -d, -t.\n"
"When encoding/decoding a texture for use with the LDR-linear submode,\n"
"use -cl, -dl, -tl instead of -c, -d, -t.\n"
"\n"
"For compression, the input file formats supported are\n"
" * PNG (*.png)\n"
" * Targa (*.tga)\n"
" * JPEG (*.jpg)\n"
" * GIF (*.gif) (non-animated only)\n"
" * BMP (*.bmp)\n"
" * Radiance HDR (*.hdr)\n"
" * Khronos Texture KTX (*.ktx)\n"
" * DirectDraw Surface DDS (*.dds)\n"
" * Half-Float-TGA (*.htga)\n"
" * OpenEXR (*.exr; only if 'exr_to_htga' is present in the path)\n"
"\n"
"For the KTX and DDS formats, the following subset of the format\n"
"features are supported; the subset is:\n"
" * 2D and 3D textures supported\n"
" * Uncompressed only, with unorm8, unorm16, float16 or float32 components\n"
" * R, RG, RGB, BGR, RGBA, BGRA, Luminance and Luminance-Alpha texel formats\n"
" * In case of multiple image in one file (mipmap, cube-faces, texture-arrays)\n"
" the codec will read the first one and ignore the other ones.\n"
"\n"
"When using HDR or 3D textures, it is recommended to use the KTX or DDS formats.\n"
"Separate 2D image slices can be assembled into a 3D image using the -array option.\n"
"\n"
"The output file will be an ASTC compressed texture file (recommended filename\n"
"ending .astc)\n"
"\n"
"For decompression, the input file must be an ASTC compressed texture file;\n"
"the following formats are supported for output:\n"
" * Targa (*.tga)\n"
" * KTX (*.ktx)\n"
" * DDS (*.dds)\n"
" * Half-Float-TGA (*.htga)\n"
" * OpenEXR (*.exr; only if t'exr_to_htga' is present in the path)\n"
"\n"
"Targa is suitable only for 2D LDR images; for HDR and/or 3D images,\n"
"please use KTX or DDS.\n"
"\n"
"For compression, the <rate> argument specifies the bitrate or block\n"
"dimension to use. This argument can be specified in one of two ways:\n"
" * A decimal number (at least one actual decimal needed). This will cause \n"
" the codec to interpret the number as a desired bitrate, and pick a block\n"
" size to match that bitrate as closely as possible. For example, if you want a\n"
" bitrate of 2.0 bits per texel, then specifiy the <rate> argument as 2.0\n"
" * A block size. This specifies the block dimensions to use along the\n"
" X, Y (and for 3D textures) Z axes. The dimensions are separated with\n"
" the character x, with no spaces. For 2D textures, the supported\n"
" dimensions along each axis are picked from the set {4,5,6,8,10,12};\n"
" for 3D textures, the supported dimensions are picked from the\n"
" set {3,4,5,6}. For example, if you wish to encode a 2D texture using the\n"
" 10x6 block size (10 texels per block along the X axis, 6 texels per block\n"
" along the Y axis, then specify the <rate> argument as 10x6 .\n"
"Some examples of supported 2D block sizes are:\n"
" 4x4 -> 8.0 bpp\n"
" 5x5 -> 5.12 bpp\n"
" 6x6 -> 3.56 bpp\n"
" 8x6 -> 2.67 bpp\n"
" 8x8 -> 2.0 bpp\n"
" 10x8 -> 1.6 bpp\n"
" 10x10 -> 1.28 bpp\n"
" 10x12 -> 1.07 bpp\n"
" 12x12 -> 0.89 bpp\n"
"If you try to specify a bitrate that can potentially map to multiple different\n"
"block sizes, the codec will choose the block size with the least lopsided\n"
"aspect ratio (e.g. if you specify 2.67, then the codec will choose the\n"
"8x6 block size, not 12x4)\n"
"\n"
"Below is a description of all the available options. Most of them make sense\n"
"for encoding only, however there are some that affect decoding as well\n"
"(such as -dsw and the normal-presets)\n"
"\n"
"\n"
"Built-in error-weighting Presets:\n"
"---------------------------------\n"
"The presets provide easy-to-use combinations of encoding options that\n"
"are designed for use with certain commonly-occurring kinds of\n"
"textures.\n"
"\n"
" -normal_psnr\n"
" For encoding, assume that the input texture is a normal map with the\n"
" X and Y components of the actual normals in the Red and Green\n"
" color channels. The codec will then move the 2nd component to Alpha,\n"
" and apply an error-weighting function based on angular error.\n"
"\n"
" It is possible to use this preset with texture decoding as well,\n"
" in which case it will expand the normal map from 2 to 3 components\n"
" after the actual decoding.\n"
"\n"
" The -normal_psnr preset as a whole is equivalent to the options\n"
" \"-rn -esw rrrg -dsw raz1 -ch 1 0 0 1 -oplimit 1000 -mincorrel 0.99\" .\n"
"\n"
" -normal_percep\n"
" Similar to -normal_psnr, except that it tries to optimize the normal\n"
" map for best possible perceptual results instead of just maximizing\n"
" angular PSNR.\n"
" The -normal_percep preset as a whole is equivalent to the options\n"
" \"-normal_psnr -b 2.5 -v 3 1 1 0 50 0 -va 1 1 0 50 -dblimit 60\" .\n"
"\n"
" -mask\n"
" Assume that the input texture is a texture that contains\n"
" unrelated content in its various color channels, and where\n"
" it is undesirable for errors in one channel to affect\n"
" the other channels.\n"
" Equivalent to \"-v 3 1 1 0 25 0.03 -va 0 25\" .\n"
"\n"
" -alphablend\n"
" Assume that the input texture is an RGB-alpha texture where\n"
" the alpha component is used to represent opacity.\n"
" (0=fully transparent, 1=fully opaque)\n"
" Equivalent to \"-a 1\" .\n"
"\n"
" -hdr\n"
" Assume that the input texture is an HDR texture. If an alpha channel is\n"
" present, it is treated as an LDR channel (e.g. opacity)\n"
" Optimize for 4th-root error for the color and linear error for the alpha.\n"
" Equivalent to\n"
" \"-forcehdr_rgb -v 0 0.75 0 1 0 0 -va 0.02 1 0 0 -dblimit 999\"\n"
"\n"
" -hdra\n"
" Assume that the input texture is an HDR texture, and optimize\n"
" for 4th-root error. If an alpha channel is present, it is\n"
" assumed to be HDR and optimized for 4th-root error as well.\n"
" Equivalent to\n"
" \"-forcehdr_rgba -v 0 0.75 0 1 0 0 -va 0.75 0 1 0 -dblimit 999\"\n"
"\n"
" -hdr_log\n"
" -hdra_log\n"
" Assume that the input texture is an HDR texture, and optimize\n"
" for logarithmic error. This should give better results than -hdr\n"
" on metrics like \"logRMSE\" and \"mPSNR\", but the subjective\n"
" quality (in particular block artifacts) is generally significantly worse\n"
" than -hdr.\n"
" \"-hdr_log\" is equivalent to\n"
" \"-forcehdr_rgb -v 0 1 0 1 0 0 -va 0.02 1 0 0 -dblimit 999\"\n"
" \"-hdra_log\" is equivalent to\n"
" \"-forcehdr_rgba -v 0 1 0 1 0 0 -va 1 0 1 0 -dblimit 999\"\n"
"\n"
"\n"
"\n"
"Performance-quality tradeoff presets:\n"
"-------------------------------------\n"
"These are presets that provide different tradeoffs between encoding\n"
"performance and quality. Exactly one of these presets has to be specified\n"
"for encoding; if this is not done, the codec reports an error message.\n"
"\n"
" -veryfast\n"
" Run codec in very-fast-mode; this generally results in substantial\n"
" quality loss.\n"
"\n"
" -fast\n"
" Run codec in fast-mode. This generally results in mild quality loss.\n"
"\n"
" -medium\n"
" Run codec in medium-speed-mode.\n"
"\n"
" -thorough\n"
" Run codec in thorough-mode. This should be sufficient to fix most\n"
" cases where \"-medium\" provides inadequate quality.\n"
"\n"
" -exhaustive\n"
" Run codec in exhaustive-mode. This usually produces only\n"
" marginally better quality than \"-thorough\" while considerably\n"
" increasing encode time.\n"
"\n"
"\n"
"Low-level error weighting options:\n"
"----------------------------------\n"
"These options provide low-level control of the error-weighting options\n"
"that the codec provides.\n"
"\n"
" -v <radius> <power> <baseweight> <avgscale> <stdevscale> <mixing-factor>\n"
" Compute the per-texel relative error weighting for the RGB color\n"
" channels as follows:\n"
"\n"
" weight = 1 / (<baseweight> + <avgscale>\n"
" * average^2 + <stdevscale> * stdev^2)\n"
"\n"
" The average and stdev are computed as the average-value and the\n"
" standard deviation across a neighborhood of each texel; the <radius>\n"
" argument specifies how wide this neighborhood should be.\n"
" If this option is given without -va, it affects the weighting of RGB\n"
" color components only, while alpha is assigned the weight 1.0 .\n"
"\n"
" The <mixing-factor> parameter is used to control the degree of mixing\n"
" between color channels. Setting this parameter to 0 causes the average\n"
" and stdev computation to be done completely separately for each color\n"
" channel; setting it to 1 causes the results from the red, green and\n"
" blue color channel to be combined into a single result that is applied\n"
" to all three channels. It is possible to set the mixing factor\n"
" to a value between 0 and 1 in order to obtain a result in-between.\n"
"\n"
" The <power> argument is a power used to raise the values of the input\n"
" pixels before computing average and stdev; e.g. a power of 0.5 causes\n"
" the codec to take the square root of every input pixel value before\n"
" computing the averages and standard deviations.\n"
"\n"
" -va <baseweight> <power> <avgscale> <stdevscale>\n"
" Used together with -v; it computes a relative per-texel\n"
" weighting for the alpha component based on average and standard\n"
" deviation in the same manner as described for -v, but with its own\n"
" <baseweight>, <power>, <avgscale> and <stdevscale> parameters.\n"
"\n"
" -a <radius>\n"
" For textures with alpha channel, scale per-texel weights by\n"
" alpha. The alpha value chosen for scaling of any particular texel\n"
" is taken as an average across a neighborhood of the texel.\n"
" The <radius> argument gives the radius of this neighborhood;\n"
" a radius of 0 causes the texel's own alpha value to be used with\n"
" no contribution from neighboring texels.\n"
"\n"
" -ch <red_weight> <green_weight> <blue_weight> <alpha_weight>\n"
" Assign relative weight to each color channel.\n"
" If this option is combined with any of the other options above,\n"
" the other options are used to compute a weighting, then the \n"
" weigthing is multiplied by the weighting provided by this argument.\n"
"\n"
" -rn\n"
" Assume that the red and alpha color channels (after swizzle)\n"
" represent the X and Y components for a normal map,\n"
" and scale the error weighting so as to match angular error as closely\n"
" as possible. The reconstruction function for the Z component\n"
" is assumed to be Z=sqrt(1 - X^2 - X^2).\n"
"\n"
" -b <weighting>\n"
" Increase error weight for texels at compression-block edges\n"
" and corners; the parameter specifies how much the weights are to be\n"
" modified, with 0 giving no modification. Higher values should reduce\n"
" block-artifacts, at the cost of worsening other artifacts.\n"
"\n"
"\n"
"Low-level performance-quality tradeoff options:\n"
"-----------------------------------------------\n"
"These options provide low-level control of the performance-quality tradeoffs\n"
"that the codec provides.\n"
"\n"
" -plimit <number>\n"
" Test only <number> different partitionings. Higher numbers give better\n"
" quality at the expense of longer decode time; however large values tend\n"
" to give diminishing returns. This parameter can be set to a\n"
" number from 1 to %d. By default, this limit is set based on the active\n"
" preset, as follows:\n"
" -veryfast : 2\n"
" -fast : 4\n"
" -medium : 25\n"
" -thorough : 100\n"
" -exhaustive : %d\n"
"\n"
" -dblimit <number>\n"
" Stop compression work on a block as soon as the PSNR of the block,\n"
" as measured in dB, exceeds this limit. Higher numbers give better\n"
" quality at the expense of longer encode times. If not set explicitly,\n"
" it is set based on the currently-active block size and preset, as listed\n"
" below (where N is the number of texels per block):\n"
"\n"
" -veryfast : dblimit = MAX( 53-19*log10(N), 70-35*log10(N) )\n"