//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#import "ViewController.h"
#import <AVFoundation/AVFoundation.h>
#import <MicrosoftCognitiveServicesSpeech/SPXSpeechApi.h>
@interface ViewController () {
NSString *speechKey;
NSString *serviceRegion;
NSString *pronunciationAssessmentReferenceText;
}
@property (strong, nonatomic) IBOutlet UIButton *recognizeFromFileButton;
@property (strong, nonatomic) IBOutlet UIButton *recognizeFromMicButton;
@property (strong, nonatomic) IBOutlet UIButton *recognizeWithPhraseHintButton;
@property (strong, nonatomic) IBOutlet UIButton *recognizeWithPushStreamButton;
@property (strong, nonatomic) IBOutlet UIButton *recognizeWithPullStreamButton;
@property (strong, nonatomic) IBOutlet UIButton *recognizeWithAutoLanguageDetectionButton;
@property (strong, nonatomic) IBOutlet UIButton *pronunciationAssessFromMicButton;
@property (strong, nonatomic) IBOutlet UIButton *recognizeKeywordFromFileButton;
@property (strong, nonatomic) IBOutlet UILabel *recognitionResultLabel;
- (IBAction)recognizeFromFileButtonTapped:(UIButton *)sender;
- (IBAction)recognizeFromMicButtonTapped:(UIButton *)sender;
- (IBAction)recognizeWithPhraseHintButtonTapped:(UIButton *)sender;
- (IBAction)recognizeWithPushStreamButtonTapped:(UIButton *)sender;
- (IBAction)recognizeWithPullStreamButtonTapped:(UIButton *)sender;
- (IBAction)recognizeWithAutoLanguageDetectionButtonTapped:(UIButton *)sender;
- (IBAction)recognizeKeywordFromFileButtonTapped:(UIButton *)sender;
- (IBAction)pronunciationAssessFromMicButtonTapped:(UIButton *)sender;
@end
@implementation ViewController
- (void)viewDidLoad {
[super viewDidLoad];
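// Replace these placeholder values with your own Speech resource key and region (for example, @"westus") before running.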
speechKey = @"YourSubscriptionKey";
serviceRegion = @"YourServiceRegion";
pronunciationAssessmentReferenceText = @"Hello world.";
[self.view setBackgroundColor:[UIColor whiteColor]];
self.recognizeFromMicButton = [UIButton buttonWithType:UIButtonTypeSystem];
[self.recognizeFromMicButton addTarget:self action:@selector(recognizeFromMicButtonTapped:) forControlEvents:UIControlEventTouchUpInside];
[self.recognizeFromMicButton setTitle:@"Start rec from mic" forState:UIControlStateNormal];
[self.recognizeFromMicButton setFrame:CGRectMake(50.0, 100.0, 300.0, 50.0)];
self.recognizeFromMicButton.accessibilityIdentifier = @"recognize_mic_button";
[self.view addSubview:self.recognizeFromMicButton];
self.recognizeFromFileButton = [UIButton buttonWithType:UIButtonTypeSystem];
[self.recognizeFromFileButton addTarget:self action:@selector(recognizeFromFileButtonTapped:) forControlEvents:UIControlEventTouchUpInside];
[self.recognizeFromFileButton setTitle:@"Start rec from file" forState:UIControlStateNormal];
[self.recognizeFromFileButton setFrame:CGRectMake(50.0, 150.0, 300.0, 50.0)];
self.recognizeFromFileButton.accessibilityIdentifier = @"recognize_file_button";
[self.view addSubview:self.recognizeFromFileButton];
self.recognizeWithPhraseHintButton = [UIButton buttonWithType:UIButtonTypeSystem];
[self.recognizeWithPhraseHintButton addTarget:self action:@selector(recognizeWithPhraseHintButtonTapped:) forControlEvents:UIControlEventTouchUpInside];
[self.recognizeWithPhraseHintButton setTitle:@"Start rec from file with PhraseHint" forState:UIControlStateNormal];
[self.recognizeWithPhraseHintButton setFrame:CGRectMake(50.0, 200.0, 300.0, 50.0)];
self.recognizeWithPhraseHintButton.accessibilityIdentifier = @"recognize_phrase_hint_button";
[self.view addSubview:self.recognizeWithPhraseHintButton];
self.recognizeWithPushStreamButton = [UIButton buttonWithType:UIButtonTypeSystem];
[self.recognizeWithPushStreamButton addTarget:self action:@selector(recognizeWithPushStreamButtonTapped:) forControlEvents:UIControlEventTouchUpInside];
[self.recognizeWithPushStreamButton setTitle:@"Start rec from file with push stream" forState:UIControlStateNormal];
[self.recognizeWithPushStreamButton setFrame:CGRectMake(50.0, 250.0, 300.0, 50.0)];
self.recognizeWithPushStreamButton.accessibilityIdentifier = @"recognize_push_stream_button";
[self.view addSubview:self.recognizeWithPushStreamButton];
self.recognizeWithPullStreamButton = [UIButton buttonWithType:UIButtonTypeSystem];
[self.recognizeWithPullStreamButton addTarget:self action:@selector(recognizeWithPullStreamButtonTapped:) forControlEvents:UIControlEventTouchUpInside];
[self.recognizeWithPullStreamButton setTitle:@"Start rec from file with pull stream" forState:UIControlStateNormal];
[self.recognizeWithPullStreamButton setFrame:CGRectMake(50.0, 300.0, 300.0, 50.0)];
self.recognizeWithPullStreamButton.accessibilityIdentifier = @"recognize_pull_stream_button";
[self.view addSubview:self.recognizeWithPullStreamButton];
self.recognizeWithAutoLanguageDetectionButton = [UIButton buttonWithType:UIButtonTypeSystem];
[self.recognizeWithAutoLanguageDetectionButton addTarget:self action:@selector(recognizeWithAutoLanguageDetectionButtonTapped:) forControlEvents:UIControlEventTouchUpInside];
[self.recognizeWithAutoLanguageDetectionButton setTitle:@"Start rec with auto language detection" forState:UIControlStateNormal];
[self.recognizeWithAutoLanguageDetectionButton setFrame:CGRectMake(50.0, 350.0, 300.0, 50.0)];
self.recognizeWithAutoLanguageDetectionButton.accessibilityIdentifier = @"recognize_language_detection_button";
[self.view addSubview:self.recognizeWithAutoLanguageDetectionButton];
self.recognizeKeywordFromFileButton = [UIButton buttonWithType:UIButtonTypeSystem];
[self.recognizeKeywordFromFileButton addTarget:self action:@selector(recognizeKeywordFromFileButtonTapped:) forControlEvents:UIControlEventTouchUpInside];
[self.recognizeKeywordFromFileButton setTitle:@"Recognize keyword" forState:UIControlStateNormal];
[self.recognizeKeywordFromFileButton setFrame:CGRectMake(50.0, 400.0, 300.0, 50.0)];
self.recognizeKeywordFromFileButton.accessibilityIdentifier = @"recognize_keyword_button";
[self.view addSubview:self.recognizeKeywordFromFileButton];
self.pronunciationAssessFromMicButton = [UIButton buttonWithType:UIButtonTypeSystem];
[self.pronunciationAssessFromMicButton addTarget:self action:@selector(pronunciationAssessFromMicButtonTapped:) forControlEvents:UIControlEventTouchUpInside];
[self.pronunciationAssessFromMicButton setTitle:[NSString stringWithFormat:@"Start pronunciation assessment \n (Read out \"%@\")", pronunciationAssessmentReferenceText] forState:UIControlStateNormal];
[self.pronunciationAssessFromMicButton titleLabel].lineBreakMode = NSLineBreakByWordWrapping;
[self.pronunciationAssessFromMicButton titleLabel].textAlignment = NSTextAlignmentCenter;
[self.pronunciationAssessFromMicButton setFrame:CGRectMake(50.0, 450.0, 300.0, 50.0)];
self.pronunciationAssessFromMicButton.accessibilityIdentifier = @"pronuciation_assessment_button";
[self.view addSubview:self.pronunciationAssessFromMicButton];
self.recognitionResultLabel = [[UILabel alloc] initWithFrame:CGRectMake(50.0, 350.0, 300.0, 400.0)];
self.recognitionResultLabel.lineBreakMode = NSLineBreakByWordWrapping;
self.recognitionResultLabel.numberOfLines = 0;
self.recognitionResultLabel.accessibilityIdentifier = @"result_label";
[self.recognitionResultLabel setText:@"Press a button!"];
[self.view addSubview:self.recognitionResultLabel];
}
- (IBAction)recognizeFromFileButtonTapped:(UIButton *)sender {
dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
[self recognizeFromFile];
});
}
- (IBAction)recognizeFromMicButtonTapped:(UIButton *)sender {
dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
[self recognizeFromMicrophone];
});
}
- (IBAction)recognizeWithPhraseHintButtonTapped:(UIButton *)sender {
dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
[self recognizeWithPhraseHint];
});
}
- (IBAction)recognizeWithPushStreamButtonTapped:(UIButton *)sender {
dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
[self recognizeWithPushStream];
});
}
- (IBAction)recognizeWithPullStreamButtonTapped:(UIButton *)sender {
dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
[self recognizeWithPullStream];
});
}
- (IBAction)recognizeWithAutoLanguageDetectionButtonTapped:(UIButton *)sender {
dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
[self recognizeWithAutoLanguageDetection];
});
}
- (IBAction)recognizeKeywordFromFileButtonTapped:(UIButton *)sender {
dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
[self recognizeKeywordFromFile];
});
}
- (IBAction)pronunciationAssessFromMicButtonTapped:(UIButton *)sender {
dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
[self pronunciationAssessFromMicrophone];
});
}
/*
* Performs speech recognition from a RIFF wav file.
*/
- (void)recognizeFromFile {
NSBundle *mainBundle = [NSBundle mainBundle];
NSString *weatherFile = [mainBundle pathForResource: @"whatstheweatherlike" ofType:@"wav"];
NSLog(@"weatherFile path: %@", weatherFile);
if (!weatherFile) {
NSLog(@"Cannot find audio file!");
[self updateRecognitionErrorText:(@"Cannot find audio file")];
return;
}
SPXAudioConfiguration* weatherAudioSource = [[SPXAudioConfiguration alloc] initWithWavFileInput:weatherFile];
if (!weatherAudioSource) {
NSLog(@"Loading audio file failed!");
[self updateRecognitionErrorText:(@"Audio Error")];
return;
}
SPXSpeechConfiguration *speechConfig = [[SPXSpeechConfiguration alloc] initWithSubscription:speechKey region:serviceRegion];
if (!speechConfig) {
NSLog(@"Could not load speech config");
[self updateRecognitionErrorText:(@"Speech Config Error")];
return;
}
[self updateRecognitionStatusText:(@"Recognizing...")];
SPXSpeechRecognizer* speechRecognizer = [[SPXSpeechRecognizer alloc] initWithSpeechConfiguration:speechConfig audioConfiguration:weatherAudioSource];
if (!speechRecognizer) {
NSLog(@"Could not create speech recognizer");
[self updateRecognitionResultText:(@"Speech Recognition Error")];
return;
}
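// recognizeOnce blocks until a single utterance has been recognized (ended by silence or a timeout), so it suits single-shot queries rather than long-form dictation.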
SPXSpeechRecognitionResult *speechResult = [speechRecognizer recognizeOnce];
if (SPXResultReason_Canceled == speechResult.reason) {
SPXCancellationDetails *details = [[SPXCancellationDetails alloc] initFromCanceledRecognitionResult:speechResult];
NSLog(@"Speech recognition was canceled: %@. Did you pass the correct key/region combination?", details.errorDetails);
[self updateRecognitionErrorText:([NSString stringWithFormat:@"Canceled: %@", details.errorDetails ])];
} else if (SPXResultReason_RecognizedSpeech == speechResult.reason) {
NSLog(@"Speech recognition result received: %@", speechResult.text);
[self updateRecognitionResultText:(speechResult.text)];
} else {
NSLog(@"There was an error.");
[self updateRecognitionErrorText:(@"Speech Recognition Error")];
}
}
/*
* Performs speech recognition on audio data from the default microphone.
*/
- (void)recognizeFromMicrophone {
SPXSpeechConfiguration *speechConfig = [[SPXSpeechConfiguration alloc] initWithSubscription:speechKey region:serviceRegion];
if (!speechConfig) {
NSLog(@"Could not load speech config");
[self updateRecognitionErrorText:(@"Speech Config Error")];
return;
}
[self updateRecognitionStatusText:(@"Recognizing...")];
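// With the single-argument initializer (no audio configuration), the recognizer listens on the device's default microphone.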
SPXSpeechRecognizer* speechRecognizer = [[SPXSpeechRecognizer alloc] init:speechConfig];
if (!speechRecognizer) {
NSLog(@"Could not create speech recognizer");
[self updateRecognitionResultText:(@"Speech Recognition Error")];
return;
}
SPXSpeechRecognitionResult *speechResult = [speechRecognizer recognizeOnce];
if (SPXResultReason_Canceled == speechResult.reason) {
SPXCancellationDetails *details = [[SPXCancellationDetails alloc] initFromCanceledRecognitionResult:speechResult];
NSLog(@"Speech recognition was canceled: %@. Did you pass the correct key/region combination?", details.errorDetails);
[self updateRecognitionErrorText:([NSString stringWithFormat:@"Canceled: %@", details.errorDetails ])];
} else if (SPXResultReason_RecognizedSpeech == speechResult.reason) {
NSLog(@"Speech recognition result received: %@", speechResult.text);
[self updateRecognitionResultText:(speechResult.text)];
} else {
NSLog(@"There was an error.");
[self updateRecognitionErrorText:(@"Speech Recognition Error")];
}
}
/*
* Performs speech recognition on audio data from a wav file, with recognition hints in the form of a text phrase.
*/
- (void)recognizeWithPhraseHint {
NSBundle *mainBundle = [NSBundle mainBundle];
NSString *beachFile = [mainBundle pathForResource: @"wreck-a-nice-beach" ofType:@"wav"];
if (!beachFile) {
NSLog(@"Cannot find audio file!");
[self updateRecognitionErrorText:(@"Cannot find audio file")];
return;
}
SPXAudioConfiguration* weatherAudioSource = [[SPXAudioConfiguration alloc] initWithWavFileInput:beachFile];
if (!weatherAudioSource) {
NSLog(@"Loading audio file failed!");
[self updateRecognitionErrorText:(@"Audio Error")];
return;
}
SPXSpeechConfiguration *speechConfig = [[SPXSpeechConfiguration alloc] initWithSubscription:speechKey region:serviceRegion];
if (!speechConfig) {
NSLog(@"Could not load speech config");
[self updateRecognitionErrorText:(@"Speech Config Error")];
return;
}
SPXSpeechRecognizer* speechRecognizer = [[SPXSpeechRecognizer alloc] initWithSpeechConfiguration:speechConfig audioConfiguration:weatherAudioSource];
if (!speechRecognizer) {
NSLog(@"Could not create speech recognizer");
[self updateRecognitionResultText:(@"Speech Recognition Error")];
return;
}
// add a phrase hint to the recognizer's grammar
SPXPhraseListGrammar * phraseListGrammar = [[SPXPhraseListGrammar alloc] initWithRecognizer:speechRecognizer];
[phraseListGrammar addPhrase:@"Wreck a nice beach"];
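// The phrase list biases recognition toward the added phrase; without it, this audio is
// likely to come back as the acoustically similar "recognize speech".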
[self updateRecognitionStatusText:(@"Recognizing...")];
SPXSpeechRecognitionResult *speechResult = [speechRecognizer recognizeOnce];
if (SPXResultReason_Canceled == speechResult.reason) {
SPXCancellationDetails *details = [[SPXCancellationDetails alloc] initFromCanceledRecognitionResult:speechResult];
NSLog(@"Speech recognition was canceled: %@. Did you pass the correct key/region combination?", details.errorDetails);
[self updateRecognitionErrorText:([NSString stringWithFormat:@"Canceled: %@", details.errorDetails ])];
} else if (SPXResultReason_RecognizedSpeech == speechResult.reason) {
NSLog(@"Speech recognition result received: %@", speechResult.text);
[self updateRecognitionResultText:(speechResult.text)];
} else {
NSLog(@"There was an error.");
[self updateRecognitionErrorText:(@"Speech Recognition Error")];
}
}
/*
* Performs continuous speech recognition using a push stream for audio data.
*/
- (void)recognizeWithPushStream {
NSBundle *mainBundle = [NSBundle mainBundle];
NSString *weatherFile = [mainBundle pathForResource: @"whatstheweatherlike" ofType:@"wav"];
NSLog(@"weatherFile path: %@", weatherFile);
if (!weatherFile) {
NSLog(@"Cannot find audio file!");
[self updateRecognitionErrorText:(@"Cannot find audio file")];
return;
}
NSURL *targetUrl = [NSURL fileURLWithPath:weatherFile];
NSError *error = nil;
AVAudioFile *audioFile = [[AVAudioFile alloc] initForReading:targetUrl commonFormat:AVAudioPCMFormatInt16 interleaved:NO error:&error];
if (error)
{
NSLog(@"Error while opening file: %@", error);
[self updateRecognitionErrorText:(@"Error opening audio file")];
return;
}
// check the format of the file
NSAssert(1 == audioFile.fileFormat.channelCount, @"Bad channel count");
NSAssert(16000 == audioFile.fileFormat.sampleRate, @"Unexpected sample rate");
// set up the stream
SPXAudioStreamFormat *audioFormat = [[SPXAudioStreamFormat alloc] initUsingPCMWithSampleRate:audioFile.fileFormat.sampleRate bitsPerSample:16 channels:1];
SPXPushAudioInputStream* stream;
stream = [[SPXPushAudioInputStream alloc] initWithAudioFormat:audioFormat];
SPXAudioConfiguration* audioConfig = [[SPXAudioConfiguration alloc] initWithStreamInput:stream];
if (!audioConfig) {
NSLog(@"Error creating stream!");
[self updateRecognitionErrorText:(@"Error creating stream!")];
return;
}
SPXSpeechConfiguration *speechConfig = [[SPXSpeechConfiguration alloc] initWithSubscription:speechKey region:serviceRegion];
if (!speechConfig) {
NSLog(@"Could not load speech config");
[self updateRecognitionErrorText:(@"Speech Config Error")];
return;
}
SPXSpeechRecognizer* speechRecognizer = [[SPXSpeechRecognizer alloc] initWithSpeechConfiguration:speechConfig audioConfiguration:audioConfig];
if (!speechRecognizer) {
NSLog(@"Could not create speech recognizer");
[self updateRecognitionResultText:(@"Speech Recognition Error")];
return;
}
// connect callbacks
[speechRecognizer addRecognizingEventHandler: ^ (SPXSpeechRecognizer *recognizer, SPXSpeechRecognitionEventArgs *eventArgs) {
NSLog(@"Received intermediate result event. SessionId: %@, recognition result:%@. Status %ld. offset %llu duration %llu resultid:%@", eventArgs.sessionId, eventArgs.result.text, (long)eventArgs.result.reason, eventArgs.result.offset, eventArgs.result.duration, eventArgs.result.resultId);
[self updateRecognitionStatusText:eventArgs.result.text];
}];
[speechRecognizer addRecognizedEventHandler: ^ (SPXSpeechRecognizer *recognizer, SPXSpeechRecognitionEventArgs *eventArgs) {
NSLog(@"Received final result event. SessionId: %@, recognition result:%@. Status %ld. offset %llu duration %llu resultid:%@", eventArgs.sessionId, eventArgs.result.text, (long)eventArgs.result.reason, eventArgs.result.offset, eventArgs.result.duration, eventArgs.result.resultId);
[self updateRecognitionResultText:eventArgs.result.text];
}];
// start recognizing
[self updateRecognitionStatusText:(@"Recognizing from push stream...")];
[speechRecognizer startContinuousRecognition];
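// startContinuousRecognition returns as soon as recognition has started; results arrive
// asynchronously through the event handlers registered above.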
// set up the buffer to push data into the stream
const AVAudioFrameCount nBytesToRead = 5000;
const NSInteger bytesPerFrame = audioFile.fileFormat.streamDescription->mBytesPerFrame;
AVAudioPCMBuffer *buffer = [[AVAudioPCMBuffer alloc] initWithPCMFormat:audioFile.fileFormat frameCapacity:nBytesToRead / bytesPerFrame];
NSAssert(1 == buffer.stride, @"only one channel allowed");
NSAssert(nil != buffer.int16ChannelData, @"unexpected sample format");
// push data to stream
while (1)
{
NSError *bufferError = nil;
bool success = [audioFile readIntoBuffer:buffer error:&bufferError];
if (!success) {
NSLog(@"Read error on stream: %@", bufferError);
[stream close];
break;
}
else
{
NSInteger nBytesRead = [buffer frameLength] * bytesPerFrame;
if (0 == nBytesRead)
{
[stream close];
break;
}
NSLog(@"Read %d bytes from file", (int)nBytesRead);
NSData *data = [NSData dataWithBytesNoCopy:buffer.int16ChannelData[0] length:nBytesRead freeWhenDone:NO];
NSLog(@"%d bytes data returned", (int)[data length]);
[stream write:data];
NSLog(@"Wrote %d bytes to stream", (int)[data length]);
}
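// Throttle the writes to roughly real time; the service does not require this for file
// input, but it keeps the sample's behavior close to a live audio source.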
[NSThread sleepForTimeInterval:0.05f];
}
[speechRecognizer stopContinuousRecognition];
}
/*
* Performs continuous speech recognition using a pull stream for audio data.
*/
- (void)recognizeWithPullStream {
NSBundle *mainBundle = [NSBundle mainBundle];
NSString *weatherFile = [mainBundle pathForResource: @"whatstheweatherlike" ofType:@"wav"];
NSLog(@"weatherFile path: %@", weatherFile);
if (!weatherFile) {
NSLog(@"Cannot find audio file!");
[self updateRecognitionErrorText:(@"Cannot find audio file")];
return;
}
NSURL *targetUrl = [NSURL fileURLWithPath:weatherFile];
NSError *error = nil;
AVAudioFile *audioFile = [[AVAudioFile alloc] initForReading:targetUrl commonFormat:AVAudioPCMFormatInt16 interleaved:NO error:&error];
if (error)
{
NSLog(@"Error while opening file: %@", error);
[self updateRecognitionErrorText:(@"Error opening audio file")];
return;
}
const NSInteger bytesPerFrame = audioFile.fileFormat.streamDescription->mBytesPerFrame;
// check the format of the file
NSAssert(1 == audioFile.fileFormat.channelCount, @"Bad channel count");
NSAssert(16000 == audioFile.fileFormat.sampleRate, @"Unexpected sample rate");
// set up the stream with the pull callback
SPXPullAudioInputStream* stream = [[SPXPullAudioInputStream alloc]
initWithReadHandler:
^NSInteger(NSMutableData *data, NSUInteger size) {
AVAudioPCMBuffer *buffer = [[AVAudioPCMBuffer alloc] initWithPCMFormat:audioFile.fileFormat frameCapacity:(AVAudioFrameCount) size / bytesPerFrame];
NSError *bufferError = nil;
bool success = [audioFile readIntoBuffer:buffer error:&bufferError];
NSInteger nBytes = 0;
if (!success) {
// returns 0 to close the stream on read error.
NSLog(@"Read error on stream: %@", bufferError);
}
else
{
// number of bytes in the buffer
nBytes = [buffer frameLength] * bytesPerFrame;
NSRange range;
range.location = 0;
range.length = nBytes;
NSAssert(1 == buffer.stride, @"only one channel allowed");
NSAssert(nil != buffer.int16ChannelData, @"unexpected sample format");
[data replaceBytesInRange:range withBytes:buffer.int16ChannelData[0]];
NSLog(@"%d bytes data returned", (int)[data length]);
}
// returns the number of bytes that have been read, 0 closes the stream.
return nBytes;
}
closeHandler:
^(void) {
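// Nothing to release here; the captured AVAudioFile is closed when it is deallocated.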
}];
SPXAudioConfiguration* audioConfig = [[SPXAudioConfiguration alloc] initWithStreamInput:stream];
if (!audioConfig) {
NSLog(@"Error creating stream!");
[self updateRecognitionErrorText:(@"Error creating stream!")];
return;
}
SPXSpeechConfiguration *speechConfig = [[SPXSpeechConfiguration alloc] initWithSubscription:speechKey region:serviceRegion];
if (!speechConfig) {
NSLog(@"Could not load speech config");
[self updateRecognitionErrorText:(@"Speech Config Error")];
return;
}
SPXSpeechRecognizer* speechRecognizer = [[SPXSpeechRecognizer alloc] initWithSpeechConfiguration:speechConfig audioConfiguration:audioConfig];
if (!speechRecognizer) {
NSLog(@"Could not create speech recognizer");
[self updateRecognitionResultText:(@"Speech Recognition Error")];
return;
}
// connect callbacks
[speechRecognizer addRecognizingEventHandler: ^ (SPXSpeechRecognizer *recognizer, SPXSpeechRecognitionEventArgs *eventArgs) {
NSLog(@"Received intermediate result event. SessionId: %@, recognition result:%@. Status %ld. offset %llu duration %llu resultid:%@", eventArgs.sessionId, eventArgs.result.text, (long)eventArgs.result.reason, eventArgs.result.offset, eventArgs.result.duration, eventArgs.result.resultId);
[self updateRecognitionStatusText:eventArgs.result.text];
}];
[speechRecognizer addRecognizedEventHandler: ^ (SPXSpeechRecognizer *recognizer, SPXSpeechRecognitionEventArgs *eventArgs) {
NSLog(@"Received final result event. SessionId: %@, recognition result:%@. Status %ld. offset %llu duration %llu resultid:%@", eventArgs.sessionId, eventArgs.result.text, (long)eventArgs.result.reason, eventArgs.result.offset, eventArgs.result.duration, eventArgs.result.resultId);
[self updateRecognitionResultText:eventArgs.result.text];
}];
// session stopped callback to recognize stream has ended
__block bool end = false;
[speechRecognizer addSessionStoppedEventHandler: ^ (SPXRecognizer *recognizer, SPXSessionEventArgs *eventArgs) {
NSLog(@"Received session stopped event. SessionId: %@", eventArgs.sessionId);
end = true;
}];
// start recognizing
[self updateRecognitionStatusText:(@"Recognizing from pull stream...")];
[speechRecognizer startContinuousRecognition];
// wait until a session stopped event has been received
while (end == false)
[NSThread sleepForTimeInterval:1.0f];
[speechRecognizer stopContinuousRecognition];
}
/*
* Performs speech recognition with auto source language detection
*/
- (void)recognizeWithAutoLanguageDetection {
NSBundle *mainBundle = [NSBundle mainBundle];
NSString *weatherFile = [mainBundle pathForResource: @"whatstheweatherlike" ofType:@"wav"];
NSLog(@"weatherFile path: %@", weatherFile);
if (!weatherFile) {
NSLog(@"Cannot find audio file!");
[self updateRecognitionErrorText:(@"Cannot find audio file")];
return;
}
SPXAudioConfiguration* weatherAudioSource = [[SPXAudioConfiguration alloc] initWithWavFileInput:weatherFile];
if (!weatherAudioSource) {
NSLog(@"Loading audio file failed!");
[self updateRecognitionErrorText:(@"Audio Error")];
return;
}
SPXSpeechConfiguration *speechConfig = [[SPXSpeechConfiguration alloc] initWithSubscription:speechKey region:serviceRegion];
if (!speechConfig) {
NSLog(@"Could not load speech config");
[self updateRecognitionErrorText:(@"Speech Config Error")];
return;
}
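// Candidate source languages; at-start language detection picks the closest match from this list.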
NSArray *languages = @[@"zh-CN", @"en-US"];
SPXAutoDetectSourceLanguageConfiguration* autoDetectSourceLanguageConfig = [[SPXAutoDetectSourceLanguageConfiguration alloc] init:languages];
[self updateRecognitionStatusText:(@"Recognizing...")];
SPXSpeechRecognizer* speechRecognizer = [[SPXSpeechRecognizer alloc] initWithSpeechConfiguration:speechConfig
autoDetectSourceLanguageConfiguration:autoDetectSourceLanguageConfig
audioConfiguration:weatherAudioSource];
if (!speechRecognizer) {
NSLog(@"Could not create speech recognizer");
[self updateRecognitionResultText:(@"Speech Recognition Error")];
return;
}
SPXSpeechRecognitionResult *speechResult = [speechRecognizer recognizeOnce];
if (SPXResultReason_Canceled == speechResult.reason) {
SPXCancellationDetails *details = [[SPXCancellationDetails alloc] initFromCanceledRecognitionResult:speechResult];
NSLog(@"Speech recognition was canceled: %@. Did you pass the correct key/region combination?", details.errorDetails);
[self updateRecognitionErrorText:([NSString stringWithFormat:@"Canceled: %@", details.errorDetails ])];
} else if (SPXResultReason_RecognizedSpeech == speechResult.reason) {
SPXAutoDetectSourceLanguageResult *languageResult = [[SPXAutoDetectSourceLanguageResult alloc] init:speechResult];
NSLog(@"Speech recognition result received: %@ in language %@", speechResult.text, [languageResult language]);
NSString *resultText = [NSString stringWithFormat:@"Language: %@, %@", [languageResult language], speechResult.text];
[self updateRecognitionResultText:(resultText)];
} else {
NSLog(@"There was an error.");
[self updateRecognitionErrorText:(@"Speech Recognition Error")];
}
}
/*
* Performs keyword recognition from a wav file using kws.table keyword model
*/
- (void)recognizeKeywordFromFile {
NSBundle *mainBundle = [NSBundle mainBundle];
NSString *kwsWeatherFile = [mainBundle pathForResource: @"kws_whatstheweatherlike" ofType:@"wav"];
NSLog(@"kws_weatherFile path: %@", kwsWeatherFile);
if (!kwsWeatherFile) {
NSLog(@"Cannot find audio file!");
[self updateRecognitionErrorText:(@"Cannot find audio file")];
return;
}
SPXAudioConfiguration* audioFileInput = [[SPXAudioConfiguration alloc] initWithWavFileInput:kwsWeatherFile];
if (!audioFileInput) {
NSLog(@"Loading audio file failed!");
[self updateRecognitionErrorText:(@"Audio Error")];
return;
}
NSString *keywordModelFile = [mainBundle pathForResource: @"kws" ofType:@"table"];
NSLog(@"keyword model file path: %@", keywordModelFile);
if (!keywordModelFile) {
NSLog(@"Cannot find keyword model file!");
[self updateRecognitionErrorText:(@"Cannot find keyword model file")];
return;
}
SPXKeywordRecognitionModel* keywordRecognitionModel = [[SPXKeywordRecognitionModel alloc] initFromFile:keywordModelFile];
if (!keywordRecognitionModel) {
NSLog(@"Could not create keyword recognition model");
[self updateRecognitionErrorText:(@"Keyword Model Error")];
return;
}
SPXKeywordRecognizer* keywordRecognizer = [[SPXKeywordRecognizer alloc] init:audioFileInput];
if (!keywordRecognizer) {
NSLog(@"Could not create keyword recognizer");
[self updateRecognitionResultText:(@"Keyword Recognition Error")];
return;
}
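// recognizeOnceAsync returns immediately, so a semaphore is used to block this background
// thread until the completion callback delivers the result.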
dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);
__block SPXKeywordRecognitionResult * keywordResult;
[keywordRecognizer recognizeOnceAsync: ^ (SPXKeywordRecognitionResult *srresult) {
keywordResult = srresult;
dispatch_semaphore_signal(semaphore);
} keywordModel:keywordRecognitionModel];
[self updateRecognitionStatusText:(@"Waiting for keyword detected...")];
dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER);
if (SPXResultReason_Canceled == keywordResult.reason) {
SPXCancellationDetails *details = [[SPXCancellationDetails alloc] initFromCanceledRecognitionResult:keywordResult];
NSLog(@"Keyword recognition was canceled: %@.", details.errorDetails);
[self updateRecognitionErrorText:([NSString stringWithFormat:@"Canceled: %@", details.errorDetails ])];
} else if (SPXResultReason_RecognizedKeyword == keywordResult.reason) {
NSLog(@"Keyword recognition result received: %@", keywordResult.text);
[self updateRecognitionResultText:(keywordResult.text)];
} else {
NSLog(@"There was an error.");
[self updateRecognitionErrorText:(@"Keyword Recognition Error")];
}
}
/*
* Performs pronunciation assessment.
*/
- (void)pronunciationAssessFromMicrophone {
SPXSpeechConfiguration *speechConfig = [[SPXSpeechConfiguration alloc] initWithSubscription:speechKey region:serviceRegion];
if (!speechConfig) {
NSLog(@"Could not load speech config");
[self updateRecognitionErrorText:(@"Speech Config Error")];
return;
}
[self updateRecognitionStatusText:(@"Assessing...")];
SPXSpeechRecognizer* speechRecognizer = [[SPXSpeechRecognizer alloc] init:speechConfig];
if (!speechRecognizer) {
NSLog(@"Could not create speech recognizer");
[self updateRecognitionResultText:(@"Speech Recognition Error")];
return;
}
// Create the pronunciation assessment config; set the grading system, granularity, and whether to enable miscue detection to match your requirements.
SPXPronunciationAssessmentConfiguration *pronunciationConfig =
[[SPXPronunciationAssessmentConfiguration alloc] init:pronunciationAssessmentReferenceText
gradingSystem:SPXPronunciationAssessmentGradingSystem_HundredMark
granularity:SPXPronunciationAssessmentGranularity_Phoneme
enableMiscue:true];
[pronunciationConfig applyToRecognizer:speechRecognizer];
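// With miscue enabled, words inserted into or omitted from the reference text count against the score.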
SPXSpeechRecognitionResult *speechResult = [speechRecognizer recognizeOnce];
if (SPXResultReason_Canceled == speechResult.reason) {
SPXCancellationDetails *details = [[SPXCancellationDetails alloc] initFromCanceledRecognitionResult:speechResult];
NSLog(@"Speech recognition was canceled: %@. Did you pass the correct key/region combination?", details.errorDetails);
[self updateRecognitionErrorText:([NSString stringWithFormat:@"Canceled: %@", details.errorDetails ])];
} else if (SPXResultReason_RecognizedSpeech == speechResult.reason) {
NSLog(@"Speech recognition result received: %@", speechResult.text);
SPXPronunciationAssessmentResult *pronunciationResult = [[SPXPronunciationAssessmentResult alloc] init:speechResult];
NSString *resultText = [NSString stringWithFormat:@"Assessment finished. \nAccuracy score: %f, Pronunciation score: %f, Completeness Score: %f, Fluency score: %f.", pronunciationResult.accuracyScore, pronunciationResult.pronunciationScore, pronunciationResult.completenessScore, pronunciationResult.fluencyScore];
[self updateRecognitionResultText:resultText];
} else {
NSLog(@"There was an error.");
[self updateRecognitionErrorText:(@"Speech Recognition Error")];
}
}
- (void)updateRecognitionResultText:(NSString *) resultText {
dispatch_async(dispatch_get_main_queue(), ^{
self.recognitionResultLabel.textColor = UIColor.blackColor;
self.recognitionResultLabel.text = resultText;
});
}
- (void)updateRecognitionErrorText:(NSString *) errorText {
dispatch_async(dispatch_get_main_queue(), ^{
self.recognitionResultLabel.textColor = UIColor.redColor;
self.recognitionResultLabel.text = errorText;
});
}
- (void)updateRecognitionStatusText:(NSString *) statusText {
dispatch_async(dispatch_get_main_queue(), ^{
self.recognitionResultLabel.textColor = UIColor.grayColor;
self.recognitionResultLabel.text = statusText;
});
}
@end