diff --git a/accessibility.h b/accessibility.h index df965df2bf50..f52e6f4676e7 100644 --- a/accessibility.h +++ b/accessibility.h @@ -31,11 +31,36 @@ #endif #include "configuration.h" +#include "tasks/tasks_internal.h" + +#ifdef HAVE_THREADS +#include "rthreads/rthreads.h" +#endif typedef struct { + /* The last request task, used to prepare and send the translation */ + retro_task_t *request_task; + + /* The last response task, used to parse costly translation data */ + retro_task_t *response_task; + + /* Timestamp of the last translation request */ + retro_time_t last_call; + + #ifdef HAVE_THREADS + /* Necessary because last_image is manipulated by task handlers */ + slock_t *image_lock; + #endif + + /* Frame captured during the last call to the translation service */ + uint8_t *last_image; + int last_image_size; + + /* 1 if the automatic mode has been enabled, 0 otherwise */ int ai_service_auto; - /* Is text-to-speech accessibility turned on? */ + + /* Text-to-speech narrator override flag */ bool enabled; } access_state_t; @@ -46,42 +71,73 @@ bool is_narrator_running(bool accessibility_enable); #endif /* - This function does all the stuff needed to translate the game screen, - using the URL given in the settings. Once the image from the frame - buffer is sent to the server, the callback will write the translated - image to the screen. - - Supported client/services (thus far) - -VGTranslate client ( www.gitlab.com/spherebeaker/vg_translate ) - -Ztranslate client/service ( www.ztranslate.net/docs/service ) - - To use a client, download the relevant code/release, configure - them, and run them on your local machine, or network. Set the - retroarch configuration to point to your local client (usually - listening on localhost:4404 ) and enable translation service. - - If you don't want to run a client, you can also use a service, - which is basically like someone running a client for you. 
The - downside here is that your retroarch device will have to have - an internet connection, and you may have to sign up for it. - - To make your own server, it must listen for a POST request, which - will consist of a JSON body, with the "image" field as a base64 - encoded string of a 24bit-BMP/PNG that the will be translated. - The server must output the translated image in the form of a - JSON body, with the "image" field also as a base64 encoded - 24bit-BMP, or as an alpha channel png. - - "paused" boolean is passed in to indicate if the current call - was made during a paused frame. Due to how the menu widgets work, - if the ai service is called in "auto" mode, then this call will - be made while the menu widgets unpause the core for a frame to update - the on-screen widgets. To tell the ai service what the pause - mode is honestly, we store the runloop_paused variable from before - the handle_translation_cb wipes the widgets, and pass that in here. + Invoke this method to send a request to the AI service. + It makes the following POST request using URL params: + – source_lang (optional): language code of the content currently running. + – target_lang (optional): language of the content to return. + – output: comma-separated list of formats that must be provided by the + service. Also lists supported sub-formats. + + The currently supported formats are: + – sound: raw audio to playback. (wav) + – text: text to be read through internal text-to-speech capabilities. + 'subs' can be specified on top of that to explain that we are looking + for short text response in the manner of subtitles. + – image: image to display on top of the video feed. Widgets will be used + first if possible, otherwise we'll try to draw it directly on the + video buffer. (bmp, png, png-a) [All in 24-bits BGR formats] + + In addition, the request contains a JSON payload, formatted as such: + – image: captured frame from the currently running content (in base64). 
+ – format: format of the captured frame ("png", or "bmp"). + – coords: array describing the coordinates of the image within the + viewport space (x, y, width, height). + – viewport: array describing the size of the viewport (width, height). + – label: a text string describing the content (__). + – state: a JSON object describing the state of the frontend, containing: + – paused: 1 if the content has been paused, 0 otherwise. + – : the name of a retropad input, valued 1 if pressed. + (a, b, x, y, l, r, l2, r2, l3, r3) + (up, down, left, right, start, select) + + The translation component then expects a response from the AI service in the + form of a JSON payload, formatted as such: + – image: base64 representation of an image in a supported format. + – sound: base64 representation of a sound byte in a supported format. + – text: results from the service as a string. + – text_position: hint for the position of the text when the service is + running in text mode (ie subtitles). Position is a number, + 1 for Bottom or 2 for Top (defaults to bottom). + – press: a list of retropad input to forcibly press. On top of the + expected keys (cf. 'state' above) values 'pause' and 'unpause' can be + specified to control the flow of the content. + – error: any error encountered with the request. + – auto: either 'auto' or 'continue' to control automatic requests. + + All fields are optional, but at least one of them must be present. + If 'error' is set, the error is shown to the user and everything else is + ignored, even 'auto' settings. + + With 'auto' on 'auto', RetroArch will automatically send a new request + (with a minimum delay enforced by uints.ai_service_poll_delay), with a value + of 'continue', RetroArch will ignore the returned content and skip to the + next automatic request. This allows the service to specify that the returned + content is the same as the one previously sent, so RetroArch does not need to + update its display unless necessary. 
With 'continue' the service *must* + still send the content, as we may need to display it if the user paused the + AI service for instance. + + {paused} boolean is passed in to indicate if the current call was made + during a paused frame. Due to how the menu widgets work, if the AI service + is called in 'auto' mode, then this call will be made while the menu widgets + unpause the core for a frame to update the on-screen widgets. To tell the AI + service what the pause mode is honestly, we store the runloop_paused + variable from before the service wipes the widgets, and pass that in here. */ bool run_translation_service(settings_t *settings, bool paused); +void translation_release(bool inform); + bool accessibility_speak_priority( bool accessibility_enable, unsigned accessibility_narrator_speech_speed, diff --git a/config.def.h b/config.def.h index c6294a5fc05d..1ebbb626f1e2 100644 --- a/config.def.h +++ b/config.def.h @@ -1749,8 +1749,14 @@ #define DEFAULT_AI_SERVICE_MODE 1 +#define DEFAULT_AI_SERVICE_TEXT_POSITION 0 +#define DEFAULT_AI_SERVICE_TEXT_PADDING 5 + #define DEFAULT_AI_SERVICE_URL "http://localhost:4404/" +#define DEFAULT_AI_SERVICE_POLL_DELAY 0 +#define MAXIMUM_AI_SERVICE_POLL_DELAY 500 + #if defined(HAVE_FFMPEG) || defined(HAVE_MPV) #define DEFAULT_BUILTIN_MEDIAPLAYER_ENABLE true #else diff --git a/configuration.c b/configuration.c index 1bb55826bd62..6448f23087e2 100644 --- a/configuration.c +++ b/configuration.c @@ -2477,11 +2477,13 @@ static struct config_uint_setting *populate_settings_uint( SETTING_UINT("cheevos_appearance_anchor", &settings->uints.cheevos_appearance_anchor, true, DEFAULT_CHEEVOS_APPEARANCE_ANCHOR, false); SETTING_UINT("cheevos_visibility_summary", &settings->uints.cheevos_visibility_summary, true, DEFAULT_CHEEVOS_VISIBILITY_SUMMARY, false); #endif - SETTING_UINT("accessibility_narrator_speech_speed", &settings->uints.accessibility_narrator_speech_speed, true, DEFAULT_ACCESSIBILITY_NARRATOR_SPEECH_SPEED, false); - 
SETTING_UINT("ai_service_mode", &settings->uints.ai_service_mode, true, DEFAULT_AI_SERVICE_MODE, false); - SETTING_UINT("ai_service_target_lang", &settings->uints.ai_service_target_lang, true, 0, false); - SETTING_UINT("ai_service_source_lang", &settings->uints.ai_service_source_lang, true, 0, false); + SETTING_UINT("ai_service_mode", &settings->uints.ai_service_mode, true, DEFAULT_AI_SERVICE_MODE, false); + SETTING_UINT("ai_service_target_lang", &settings->uints.ai_service_target_lang, true, 0, false); + SETTING_UINT("ai_service_source_lang", &settings->uints.ai_service_source_lang, true, 0, false); + SETTING_UINT("ai_service_poll_delay", &settings->uints.ai_service_poll_delay, true, DEFAULT_AI_SERVICE_POLL_DELAY, false); + SETTING_UINT("ai_service_text_position", &settings->uints.ai_service_text_position, true, DEFAULT_AI_SERVICE_TEXT_POSITION, false); + SETTING_UINT("ai_service_text_padding", &settings->uints.ai_service_text_padding, true, DEFAULT_AI_SERVICE_TEXT_PADDING, false); #ifdef HAVE_LIBNX SETTING_UINT("libnx_overclock", &settings->uints.libnx_overclock, true, SWITCH_DEFAULT_CPU_PROFILE, false); diff --git a/configuration.h b/configuration.h index b951533465f1..a44ae3fbccf0 100644 --- a/configuration.h +++ b/configuration.h @@ -334,6 +334,9 @@ typedef struct settings unsigned ai_service_mode; unsigned ai_service_target_lang; unsigned ai_service_source_lang; + unsigned ai_service_poll_delay; + unsigned ai_service_text_position; + unsigned ai_service_text_padding; unsigned core_updater_auto_backup_history_size; unsigned video_black_frame_insertion; diff --git a/frontend/drivers/platform_win32.c b/frontend/drivers/platform_win32.c index 3c50c8205d92..ffccd7c8c281 100644 --- a/frontend/drivers/platform_win32.c +++ b/frontend/drivers/platform_win32.c @@ -1064,9 +1064,12 @@ static bool accessibility_speak_windows(int speed, if (!wc || res != 0) { RARCH_ERR("Error communicating with NVDA\n"); + /* Fallback on powershell immediately and retry */ + 
g_plat_win32_flags &= ~PLAT_WIN32_FLAG_USE_NVDA; + g_plat_win32_flags |= PLAT_WIN32_FLAG_USE_POWERSHELL; if (wc) free(wc); - return false; + return accessibility_speak_windows(speed, speak_text, priority); } nvdaController_cancelSpeech_func(); diff --git a/gfx/gfx_widgets.c b/gfx/gfx_widgets.c index 989decd85f5d..453dddc6636c 100644 --- a/gfx/gfx_widgets.c +++ b/gfx/gfx_widgets.c @@ -1471,6 +1471,67 @@ static void INLINE gfx_widgets_font_unbind(gfx_widget_font_data_t *font_data) font_driver_bind_block(font_data->font, NULL); } +#ifdef HAVE_TRANSLATE +static void gfx_widgets_ai_line( + video_frame_info_t *video, char *line, int line_idx, int line_total) +{ + settings_t *settings = config_get_ptr(); + gfx_display_t *p_disp = (gfx_display_t*)video->disp_userdata; + dispgfx_widget_t *p_widget = (dispgfx_widget_t*)video->widgets_userdata; + void *userdata = video->userdata; + unsigned video_width = video->width; + unsigned video_height = video->height; + + int line_width = font_driver_get_message_width( + p_widget->gfx_widget_fonts.regular.font, + line, strlen(line), 1.0f); + + int hpadding = p_widget->simple_widget_padding; + int vpadding = settings->uints.ai_service_text_padding; + int half_vw = video_width * 0.5f; + int block_width = line_width + hpadding * 2; + int block_height = p_widget->simple_widget_height; + int block_x = half_vw - block_width * 0.5f; + int block_y = 0; + int line_y = 0; + + int position = (settings->uints.ai_service_text_position > 0) + ? settings->uints.ai_service_text_position + : p_widget->ai_service_text_position; + + switch (position) + { + case 0: /* Undef. 
*/ + case 1: /* Bottom */ + block_y = (video_height * (100 - vpadding) * 0.01f) + - ((line_total - line_idx) * block_height); + break; + case 2: /* Top */ + block_y = (video_height * (vpadding * 0.01f)) + + (line_idx * block_height); + break; + } + + line_y = block_y + block_height * 0.5f + + p_widget->gfx_widget_fonts.regular.line_centre_offset; + + gfx_display_set_alpha(p_widget->backdrop_orig, DEFAULT_BACKDROP); + + gfx_display_draw_quad( + p_disp, userdata, video_width, video_height, + block_x, block_y, block_width, block_height, + video_width, video_height, + p_widget->backdrop_orig, + NULL); + + gfx_widgets_draw_text( + &p_widget->gfx_widget_fonts.regular, + line, half_vw, line_y, + video_width, video_height, + 0xFFFFFFFF, TEXT_ALIGN_CENTER, true); +} +#endif + void gfx_widgets_frame(void *data) { size_t i; @@ -1520,12 +1581,8 @@ void gfx_widgets_frame(void *data) /* AI Service overlay */ if (p_dispwidget->ai_service_overlay_state > 0) { - float outline_color[16] = { - 0.00, 1.00, 0.00, 1.00, - 0.00, 1.00, 0.00, 1.00, - 0.00, 1.00, 0.00, 1.00, - 0.00, 1.00, 0.00, 1.00, - }; + int text_length = strlen(p_dispwidget->ai_service_text); + gfx_display_set_alpha(p_dispwidget->pure_white, 1.0f); if (p_dispwidget->ai_service_overlay_texture) @@ -1550,63 +1607,46 @@ void gfx_widgets_frame(void *data) if (dispctx->blend_end) dispctx->blend_end(userdata); } - - /* top line */ - gfx_display_draw_quad( - p_disp, - userdata, - video_width, video_height, - 0, 0, - video_width, - p_dispwidget->divider_width_1px, - video_width, - video_height, - outline_color, - NULL - ); - /* bottom line */ - gfx_display_draw_quad( - p_disp, - userdata, - video_width, video_height, - 0, - video_height - p_dispwidget->divider_width_1px, - video_width, - p_dispwidget->divider_width_1px, - video_width, - video_height, - outline_color, - NULL - ); - /* left line */ - gfx_display_draw_quad( - p_disp, - userdata, - video_width, - video_height, - 0, - 0, - p_dispwidget->divider_width_1px, - 
video_height, - video_width, - video_height, - outline_color, - NULL - ); - /* right line */ - gfx_display_draw_quad( - p_disp, - userdata, - video_width, video_height, - video_width - p_dispwidget->divider_width_1px, - 0, - p_dispwidget->divider_width_1px, - video_height, - video_width, - video_height, - outline_color, - NULL - ); + + /* AI Service subtitle overlay widget */ + if (text_length > 0) + { + int padding = p_dispwidget->simple_widget_padding; + int text_width = font_driver_get_message_width( + p_dispwidget->gfx_widget_fonts.regular.font, + p_dispwidget->ai_service_text, + text_length, 1.0f); + + if (text_width > (video_width * 0.9f - padding * 2)) + { + int text_half = text_length / 2; + char *extra_line = (char*)malloc(sizeof(char) * text_length); + for (; text_half > 0; text_half--) + { + if (p_dispwidget->ai_service_text[text_half] == ' ') + { + p_dispwidget->ai_service_text[text_half] = '\0'; + gfx_widgets_ai_line( + video_info, p_dispwidget->ai_service_text, 0, 2); + strlcpy( + extra_line, + p_dispwidget->ai_service_text + text_half + 1, + text_length - text_half); + gfx_widgets_ai_line( + video_info, extra_line, 1, 2); + + p_dispwidget->ai_service_text[text_half] = ' '; + free(extra_line); + break; + } + } + } + else + { + gfx_widgets_ai_line( + video_info, p_dispwidget->ai_service_text, 0, 1); + } + } if (p_dispwidget->ai_service_overlay_state == 2) p_dispwidget->ai_service_overlay_state = 3; @@ -2149,6 +2189,7 @@ void gfx_widgets_ai_service_overlay_unload(void) if (p_dispwidget->ai_service_overlay_state == 1) { video_driver_texture_unload(&p_dispwidget->ai_service_overlay_texture); + p_dispwidget->ai_service_text[0] = '\0'; p_dispwidget->ai_service_overlay_texture = 0; p_dispwidget->ai_service_overlay_state = 0; } diff --git a/gfx/gfx_widgets.h b/gfx/gfx_widgets.h index 3b8195dc70e7..65bb9e990953 100644 --- a/gfx/gfx_widgets.h +++ b/gfx/gfx_widgets.h @@ -236,6 +236,8 @@ typedef struct dispgfx_widget #ifdef HAVE_TRANSLATE unsigned 
ai_service_overlay_width; unsigned ai_service_overlay_height; + unsigned ai_service_text_position; + char ai_service_text[255]; #endif uint8_t flags; diff --git a/intl/msg_hash_lbl.h b/intl/msg_hash_lbl.h index fbdef2afffc1..8f1dd793e125 100644 --- a/intl/msg_hash_lbl.h +++ b/intl/msg_hash_lbl.h @@ -6031,6 +6031,18 @@ MSG_HASH( MENU_ENUM_LABEL_AI_SERVICE_SOURCE_LANG, "ai_service_source_lang" ) +MSG_HASH( + MENU_ENUM_LABEL_AI_SERVICE_POLL_DELAY, + "ai_service_poll_delay" + ) +MSG_HASH( + MENU_ENUM_LABEL_AI_SERVICE_TEXT_POSITION, + "ai_service_text_position" + ) +MSG_HASH( + MENU_ENUM_LABEL_AI_SERVICE_TEXT_PADDING, + "ai_service_text_padding" + ) MSG_HASH( MENU_ENUM_LABEL_SETTINGS_SHOW_DRIVERS, "settings_show_drivers" diff --git a/intl/msg_hash_us.h b/intl/msg_hash_us.h index 9da8361c63ca..833568778dce 100644 --- a/intl/msg_hash_us.h +++ b/intl/msg_hash_us.h @@ -6565,9 +6565,9 @@ MSG_HASH( MENU_ENUM_LABEL_VALUE_AI_SERVICE_MODE, "AI Service Output" ) -MSG_HASH( /* FIXME What does the Narrator mode do? */ +MSG_HASH( MENU_ENUM_SUBLABEL_AI_SERVICE_MODE, - "Show translation as a text overlay (Image Mode), or play as Text-To-Speech (Speech Mode)." + "Show translation as an image overlay (Image Mode), as direct audio (Speech), text-to-speech (Narrator), or text overlay (Text)." ) MSG_HASH( MENU_ENUM_LABEL_VALUE_AI_SERVICE_URL, @@ -6609,6 +6609,30 @@ MSG_HASH( MENU_ENUM_SUBLABEL_AI_SERVICE_TARGET_LANG, "The language the service will translate to. 'Default' is English." ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_POLL_DELAY, + "AI Service Auto-Polling Delay" + ) +MSG_HASH( + MENU_ENUM_SUBLABEL_AI_SERVICE_POLL_DELAY, + "Minimum delay in ms between automatic calls. Lowers reactivity but increases CPU performance." + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION, + "AI Service Text Position Override" + ) +MSG_HASH( + MENU_ENUM_SUBLABEL_AI_SERVICE_TEXT_POSITION, + "Override for the position of the overlay, when the service is in Text mode." 
+ ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_PADDING, + "AI Service Text Padding (%)" + ) +MSG_HASH( + MENU_ENUM_SUBLABEL_AI_SERVICE_TEXT_PADDING, + "Vertical padding to apply to the text overlay, when the service is in Text mode. More padding will push the text towards the center of the screen." + ) /* Settings > Accessibility */ @@ -10176,6 +10200,26 @@ MSG_HASH( MENU_ENUM_LABEL_VALUE_AI_SERVICE_NARRATOR_MODE, "Narrator Mode" ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_MODE, + "Text Mode" + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_NARRATOR_MODE, + "Text + Narrator" + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_IMAGE_NARRATOR_MODE, + "Image + Narrator" + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_BOTTOM, + "Bottom" + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_TOP, + "Top" + ) MSG_HASH( MENU_ENUM_LABEL_VALUE_PLAYLIST_ENTRY_REMOVE_ENABLE_HIST_FAV, "History & Favorites" @@ -13239,6 +13283,22 @@ MSG_HASH( /* FIXME Should be MSG_ */ MENU_ENUM_LABEL_VALUE_SIDELOAD_CORE_ERROR, "Core installation failed" ) +MSG_HASH( + MSG_AI_VIDEO_DRIVER_NOT_SUPPORTED, + "Video driver not supported for AI Service." + ) +MSG_HASH( + MSG_AI_AUTO_MODE_ENABLED, + "Automatic translation enabled." + ) +MSG_HASH( + MSG_AI_AUTO_MODE_DISABLED, + "Automatic translation disabled." + ) +MSG_HASH( + MSG_AI_NOTHING_TO_TRANSLATE, + "Nothing to translate." + ) MSG_HASH( MSG_CHEAT_DELETE_ALL_INSTRUCTIONS, "Press right five times to delete all cheats." 
diff --git a/menu/cbs/menu_cbs_sublabel.c b/menu/cbs/menu_cbs_sublabel.c index 40d8eba0722d..380c11c41fbc 100644 --- a/menu/cbs/menu_cbs_sublabel.c +++ b/menu/cbs/menu_cbs_sublabel.c @@ -269,6 +269,9 @@ DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_target_lang, MENU_ENUM_S DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_source_lang, MENU_ENUM_SUBLABEL_AI_SERVICE_SOURCE_LANG) DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_url, MENU_ENUM_SUBLABEL_AI_SERVICE_URL) DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_enable, MENU_ENUM_SUBLABEL_AI_SERVICE_ENABLE) +DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_poll_delay, MENU_ENUM_SUBLABEL_AI_SERVICE_POLL_DELAY) +DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_text_position, MENU_ENUM_SUBLABEL_AI_SERVICE_TEXT_POSITION) +DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_text_padding, MENU_ENUM_SUBLABEL_AI_SERVICE_TEXT_PADDING) DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_power_management_settings_list, MENU_ENUM_SUBLABEL_POWER_MANAGEMENT_SETTINGS) DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_privacy_settings_list, MENU_ENUM_SUBLABEL_PRIVACY_SETTINGS) DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_midi_settings_list, MENU_ENUM_SUBLABEL_MIDI_SETTINGS) @@ -5001,6 +5004,15 @@ int menu_cbs_init_bind_sublabel(menu_file_list_cbs_t *cbs, case MENU_ENUM_LABEL_AI_SERVICE_ENABLE: BIND_ACTION_SUBLABEL(cbs, action_bind_sublabel_ai_service_enable); break; + case MENU_ENUM_LABEL_AI_SERVICE_POLL_DELAY: + BIND_ACTION_SUBLABEL(cbs, action_bind_sublabel_ai_service_poll_delay); + break; + case MENU_ENUM_LABEL_AI_SERVICE_TEXT_POSITION: + BIND_ACTION_SUBLABEL(cbs, action_bind_sublabel_ai_service_text_position); + break; + case MENU_ENUM_LABEL_AI_SERVICE_TEXT_PADDING: + BIND_ACTION_SUBLABEL(cbs, action_bind_sublabel_ai_service_text_padding); + break; case MENU_ENUM_LABEL_AI_SERVICE_SETTINGS: BIND_ACTION_SUBLABEL(cbs, action_bind_sublabel_ai_service_settings_list); break; diff --git 
a/menu/menu_displaylist.c b/menu/menu_displaylist.c index 094616d06e6f..5667760e5100 100644 --- a/menu/menu_displaylist.c +++ b/menu/menu_displaylist.c @@ -5938,12 +5938,14 @@ void menu_displaylist_info_init(menu_displaylist_info_t *info) info->setting = NULL; } -typedef struct menu_displaylist_build_info { +typedef struct menu_displaylist_build_info +{ enum msg_hash_enums enum_idx; enum menu_displaylist_parse_type parse_type; } menu_displaylist_build_info_t; -typedef struct menu_displaylist_build_info_selective { +typedef struct menu_displaylist_build_info_selective +{ enum msg_hash_enums enum_idx; enum menu_displaylist_parse_type parse_type; bool checked; @@ -6683,7 +6685,8 @@ unsigned menu_displaylist_build_list( bool playlist_show_sublabels = settings->bools.playlist_show_sublabels; bool history_list_enable = settings->bools.history_list_enable; bool truncate_playlist = settings->bools.ozone_truncate_playlist_name; - menu_displaylist_build_info_selective_t build_list[] = { + menu_displaylist_build_info_selective_t build_list[] = + { {MENU_ENUM_LABEL_HISTORY_LIST_ENABLE, PARSE_ONLY_BOOL, true}, {MENU_ENUM_LABEL_CONTENT_HISTORY_SIZE, PARSE_ONLY_UINT, false}, {MENU_ENUM_LABEL_CONTENT_FAVORITES_SIZE, PARSE_ONLY_INT, true}, @@ -7736,12 +7739,15 @@ unsigned menu_displaylist_build_list( bool ai_service_enable = settings->bools.ai_service_enable; menu_displaylist_build_info_selective_t build_list[] = { - {MENU_ENUM_LABEL_AI_SERVICE_ENABLE, PARSE_ONLY_BOOL, true}, - {MENU_ENUM_LABEL_AI_SERVICE_MODE, PARSE_ONLY_UINT, false}, - {MENU_ENUM_LABEL_AI_SERVICE_URL, PARSE_ONLY_STRING, false}, - {MENU_ENUM_LABEL_AI_SERVICE_PAUSE, PARSE_ONLY_BOOL, false}, - {MENU_ENUM_LABEL_AI_SERVICE_SOURCE_LANG, PARSE_ONLY_UINT, false}, - {MENU_ENUM_LABEL_AI_SERVICE_TARGET_LANG, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_ENABLE, PARSE_ONLY_BOOL, true}, + {MENU_ENUM_LABEL_AI_SERVICE_MODE, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_URL, PARSE_ONLY_STRING, false}, + 
{MENU_ENUM_LABEL_AI_SERVICE_PAUSE, PARSE_ONLY_BOOL, false}, + {MENU_ENUM_LABEL_AI_SERVICE_POLL_DELAY, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_SOURCE_LANG, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_TARGET_LANG, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_TEXT_POSITION, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_TEXT_PADDING, PARSE_ONLY_UINT, false}, }; for (i = 0; i < ARRAY_SIZE(build_list); i++) @@ -7751,8 +7757,11 @@ unsigned menu_displaylist_build_list( case MENU_ENUM_LABEL_AI_SERVICE_MODE: case MENU_ENUM_LABEL_AI_SERVICE_URL: case MENU_ENUM_LABEL_AI_SERVICE_PAUSE: + case MENU_ENUM_LABEL_AI_SERVICE_POLL_DELAY: case MENU_ENUM_LABEL_AI_SERVICE_SOURCE_LANG: case MENU_ENUM_LABEL_AI_SERVICE_TARGET_LANG: + case MENU_ENUM_LABEL_AI_SERVICE_TEXT_POSITION: + case MENU_ENUM_LABEL_AI_SERVICE_TEXT_PADDING: if (ai_service_enable) build_list[i].checked = true; break; diff --git a/menu/menu_setting.c b/menu/menu_setting.c index 352d5a276244..e23a3d25863e 100644 --- a/menu/menu_setting.c +++ b/menu/menu_setting.c @@ -3014,6 +3014,42 @@ static void setting_get_string_representation_uint_ai_service_mode( case 2: enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_NARRATOR_MODE; break; + case 3: + enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_MODE; + break; + case 4: + enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_NARRATOR_MODE; + break; + case 5: + enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_IMAGE_NARRATOR_MODE; + break; + default: + break; + } + + if (enum_idx != 0) + strlcpy(s, msg_hash_to_str(enum_idx), len); +} + +static void setting_get_string_representation_uint_ai_service_text_position( + rarch_setting_t *setting, + char *s, size_t len) +{ + enum msg_hash_enums enum_idx = MSG_UNKNOWN; + if (!setting) + return; + + switch (*setting->value.target.unsigned_integer) + { + case 0: + enum_idx = MENU_ENUM_LABEL_VALUE_NONE; + break; + case 1: + enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_BOTTOM; + break; + case 2: + 
enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_TOP; + break; default: break; } @@ -19234,7 +19270,7 @@ static bool setting_append_list( (*list)[list_info->index - 1].get_string_representation = &setting_get_string_representation_uint_ai_service_mode; (*list)[list_info->index - 1].action_ok = &setting_action_ok_uint; - menu_settings_list_current_add_range(list, list_info, 0, 2, 1, true, true); + menu_settings_list_current_add_range(list, list_info, 0, 5, 1, true, true); CONFIG_STRING( list, list_info, @@ -19316,6 +19352,50 @@ static bool setting_append_list( &setting_get_string_representation_uint_ai_service_lang; (*list)[list_info->index - 1].action_ok = &setting_action_ok_uint; menu_settings_list_current_add_range(list, list_info, TRANSLATION_LANG_DONT_CARE, (TRANSLATION_LANG_LAST-1), 1, true, true); + + CONFIG_UINT( + list, list_info, + &settings->uints.ai_service_poll_delay, + MENU_ENUM_LABEL_AI_SERVICE_POLL_DELAY, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_POLL_DELAY, + DEFAULT_AI_SERVICE_POLL_DELAY, + &group_info, + &subgroup_info, + parent_group, + general_write_handler, + general_read_handler); + (*list)[list_info->index - 1].action_ok = &setting_action_ok_uint; + menu_settings_list_current_add_range(list, list_info, 0, MAXIMUM_AI_SERVICE_POLL_DELAY, 50, true, true); + + CONFIG_UINT( + list, list_info, + &settings->uints.ai_service_text_position, + MENU_ENUM_LABEL_AI_SERVICE_TEXT_POSITION, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION, + DEFAULT_AI_SERVICE_TEXT_POSITION, + &group_info, + &subgroup_info, + parent_group, + general_write_handler, + general_read_handler); + (*list)[list_info->index - 1].get_string_representation = + &setting_get_string_representation_uint_ai_service_text_position; + (*list)[list_info->index - 1].action_ok = &setting_action_ok_uint; + menu_settings_list_current_add_range(list, list_info, 0, 2, 1, true, true); + + CONFIG_UINT( + list, list_info, + &settings->uints.ai_service_text_padding, + 
MENU_ENUM_LABEL_AI_SERVICE_TEXT_PADDING, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_PADDING, + DEFAULT_AI_SERVICE_TEXT_PADDING, + &group_info, + &subgroup_info, + parent_group, + general_write_handler, + general_read_handler); + (*list)[list_info->index - 1].action_ok = &setting_action_ok_uint; + menu_settings_list_current_add_range(list, list_info, 0, 20, 1, true, true); END_SUB_GROUP(list, list_info, parent_group); END_GROUP(list, list_info, parent_group); diff --git a/msg_hash.h b/msg_hash.h index 99fddc252f00..8fc17474a95f 100644 --- a/msg_hash.h +++ b/msg_hash.h @@ -569,6 +569,10 @@ enum msg_hash_enums MSG_FAILED_TO_ENTER_GAMEMODE_LINUX, MSG_VRR_RUNLOOP_ENABLED, MSG_VRR_RUNLOOP_DISABLED, + MSG_AI_VIDEO_DRIVER_NOT_SUPPORTED, + MSG_AI_AUTO_MODE_ENABLED, + MSG_AI_AUTO_MODE_DISABLED, + MSG_AI_NOTHING_TO_TRANSLATE, MSG_VIDEO_REFRESH_RATE_CHANGED, MSG_IOS_TOUCH_MOUSE_ENABLED, @@ -2785,6 +2789,9 @@ enum msg_hash_enums MENU_LABEL(AI_SERVICE_URL), MENU_LABEL(AI_SERVICE_ENABLE), MENU_LABEL(AI_SERVICE_PAUSE), + MENU_LABEL(AI_SERVICE_POLL_DELAY), + MENU_LABEL(AI_SERVICE_TEXT_POSITION), + MENU_LABEL(AI_SERVICE_TEXT_PADDING), MENU_LABEL(ON), MENU_LABEL(OFF), @@ -3477,6 +3484,11 @@ enum msg_hash_enums MENU_ENUM_LABEL_VALUE_AI_SERVICE_IMAGE_MODE, MENU_ENUM_LABEL_VALUE_AI_SERVICE_SPEECH_MODE, MENU_ENUM_LABEL_VALUE_AI_SERVICE_NARRATOR_MODE, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_MODE, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_NARRATOR_MODE, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_IMAGE_NARRATOR_MODE, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_TOP, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_BOTTOM, MENU_ENUM_LABEL_VALUE_NONE, MENU_ENUM_LABEL_VALUE_NO_INFORMATION_AVAILABLE, diff --git a/retroarch.c b/retroarch.c index 39593dbfd88b..f4193e1fa731 100644 --- a/retroarch.c +++ b/retroarch.c @@ -2236,6 +2236,9 @@ bool command_event(enum event_command cmd, void *data) #if defined(HAVE_ACCESSIBILITY) || defined(HAVE_TRANSLATE) access_state_t *access_st = access_state_get_ptr(); 
#endif +#if defined(HAVE_TRANSLATE) && defined(HAVE_GFX_WIDGETS) + dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); +#endif #ifdef HAVE_MENU struct menu_state *menu_st = menu_state_get_ptr(); #endif @@ -2252,12 +2255,12 @@ bool command_event(enum event_command cmd, void *data) #ifdef HAVE_OVERLAY input_overlay_unload(); #endif -#if defined(HAVE_TRANSLATE) && defined(HAVE_GFX_WIDGETS) - /* Because the overlay is a display widget, - * it's going to be written - * over the menu, so we unset it here. */ - if (dispwidget_get_ptr()->ai_service_overlay_state != 0) +#ifdef HAVE_TRANSLATE + translation_release(true); +#ifdef HAVE_GFX_WIDGETS + if (p_dispwidget->ai_service_overlay_state != 0) gfx_widgets_ai_service_overlay_unload(); +#endif #endif break; case CMD_EVENT_OVERLAY_INIT: @@ -2331,6 +2334,11 @@ bool command_event(enum event_command cmd, void *data) accessibility_narrator_speech_speed, (char*)msg_hash_to_str(MSG_UNPAUSED), 10); #endif +#ifdef HAVE_GFX_WIDGETS + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); +#endif + translation_release(true); command_event(CMD_EVENT_UNPAUSE, NULL); } else /* Pause on call */ @@ -2349,17 +2357,25 @@ bool command_event(enum event_command cmd, void *data) * Also, this mode is required for "auto" translation * packages, since you don't want to pause for that. */ - if (access_st->ai_service_auto == 2) + if (access_st->ai_service_auto != 0) { /* Auto mode was turned on, but we pressed the * toggle button, so turn it off now. 
*/ - access_st->ai_service_auto = 0; -#ifdef HAVE_MENU_WIDGETS - gfx_widgets_ai_service_overlay_unload(); + translation_release(true); +#ifdef HAVE_GFX_WIDGETS + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); #endif } - else - command_event(CMD_EVENT_AI_SERVICE_CALL, NULL); + else + { +#ifdef HAVE_GFX_WIDGETS + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); + else +#endif + command_event(CMD_EVENT_AI_SERVICE_CALL, NULL); + } } #endif break; @@ -4473,10 +4489,6 @@ bool command_event(enum event_command cmd, void *data) if (data) paused = *((bool*)data); - if ( (access_st->ai_service_auto == 0) - && !settings->bools.ai_service_pause) - access_st->ai_service_auto = 1; - run_translation_service(settings, paused); } #endif @@ -7165,6 +7177,9 @@ bool retroarch_main_quit(void) video_driver_state_t*video_st = video_state_get_ptr(); settings_t *settings = config_get_ptr(); bool config_save_on_exit = settings->bools.config_save_on_exit; +#ifdef HAVE_ACCESSIBILITY + access_state_t *access_st = access_state_get_ptr(); +#endif struct retro_system_av_info *av_info = &video_st->av_info; /* Restore video driver before saving */ @@ -7263,6 +7278,17 @@ bool retroarch_main_quit(void) retroarch_menu_running_finished(true); #endif +#ifdef HAVE_ACCESSIBILITY + translation_release(false); +#ifdef HAVE_THREADS + if (access_st->image_lock) + { + slock_free(access_st->image_lock); + access_st->image_lock = NULL; + } +#endif +#endif + return true; } diff --git a/tasks/task_translation.c b/tasks/task_translation.c index 7082a273caca..f989ce595d56 100644 --- a/tasks/task_translation.c +++ b/tasks/task_translation.c @@ -29,9 +29,11 @@ #include #include #include +#include #include #include #include +#include #include "../translation_defines.h" #ifdef HAVE_GFX_WIDGETS @@ -47,588 +49,290 @@ #include "../paths.h" #include "../runloop.h" #include "../verbosity.h" +#include "../msg_hash.h" #include 
"tasks_internal.h" -static void task_auto_translate_handler(retro_task_t *task) +static const char* ACCESS_INPUT_LABELS[] = +{ + "b", "y", "select", "start", "up", "down", "left", "right", + "a", "x", "l", "r", "l2", "r2", "l3", "r3" +}; + +static const char* ACCESS_RESPONSE_KEYS[] = +{ + "image", "sound", "text", "error", "auto", "press", "text_position" +}; + +typedef struct { - int *mode_ptr = (int*)task->user_data; - uint32_t runloop_flags = runloop_get_flags(); - access_state_t *access_st = access_state_get_ptr(); -#ifdef HAVE_ACCESSIBILITY - settings_t *settings = config_get_ptr(); -#endif + uint8_t *data; + unsigned size; + unsigned width; + unsigned height; + + unsigned content_x; + unsigned content_y; + unsigned content_width; + unsigned content_height; + unsigned viewport_width; + unsigned viewport_height; +} access_frame_t; + +typedef struct +{ + char *data; + int length; + char format[4]; +} access_base64_t; - if (task_get_cancelled(task)) - goto task_finished; +typedef struct +{ + char *inputs; + bool paused; +} access_request_t; - switch (*mode_ptr) - { - case 1: /* Speech Mode */ +typedef struct +{ + char *image; + int image_size; #ifdef HAVE_AUDIOMIXER - if (!audio_driver_is_ai_service_speech_running()) - goto task_finished; + void *sound; + int sound_size; #endif - break; - case 2: /* Narrator Mode */ + char *error; + char *text; + char *recall; + char *input; + int text_position; +} access_response_t; + +/* UTILITIES ---------------------------------------------------------------- */ +/* -------------------------------------------------------------------------- */ + +/** + * Returns true if the accessibility narrator is currently playing audio. 
+ */ #ifdef HAVE_ACCESSIBILITY - if (!is_narrator_running( - settings->bools.accessibility_enable)) - goto task_finished; -#endif - break; - default: - break; - } - - return; - -task_finished: - if (access_st->ai_service_auto == 1) - access_st->ai_service_auto = 2; - - task_set_finished(task, true); - - if (*mode_ptr == 1 || *mode_ptr == 2) +bool is_narrator_running(bool accessibility_enable) +{ + access_state_t *access_st = access_state_get_ptr(); + if (is_accessibility_enabled( + accessibility_enable, + access_st->enabled)) { - bool was_paused = (runloop_flags & RUNLOOP_FLAG_PAUSED) ? true : false; - command_event(CMD_EVENT_AI_SERVICE_CALL, &was_paused); + frontend_ctx_driver_t *frontend = + frontend_state_get_ptr()->current_frontend_ctx; + if (frontend && frontend->is_narrator_running) + return frontend->is_narrator_running(); } - if (task->user_data) - free(task->user_data); + return false; } +#endif -static void call_auto_translate_task( - settings_t *settings, - bool *was_paused) +/** + * Returns true if array {a} and {b}, both of the same size {size} are equal. + * This method prevents a potential bug with memcmp on some platforms. 
+ */ +static bool u8_array_equal(uint8_t *a, uint8_t *b, int size) { - int ai_service_mode = settings->uints.ai_service_mode; - access_state_t *access_st = access_state_get_ptr(); - - /*Image Mode*/ - if (ai_service_mode == 0) - { - if (access_st->ai_service_auto == 1) - access_st->ai_service_auto = 2; - - command_event(CMD_EVENT_AI_SERVICE_CALL, was_paused); - } - else /* Speech or Narrator Mode */ + int i = 0; + for (; i < size; i++) { - int* mode = NULL; - retro_task_t *t = task_init(); - if (!t) - return; - - mode = (int*)malloc(sizeof(int)); - *mode = ai_service_mode; - - t->handler = task_auto_translate_handler; - t->user_data = mode; - t->mute = true; - task_queue_push(t); + if (a[i] != b[i]) + return false; } + return true; } -static void handle_translation_cb( - retro_task_t *task, void *task_data, - void *user_data, const char *error) +/** + * Helper method to simplify accessibility speech usage. This method will only + * use TTS to read the provided text if accessibility has been enabled in the + * frontend or by RetroArch's internal override mechanism. 
+ */ +static void accessibility_speak(const char *text) { - uint8_t* raw_output_data = NULL; - char* raw_image_file_data = NULL; - struct scaler_ctx* scaler = NULL; - http_transfer_data_t *data = (http_transfer_data_t*)task_data; - int new_image_size = 0; -#ifdef HAVE_AUDIOMIXER - int new_sound_size = 0; -#endif - void* raw_image_data = NULL; - void* raw_image_data_alpha = NULL; - void* raw_sound_data = NULL; - rjson_t *json = NULL; - int json_current_key = 0; - char* err_str = NULL; - char* txt_str = NULL; - char* auto_str = NULL; - char* key_str = NULL; - settings_t* settings = config_get_ptr(); - uint32_t runloop_flags = runloop_get_flags(); -#ifdef HAVE_ACCESSIBILITY - input_driver_state_t *input_st = input_state_get_ptr(); -#endif - video_driver_state_t - *video_st = video_state_get_ptr(); - const enum retro_pixel_format - video_driver_pix_fmt = video_st->pix_fmt; - access_state_t *access_st = access_state_get_ptr(); -#ifdef HAVE_GFX_WIDGETS - bool gfx_widgets_paused = (video_st->flags & - VIDEO_FLAG_WIDGETS_PAUSED) ? true : false; - dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); -#endif #ifdef HAVE_ACCESSIBILITY - bool accessibility_enable = settings->bools.accessibility_enable; - unsigned accessibility_narrator_speech_speed = settings->uints.accessibility_narrator_speech_speed; -#ifdef HAVE_GFX_WIDGETS - /* When auto mode is on, we turn off the overlay - * once we have the result for the next call.*/ - if (p_dispwidget->ai_service_overlay_state != 0 - && access_st->ai_service_auto == 2) - gfx_widgets_ai_service_overlay_unload(); -#endif + settings_t *settings = config_get_ptr(); + unsigned speed = settings->uints.accessibility_narrator_speech_speed; + bool narrator_on = settings->bools.accessibility_enable; + + accessibility_speak_priority(narrator_on, speed, text, 10); #endif +} -#ifdef DEBUG - if (access_st->ai_service_auto != 2) - RARCH_LOG("RESULT FROM AI SERVICE...\n"); +/** + * Speaks the provided text using TTS. 
This only happens if the narrator has + * been enabled or the service is running in Narrator mode, in which case it + * must been used even if the user has disabled it. + */ +static void translation_speak(const char *text) +{ +#ifdef HAVE_ACCESSIBILITY + settings_t *settings = config_get_ptr(); + access_state_t *access_st = access_state_get_ptr(); + + unsigned mode = settings->uints.ai_service_mode; + unsigned speed = settings->uints.accessibility_narrator_speech_speed; + bool narrator_on = settings->bools.accessibility_enable; + + /* Force the use of the narrator in Narrator modes (TTS) */ + if (mode == 2 || mode == 4 || mode == 5 || narrator_on || access_st->enabled) + accessibility_speak_priority(true, speed, text, 10); #endif +} - if (!data || error || !data->data) - goto finish; - - if (!(json = rjson_open_buffer(data->data, data->len))) - goto finish; - - /* Parse JSON body for the image and sound data */ - for (;;) +/** + * Displays the given message on screen and returns true. Returns false if no + * {message} is provided (i.e. it is NULL). The message will be displayed as + * information or error depending on the {error} boolean. In addition, it will + * be logged if {error} is true, or if this is a debug build. The message will + * also be played by the accessibility narrator if the user enabled it. 
+ */ +static bool translation_user_message(const char *message, bool error) +{ + if (message) { - static const char* keys[] = { "image", "sound", "text", "error", "auto", "press" }; - - const char *str = NULL; - size_t str_len = 0; - enum rjson_type json_type = rjson_next(json); - - if (json_type == RJSON_DONE || json_type == RJSON_ERROR) - break; - if (json_type != RJSON_STRING) - continue; - if (rjson_get_context_type(json) != RJSON_OBJECT) - continue; - str = rjson_get_string(json, &str_len); - - if ((rjson_get_context_count(json) & 1) == 1) - { - int i; - json_current_key = -1; - - for (i = 0; i < (int)ARRAY_SIZE(keys); i++) - { - if (string_is_equal(str, keys[i])) - { - json_current_key = i; - break; - } - } - } + accessibility_speak(message); + runloop_msg_queue_push( + message, 1, 180, true, NULL, MESSAGE_QUEUE_ICON_DEFAULT, + error ? MESSAGE_QUEUE_CATEGORY_ERROR : MESSAGE_QUEUE_CATEGORY_INFO); + if (error) + RARCH_ERR("[Translate] %s\n", message); +#ifdef DEBUG else - { - switch (json_current_key) - { - case 0: /* image */ - raw_image_file_data = (char*)unbase64(str, - (int)str_len, &new_image_size); - break; -#ifdef HAVE_AUDIOMIXER - case 1: /* sound */ - raw_sound_data = (void*)unbase64(str, - (int)str_len, &new_sound_size); - break; + RARCH_LOG("[Translate] %s\n", message); #endif - case 2: /* text */ - txt_str = strdup(str); - break; - case 3: /* error */ - err_str = strdup(str); - break; - case 4: /* auto */ - auto_str = strdup(str); - break; - case 5: /* press */ - key_str = strdup(str); - break; - } - json_current_key = -1; - } + return true; } + return false; +} - if (string_is_equal(err_str, "No text found.")) +/** + * Displays the given hash on screen and returns true. Returns false if no + * {hash} is provided (i.e. it is NULL). The message will be displayed as + * information or error depending on the {error} boolean. In addition, it will + * be logged if {error} is true, or if this is a debug build. 
The message will + * also be played by the accessibility narrator if the user enabled it. + */ +static bool translation_hash_message(enum msg_hash_enums hash, bool error) +{ + if (hash) { + const char *message = msg_hash_to_str(hash); + const char *intl = msg_hash_to_str_us(hash); + + accessibility_speak(message); + runloop_msg_queue_push( + message, 1, 180, true, NULL, MESSAGE_QUEUE_ICON_DEFAULT, + error ? MESSAGE_QUEUE_CATEGORY_ERROR : MESSAGE_QUEUE_CATEGORY_INFO); + if (error) + RARCH_ERR("[Translate] %s\n", intl); #ifdef DEBUG - RARCH_LOG("No text found...\n"); -#endif - if (txt_str) - { - free(txt_str); - txt_str = NULL; - } - - txt_str = (char*)malloc(15); - strlcpy(txt_str, err_str, 15); -#ifdef HAVE_GFX_WIDGETS - if (gfx_widgets_paused) - { - /* In this case we have to unpause and then repause for a frame */ - p_dispwidget->ai_service_overlay_state = 2; - command_event(CMD_EVENT_UNPAUSE, NULL); - } -#endif - } - - if ( !raw_image_file_data - && !raw_sound_data - && !txt_str - && !key_str - && (access_st->ai_service_auto != 2)) - { - error = "Invalid JSON body."; - goto finish; - } - - if (raw_image_file_data) - { - unsigned image_width, image_height; - /* Get the video frame dimensions reference */ - const void *dummy_data = video_st->frame_cache_data; - unsigned width = video_st->frame_cache_width; - unsigned height = video_st->frame_cache_height; - - /* try two different modes for text display * - * In the first mode, we use display widget overlays, but they require - * the video poke interface to be able to load image buffers. - * - * The other method is to draw to the video buffer directly, which needs - * a software core to be running. 
*/ -#ifdef HAVE_GFX_WIDGETS - if ( video_st->poke - && video_st->poke->load_texture - && video_st->poke->unload_texture) - { - enum image_type_enum image_type; - /* Write to overlay */ - if ( raw_image_file_data[0] == 'B' - && raw_image_file_data[1] == 'M') - image_type = IMAGE_TYPE_BMP; - else if ( raw_image_file_data[1] == 'P' - && raw_image_file_data[2] == 'N' - && raw_image_file_data[3] == 'G') - image_type = IMAGE_TYPE_PNG; - else - { - RARCH_LOG("Invalid image type returned from server.\n"); - goto finish; - } - - if (!gfx_widgets_ai_service_overlay_load( - raw_image_file_data, (unsigned)new_image_size, - image_type)) - { - RARCH_LOG("Video driver not supported for AI Service."); - runloop_msg_queue_push( - /* msg_hash_to_str(MSG_VIDEO_DRIVER_NOT_SUPPORTED), */ - "Video driver not supported.", - 1, 180, true, - NULL, MESSAGE_QUEUE_ICON_DEFAULT, MESSAGE_QUEUE_CATEGORY_INFO); - } - else if (gfx_widgets_paused) - { - /* In this case we have to unpause and then repause for a frame */ - /* Unpausing state */ - p_dispwidget->ai_service_overlay_state = 2; - command_event(CMD_EVENT_UNPAUSE, NULL); - } - } else + RARCH_LOG("[Translate] %s\n", intl); #endif - /* Can't use display widget overlays, so try writing to video buffer */ - { - size_t pitch; - /* Write to video buffer directly (software cores only) */ - - /* This is a BMP file coming back. 
*/ - if ( raw_image_file_data[0] == 'B' - && raw_image_file_data[1] == 'M') - { - /* Get image data (24 bit), and convert to the emulated pixel format */ - image_width = - ((uint32_t) ((uint8_t)raw_image_file_data[21]) << 24) + - ((uint32_t) ((uint8_t)raw_image_file_data[20]) << 16) + - ((uint32_t) ((uint8_t)raw_image_file_data[19]) << 8) + - ((uint32_t) ((uint8_t)raw_image_file_data[18]) << 0); - - image_height = - ((uint32_t) ((uint8_t)raw_image_file_data[25]) << 24) + - ((uint32_t) ((uint8_t)raw_image_file_data[24]) << 16) + - ((uint32_t) ((uint8_t)raw_image_file_data[23]) << 8) + - ((uint32_t) ((uint8_t)raw_image_file_data[22]) << 0); - raw_image_data = (void*)malloc(image_width * image_height * 3 * sizeof(uint8_t)); - memcpy(raw_image_data, - raw_image_file_data + 54 * sizeof(uint8_t), - image_width * image_height * 3 * sizeof(uint8_t)); - } - /* PNG coming back from the url */ - else if (raw_image_file_data[1] == 'P' - && raw_image_file_data[2] == 'N' - && raw_image_file_data[3] == 'G') - { - int retval = 0; - rpng_t *rpng = NULL; - image_width = - ((uint32_t) ((uint8_t)raw_image_file_data[16]) << 24)+ - ((uint32_t) ((uint8_t)raw_image_file_data[17]) << 16)+ - ((uint32_t) ((uint8_t)raw_image_file_data[18]) << 8)+ - ((uint32_t) ((uint8_t)raw_image_file_data[19]) << 0); - image_height = - ((uint32_t) ((uint8_t)raw_image_file_data[20]) << 24)+ - ((uint32_t) ((uint8_t)raw_image_file_data[21]) << 16)+ - ((uint32_t) ((uint8_t)raw_image_file_data[22]) << 8)+ - ((uint32_t) ((uint8_t)raw_image_file_data[23]) << 0); - - if (!(rpng = rpng_alloc())) - { - error = "Can't allocate memory."; - goto finish; - } - - rpng_set_buf_ptr(rpng, raw_image_file_data, (size_t)new_image_size); - rpng_start(rpng); - while (rpng_iterate_image(rpng)); - - do - { - retval = rpng_process_image(rpng, &raw_image_data_alpha, - (size_t)new_image_size, &image_width, &image_height); - } while (retval == IMAGE_PROCESS_NEXT); - - /* Returned output from the png processor is an upside down RGBA - * 
image, so we have to change that to RGB first. This should - * probably be replaced with a scaler call.*/ - { - unsigned ui; - int tw, th, tc; - int d = 0; - raw_image_data = (void*)malloc(image_width*image_height*3*sizeof(uint8_t)); - for (ui = 0; ui < image_width * image_height * 4; ui++) - { - if (ui % 4 != 3) - { - tc = d % 3; - th = image_height-d / (image_width * 3) - 1; - tw = (d % (image_width * 3)) / 3; - ((uint8_t*) raw_image_data)[tw * 3 + th * 3 * image_width + tc] = ((uint8_t *)raw_image_data_alpha)[ui]; - d += 1; - } - } - } - rpng_free(rpng); - } - else - { - RARCH_LOG("Output from URL not a valid file type, or is not supported.\n"); - goto finish; - } - - if (!(scaler = (struct scaler_ctx*)calloc(1, sizeof(struct scaler_ctx)))) - goto finish; - - if (dummy_data == RETRO_HW_FRAME_BUFFER_VALID) - { - /* - In this case, we used the viewport to grab the image - and translate it, and we have the translated image in - the raw_image_data buffer. - */ - RARCH_LOG("Hardware frame buffer core, but selected video driver isn't supported.\n"); - goto finish; - } - - /* The assigned pitch may not be reliable. The width of - the video frame can change during run-time, but the - pitch may not, so we just assign it as the width - times the byte depth. 
- */ - - if (video_driver_pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888) - { - raw_output_data = (uint8_t*)malloc(width * height * 4 * sizeof(uint8_t)); - scaler->out_fmt = SCALER_FMT_ARGB8888; - pitch = width * 4; - scaler->out_stride = (int)pitch; - } - else - { - raw_output_data = (uint8_t*)malloc(width * height * 2 * sizeof(uint8_t)); - scaler->out_fmt = SCALER_FMT_RGB565; - pitch = width * 2; - scaler->out_stride = width; - } - - if (!raw_output_data) - goto finish; - - scaler->in_fmt = SCALER_FMT_BGR24; - scaler->in_width = image_width; - scaler->in_height = image_height; - scaler->out_width = width; - scaler->out_height = height; - scaler->scaler_type = SCALER_TYPE_POINT; - scaler_ctx_gen_filter(scaler); - scaler->in_stride = -1 * width * 3; - - scaler_ctx_scale_direct(scaler, raw_output_data, - (uint8_t*)raw_image_data + (image_height - 1) * width * 3); - video_driver_frame(raw_output_data, image_width, image_height, pitch); - } + return true; } + return false; +} -#ifdef HAVE_AUDIOMIXER - if (raw_sound_data) - { - audio_mixer_stream_params_t params; +/** + * Displays the given message on screen and returns true. Returns false if no + * {message} is provided (i.e. it is NULL). The message will be displayed as + * an error and it will be logged. The message will also be played by the + * accessibility narrator if the user enabled it. + */ +static INLINE bool translation_user_error(const char *message) +{ + return translation_user_message(message, true); +} - params.volume = 1.0f; - params.slot_selection_type = AUDIO_MIXER_SLOT_SELECTION_MANUAL; /* user->slot_selection_type; */ - params.slot_selection_idx = 10; - params.stream_type = AUDIO_STREAM_TYPE_SYSTEM; /* user->stream_type; */ - params.type = AUDIO_MIXER_TYPE_WAV; - params.state = AUDIO_STREAM_STATE_PLAYING; - params.buf = raw_sound_data; - params.bufsize = new_sound_size; - params.cb = NULL; - params.basename = NULL; +/** + * Displays the given message on screen and returns true. 
Returns false if no + * {message} is provided (i.e. it is NULL). The message will be displayed as + * information and will only be logged if this is a debug build. The message + * will also be played by the accessibility narrator if the user enabled it. + */ +static INLINE bool translation_user_info(const char *message) +{ + return translation_user_message(message, false); +} - audio_driver_mixer_add_stream(¶ms); +/** + * Displays the given hash on screen and returns true. Returns false if no + * {hash} is provided (i.e. it is NULL). The message will be displayed as + * an error and it will be logged. The message will also be played by the + * accessibility narrator if the user enabled it. + */ +static INLINE bool translation_hash_error(enum msg_hash_enums hash) +{ + return translation_hash_message(hash, true); +} - if (raw_sound_data) - { - free(raw_sound_data); - raw_sound_data = NULL; - } - } +/** + * Displays the given hash on screen and returns true. Returns false if no + * {hash} is provided (i.e. it is NULL). The message will be displayed as + * information and will only be logged if this is a debug build. The message + * will also be played by the accessibility narrator if the user enabled it. + */ +static INLINE bool translation_hash_info(enum msg_hash_enums hash) +{ + return translation_hash_message(hash, false); +} + +/** + * Releases all data held by the service and stops it as soon as possible. + * If {inform} is true, a message will be displayed to the user if the service + * was running in automatic mode to warn them that it is now stopping. 
+ */ +void translation_release(bool inform) +{ +#ifdef HAVE_GFX_WIDGETS + dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); +#endif + access_state_t *access_st = access_state_get_ptr(); + unsigned service_auto_prev = access_st->ai_service_auto; + access_st->ai_service_auto = 0; + +#ifdef DEBUG + RARCH_LOG("[Translate]: AI Service is now stopping.\n"); #endif - if (key_str) + if (access_st->request_task) + task_set_cancelled(access_st->request_task, true); + if (access_st->response_task) + task_set_cancelled(access_st->response_task, true); + +#ifdef HAVE_THREADS + if (access_st->image_lock) { - size_t i; - char key[8]; - size_t length = strlen(key_str); - size_t start = 0; - - for (i = 1; i < length; i++) - { - char t = key_str[i]; - if (i == length - 1 || t == ' ' || t == ',') - { - if (i == length - 1 && t != ' ' && t!= ',') - i++; - - if (i-start > 7) - { - start = i; - continue; - } - - strncpy(key, key_str + start, i-start); - key[i-start] = '\0'; - -#ifdef HAVE_ACCESSIBILITY - if (string_is_equal(key, "b")) - input_st->ai_gamepad_state[0] = 2; - if (string_is_equal(key, "y")) - input_st->ai_gamepad_state[1] = 2; - if (string_is_equal(key, "select")) - input_st->ai_gamepad_state[2] = 2; - if (string_is_equal(key, "start")) - input_st->ai_gamepad_state[3] = 2; - - if (string_is_equal(key, "up")) - input_st->ai_gamepad_state[4] = 2; - if (string_is_equal(key, "down")) - input_st->ai_gamepad_state[5] = 2; - if (string_is_equal(key, "left")) - input_st->ai_gamepad_state[6] = 2; - if (string_is_equal(key, "right")) - input_st->ai_gamepad_state[7] = 2; - - if (string_is_equal(key, "a")) - input_st->ai_gamepad_state[8] = 2; - if (string_is_equal(key, "x")) - input_st->ai_gamepad_state[9] = 2; - if (string_is_equal(key, "l")) - input_st->ai_gamepad_state[10] = 2; - if (string_is_equal(key, "r")) - input_st->ai_gamepad_state[11] = 2; - - if (string_is_equal(key, "l2")) - input_st->ai_gamepad_state[12] = 2; - if (string_is_equal(key, "r2")) - 
input_st->ai_gamepad_state[13] = 2; - if (string_is_equal(key, "l3")) - input_st->ai_gamepad_state[14] = 2; - if (string_is_equal(key, "r3")) - input_st->ai_gamepad_state[15] = 2; + slock_lock(access_st->image_lock); #endif - - if (string_is_equal(key, "pause")) - command_event(CMD_EVENT_PAUSE, NULL); - if (string_is_equal(key, "unpause")) - command_event(CMD_EVENT_UNPAUSE, NULL); - - start = i+1; - } - } + if (access_st->last_image) + free(access_st->last_image); + + access_st->last_image = NULL; + access_st->last_image_size = 0; + +#ifdef HAVE_THREADS + slock_unlock(access_st->image_lock); } - -#ifdef HAVE_ACCESSIBILITY - if ( txt_str - && is_accessibility_enabled( - accessibility_enable, - access_st->enabled)) - accessibility_speak_priority( - accessibility_enable, - accessibility_narrator_speech_speed, - txt_str, 10); #endif -finish: - if (error) - RARCH_ERR("%s: %s\n", msg_hash_to_str(MSG_DOWNLOAD_FAILED), error); - - if (user_data) - free(user_data); - - if (json) - rjson_free(json); - if (raw_image_file_data) - free(raw_image_file_data); - if (raw_image_data_alpha) - free(raw_image_data_alpha); - if (raw_image_data) - free(raw_image_data); - if (scaler) - free(scaler); - if (err_str) - free(err_str); - if (txt_str) - free(txt_str); - if (raw_output_data) - free(raw_output_data); +#ifdef HAVE_GFX_WIDGETS + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); +#endif - if (auto_str) - { - if (string_is_equal(auto_str, "auto")) - { - bool was_paused = (runloop_flags & RUNLOOP_FLAG_PAUSED) ? 
true : false; - if ( (access_st->ai_service_auto != 0) - && !settings->bools.ai_service_pause) - call_auto_translate_task(settings, &was_paused); - } - free(auto_str); - } - if (key_str) - free(key_str); + if (inform && service_auto_prev != 0) + translation_hash_info(MSG_AI_AUTO_MODE_DISABLED); } -static const char *ai_service_get_str(enum translation_lang id) +/** + * Returns the string representation of the translation language enum value. + */ +static const char* ai_service_get_str(enum translation_lang id) { switch (id) { @@ -768,120 +472,790 @@ static const char *ai_service_get_str(enum translation_lang id) return ""; } -bool run_translation_service(settings_t *settings, bool paused) -{ - struct video_viewport vp; - uint8_t header[54]; - size_t pitch; - unsigned width, height; - const void *data = NULL; - uint8_t *bit24_image = NULL; - uint8_t *bit24_image_prev = NULL; - struct scaler_ctx *scaler = (struct scaler_ctx*) - calloc(1, sizeof(struct scaler_ctx)); - bool error = false; - - uint8_t *bmp_buffer = NULL; - uint64_t buffer_bytes = 0; - char *bmp64_buffer = NULL; - rjsonwriter_t *jsonwriter = NULL; - const char *json_buffer = NULL; - int bmp64_length = 0; - bool TRANSLATE_USE_BMP = false; - char *sys_lbl = NULL; - core_info_t *core_info = NULL; - video_driver_state_t *video_st = video_state_get_ptr(); - access_state_t *access_st = access_state_get_ptr(); -#ifdef HAVE_ACCESSIBILITY - input_driver_state_t *input_st = input_state_get_ptr(); -#endif -#ifdef HAVE_GFX_WIDGETS - dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); - /* For the case when ai service pause is disabled. 
*/ - if ( (p_dispwidget->ai_service_overlay_state != 0) - && (access_st->ai_service_auto == 1)) - { - gfx_widgets_ai_service_overlay_unload(); - goto finish; - } -#endif +/* AUTOMATION --------------------------------------------------------------- */ +/* -------------------------------------------------------------------------- */ +/** + * Handler invoking the next automatic request. This method simply waits for + * any previous request to terminate before re-invoking the translation service. + * By delegating this to a task handler we can safely do so in the task thread + * instead of hogging the main thread. + */ +static void call_auto_translate_hndl(retro_task_t *task) +{ + int *mode_ptr = (int*)task->user_data; + uint32_t runloop_flags = runloop_get_flags(); + access_state_t *access_st = access_state_get_ptr(); + settings_t *settings = config_get_ptr(); - /* get the core info here so we can pass long the game name */ - core_info_get_current_core(&core_info); + if (task_get_cancelled(task)) + goto finish; - if (core_info) + switch (*mode_ptr) { - size_t lbl_len; - const char *lbl = NULL; - const char *sys_id = core_info->system_id - ? 
core_info->system_id : "core"; - size_t sys_id_len = strlen(sys_id); - const struct playlist_entry *entry = NULL; - playlist_t *current_playlist = playlist_get_cached(); + case 1: /* Speech Mode */ +#ifdef HAVE_AUDIOMIXER + if (!audio_driver_is_ai_service_speech_running()) + goto finish; +#endif + break; + case 2: /* Narrator Mode */ + case 3: /* Text Mode */ + case 4: /* Text + Narrator */ + case 5: /* Image + Narrator */ +#ifdef HAVE_ACCESSIBILITY + if (!is_narrator_running(settings->bools.accessibility_enable)) + goto finish; +#endif + break; + default: + goto finish; + } + return; - if (current_playlist) +finish: + task_set_finished(task, true); + + if (task->user_data) + free(task->user_data); + + /* Final check to see if the user did not disable the service altogether */ + if (access_st->ai_service_auto != 0) + { + bool was_paused = runloop_flags & RUNLOOP_FLAG_PAUSED; + command_event(CMD_EVENT_AI_SERVICE_CALL, &was_paused); + } +} + +/** + * Invokes the next automatic request. This method delegates the invokation to + * a task to allow for threading. The task will only execute after the polling + * delay configured by the user has been honored since the last request. 
+ */ +static void call_auto_translate_task(settings_t *settings) +{ + int* mode = NULL; + access_state_t *access_st = access_state_get_ptr(); + int ai_service_mode = settings->uints.ai_service_mode; + unsigned delay = settings->uints.ai_service_poll_delay; + retro_task_t *task = task_init(); + if (!task) + return; + + mode = (int*)malloc(sizeof(int)); + *mode = ai_service_mode; + + task->handler = call_auto_translate_hndl; + task->user_data = mode; + task->mute = true; + task->when = access_st->last_call + (delay * 1000); + task_queue_push(task); +} + +/* RESPONSE ----------------------------------------------------------------- */ +/* -------------------------------------------------------------------------- */ + +/** + * Parses the JSON returned by the translation server and returns structured + * data. May return NULL if the parsing cannot be completed or the JSON is + * malformed. If unsupported keys are provided in the JSON, they will simply + * be ignored. Only the available data will be populated in the returned object + * and everything else will be zero-initialized. 
+ */ +static access_response_t* parse_response_json(http_transfer_data_t *data) +{ + int key = -1; + rjson_t* json = NULL; + char* image_data = NULL; + int image_size = 0; +#ifdef HAVE_AUDIOMIXER + void *sound_data = NULL; + int sound_size = 0; +#endif + access_response_t *response = NULL; + bool empty = true; + enum rjson_type type; + + if (!data || !data->data) + goto finish; + if (!(json = rjson_open_buffer(data->data, data->len))) + goto finish; + if (!(response = (access_response_t*)calloc(1, sizeof(access_response_t)))) + goto finish; + + for (;;) + { + size_t length = 0; + const char *string = NULL; + type = rjson_next(json); + + if (type == RJSON_DONE || type == RJSON_ERROR) + break; + if (rjson_get_context_type(json) != RJSON_OBJECT) + continue; + + if (type == RJSON_STRING && (rjson_get_context_count(json) & 1) == 1) { - playlist_get_index_by_path( - current_playlist, path_get(RARCH_PATH_CONTENT), &entry); + int i; + string = rjson_get_string(json, &length); + for (i = 0; i < ARRAY_SIZE(ACCESS_RESPONSE_KEYS) && key == -1; i++) + { + if (string_is_equal(string, ACCESS_RESPONSE_KEYS[i])) + key = i; + } + } + else + { + if (type != RJSON_STRING && key < 6) + continue; + else + string = rjson_get_string(json, &length); + + switch (key) + { + case 0: /* image */ + response->image = (length == 0) ? NULL : (char*)unbase64( + string, (int)length, &response->image_size); + break; +#ifdef HAVE_AUDIOMIXER + case 1: /* sound */ + response->sound = (length == 0) ? 
NULL : (void*)unbase64( + string, (int)length, &response->sound_size); + break; +#endif + case 2: /* text */ + response->text = strdup(string); + break; + case 3: /* error */ + response->error = strdup(string); + break; + case 4: /* auto */ + response->recall = strdup(string); + break; + case 5: /* press */ + response->input = strdup(string); + break; + case 6: /* text_position */ + if (type == RJSON_NUMBER) + response->text_position = rjson_get_int(json); + break; + } + key = -1; + } + } + + if (type == RJSON_ERROR) + { + RARCH_LOG("[Translate] JSON error: %s\n", rjson_get_error(json)); + translation_user_error("Service returned a malformed JSON"); + free(response); + response = NULL; + } + +finish: + if (json) + rjson_free(json); + else + translation_user_error("Internal error parsing returned JSON."); + + return response; +} - if (entry && !string_is_empty(entry->label)) - lbl = entry->label; +/** + * Parses the image data of given type and displays it using widgets. If the + * image widget is already shown, it will be unloaded first automatically. + * This method will disable automatic translation if the widget could not be + * loaded to prevent further errors. 
+ */ +#ifdef HAVE_GFX_WIDGETS +static void translation_response_image_widget( + char *image, int image_length, enum image_type_enum *image_type) +{ + video_driver_state_t *video_st = video_state_get_ptr(); + dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); + access_state_t *access_st = access_state_get_ptr(); + + bool ai_res; + bool gfx_widgets_paused = video_st->flags & VIDEO_FLAG_WIDGETS_PAUSED; + + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); + + ai_res = gfx_widgets_ai_service_overlay_load( + image, (unsigned)image_length, (*image_type)); + + if (!ai_res) + { + translation_hash_error(MSG_AI_VIDEO_DRIVER_NOT_SUPPORTED); + translation_release(true); + } + else if (gfx_widgets_paused) + { + /* Unpause for a frame otherwise widgets won't be displayed */ + p_dispwidget->ai_service_overlay_state = 2; + command_event(CMD_EVENT_UNPAUSE, NULL); + } +} +#endif + +/** + * Parses the image buffer, converting the data to the raw image format we need + * to display the image within RetroArch. Writes the raw image data in {body} + * as well as its {width} and {height} as determined by the image header. + * Returns true if the process was successful. 
+ */ +static bool translation_get_image_body( + char *image, int image_size, enum image_type_enum *image_type, + void *body, unsigned *width, unsigned *height) +{ +#ifdef HAVE_RPNG + rpng_t *rpng = NULL; + void *rpng_alpha = NULL; + int rpng_ret = 0; +#endif + + if ((*image_type) == IMAGE_TYPE_BMP) + { + if (image_size < 55) + return false; + + *width = ((uint32_t) ((uint8_t)image[21]) << 24) + + ((uint32_t) ((uint8_t)image[20]) << 16) + + ((uint32_t) ((uint8_t)image[19]) << 8) + + ((uint32_t) ((uint8_t)image[18]) << 0); + *height = ((uint32_t) ((uint8_t)image[25]) << 24) + + ((uint32_t) ((uint8_t)image[24]) << 16) + + ((uint32_t) ((uint8_t)image[23]) << 8) + + ((uint32_t) ((uint8_t)image[22]) << 0); + + image_size = (*width) * (*height) * 3 * sizeof(uint8_t); + body = (void*)malloc(image_size); + if (!body) + return false; + + memcpy(body, image + 54 * sizeof(uint8_t), image_size); + return true; + } + +#ifdef HAVE_RPNG + else if ((*image_type) == IMAGE_TYPE_PNG) + { + if (image_size < 24) + return false; + if (!(rpng = rpng_alloc())) + return false; + + *width = ((uint32_t) ((uint8_t)image[16]) << 24) + + ((uint32_t) ((uint8_t)image[17]) << 16) + + ((uint32_t) ((uint8_t)image[18]) << 8) + + ((uint32_t) ((uint8_t)image[19]) << 0); + *height = ((uint32_t) ((uint8_t)image[20]) << 24) + + ((uint32_t) ((uint8_t)image[21]) << 16) + + ((uint32_t) ((uint8_t)image[22]) << 8) + + ((uint32_t) ((uint8_t)image[23]) << 0); + + rpng_set_buf_ptr(rpng, image, (size_t)image_size); + rpng_start(rpng); + while (rpng_iterate_image(rpng)); + + do + { + rpng_ret = rpng_process_image( + rpng, &rpng_alpha, (size_t)image_size, width, height); + } while (rpng_ret == IMAGE_PROCESS_NEXT); + + /* + * Returned output from the png processor is an upside down RGBA + * image, so we have to change that to RGB first. This should + * probably be replaced with a scaler call. 
+ */ + { + int d = 0; + int tw, th, tc; + unsigned ui; + image_size = (*width) * (*height) * 3 * sizeof(uint8_t); + body = (void*)malloc(image_size); + if (!body) + { + free(rpng_alpha); + rpng_free(rpng); + return false; + } + + for (ui = 0; ui < (*width) * (*height) * 4; ui++) + { + if (ui % 4 != 3) + { + tc = d % 3; + th = (*height) - d / (3 * (*width)) - 1; + tw = (d % ((*width) * 3)) / 3; + ((uint8_t*) body)[tw * 3 + th * 3 * (*width) + tc] + = ((uint8_t*)rpng_alpha)[ui]; + d++; + } + } } + free(rpng_alpha); + rpng_free(rpng); + return true; + } +#endif + + return false; +} + +/** + * Displays the raw image on screen by directly writing to the frame buffer. + * This method may fail depending on the current video driver. + */ + /* TODO/FIXME: Does nothing with Vulkan apparently? */ +static void translation_response_image_direct( + char *image, int image_size, enum image_type_enum *image_type) +{ + size_t pitch; + unsigned width; + unsigned height; + unsigned vp_width; + unsigned vp_height; + + void *image_body = NULL; + uint8_t *raw_output_data = NULL; + size_t raw_output_size = 0; + const void *dummy_data = NULL; + struct scaler_ctx *scaler = NULL; + video_driver_state_t *video_st = video_state_get_ptr(); + const enum retro_pixel_format video_driver_pix_fmt = video_st->pix_fmt; + + if (!(translation_get_image_body( + image, image_size, image_type, image_body, &width, &height))) + goto finish; + + if (!(scaler = (struct scaler_ctx*)calloc(1, sizeof(struct scaler_ctx)))) + goto finish; + + dummy_data = video_st->frame_cache_data; + vp_width = video_st->frame_cache_width; + vp_height = video_st->frame_cache_height; + pitch = video_st->frame_cache_pitch; - if (!lbl) - lbl = path_basename(path_get(RARCH_PATH_BASENAME)); - lbl_len = strlen(lbl); - sys_lbl = (char*)malloc(lbl_len + sys_id_len + 3); - memcpy(sys_lbl, sys_id, sys_id_len); - memcpy(sys_lbl + sys_id_len, "__", 2); - memcpy(sys_lbl + 2 + sys_id_len, lbl, lbl_len); - sys_lbl[sys_id_len + 2 + lbl_len] = 
'\0'; + if (!vp_width || !vp_height) + goto finish; + + if (dummy_data == RETRO_HW_FRAME_BUFFER_VALID) + { + /* In this case, we used the viewport to grab the image and translate it, + * and we have the translated image in the image_body buffer. */ + translation_user_error("Video driver unsupported for hardware frame."); + translation_release(true); + goto finish; } - if (!scaler) + /* + * The assigned pitch may not be reliable. The width of the video frame can + * change during run-time, but the pitch may not, so we just assign it as + * the width times the byte depth. + */ + if (video_driver_pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888) + { + raw_output_size = vp_width * vp_height * 4 * sizeof(uint8_t); + raw_output_data = (uint8_t*)malloc(raw_output_size); + scaler->out_fmt = SCALER_FMT_ARGB8888; + scaler->out_stride = vp_width * 4; + pitch = vp_width * 4; + } + else + { + raw_output_size = vp_width * vp_height * 2 * sizeof(uint8_t); + raw_output_data = (uint8_t*)malloc(raw_output_size); + scaler->out_fmt = SCALER_FMT_RGB565; + scaler->out_stride = vp_width * 1; + pitch = vp_width * 2; + } + + if (!raw_output_data) goto finish; - data = video_st->frame_cache_data; - width = video_st->frame_cache_width; - height = video_st->frame_cache_height; - pitch = video_st->frame_cache_pitch; + scaler->in_fmt = SCALER_FMT_BGR24; + scaler->in_width = width; + scaler->in_height = height; + scaler->out_width = vp_width; + scaler->out_height = vp_height; + scaler->scaler_type = SCALER_TYPE_POINT; + scaler_ctx_gen_filter(scaler); + + scaler->in_stride = -1 * vp_width * 3; + + scaler_ctx_scale_direct( + scaler, raw_output_data, + (uint8_t*)image_body + (height - 1) * width * 3); + video_driver_frame(raw_output_data, width, height, pitch); + +finish: + if (image_body) + free(image_body); + if (scaler) + free(scaler); + if (raw_output_data) + free(raw_output_data); +} + +/** + * Parses image data received by the server following a translation request. 
+ * This method assumes that image data is present in the response, it cannot + * be null. If widgets are supported, this method will prefer using them to + * overlay the picture on top of the video, otherwise it will try to write the + * data directly into the frame buffer, which is much less reliable. + */ +static void translation_response_image_hndl(retro_task_t *task) +{ + /* + * TODO/FIXME: Moved processing to the callback to fix an issue with + * texture loading off the main thread in OpenGL. I'm leaving the original + * structure here so we can move back to the handler if it becomes possible + * in the future. + */ + task_set_finished(task, true); +} - if (!data) +/** + * Callback invoked once the image data received from the server has been + * processed and eventually displayed. This is necessary to ensure that the + * next automatic request will be invoked once the task is finished. + */ +static void translation_response_image_cb( + retro_task_t *task, void *task_data, void *user_data, const char *error) +{ + settings_t* settings = config_get_ptr(); + access_state_t *access_st = access_state_get_ptr(); + + enum image_type_enum image_type; + access_response_t *response = (access_response_t*)task->user_data; + video_driver_state_t *video_st = video_state_get_ptr(); + + if (task_get_cancelled(task) || response->image_size < 4) + goto finish; + + if ( response->image[0] == 'B' + && response->image[1] == 'M') + image_type = IMAGE_TYPE_BMP; +#ifdef HAVE_RPNG + else if (response->image[1] == 'P' + && response->image[2] == 'N' + && response->image[3] == 'G') + image_type = IMAGE_TYPE_PNG; +#endif + else + { + translation_user_error("Service returned an unsupported image type."); + translation_release(true); goto finish; + } + +#ifdef HAVE_GFX_WIDGETS + if ( video_st->poke + && video_st->poke->load_texture + && video_st->poke->unload_texture) + translation_response_image_widget( + response->image, response->image_size, &image_type); + else +#endif + 
translation_response_image_direct( + response->image, response->image_size, &image_type); + +finish: + free(response->image); + free(response); - if (data == RETRO_HW_FRAME_BUFFER_VALID) + if (access_st->ai_service_auto != 0) + call_auto_translate_task(settings); +} + +/** + * Processes text data received by the server following a translation request. + * Does nothing if the response does not contain any text data (NULL). Text + * is either forcibly read by the narrator, even if it is disabled in the + * front-end (Narrator Mode) or displayed on screen (in Text Mode). In the + * later, it will only be read if the front-end narrator is enabled. + */ +static void translation_response_text(access_response_t *response) +{ + settings_t *settings = config_get_ptr(); + unsigned service_mode = settings->uints.ai_service_mode; + access_state_t *access_st = access_state_get_ptr(); + + if ( (!response->text || string_is_empty(response->text)) + && (service_mode == 2 || service_mode == 3 || service_mode == 4) + && access_st->ai_service_auto == 0) { - /* - The direct frame capture didn't work, so try getting it - from the viewport instead. This isn't as good as the - raw frame buffer, since the viewport may us bilinear - filtering, or other shaders that will completely trash - the OCR, but it's better than nothing. 
- */ - vp.x = 0; - vp.y = 0; - vp.width = 0; - vp.height = 0; - vp.full_width = 0; - vp.full_height = 0; - - video_driver_get_viewport_info(&vp); - - if (!vp.width || !vp.height) - goto finish; + translation_hash_info(MSG_AI_NOTHING_TO_TRANSLATE); + return; + } + + if (response->text) + { + /* The text should be displayed on screen in Text or Text+Narrator mode */ + if (service_mode == 3 || service_mode == 4) + { +#ifdef HAVE_GFX_WIDGETS + if (settings->bools.menu_enable_widgets) + { + dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); + + if (p_dispwidget->ai_service_overlay_state == 1) + gfx_widgets_ai_service_overlay_unload(); + + strlcpy(p_dispwidget->ai_service_text, response->text, 255); + + if (response->text_position > 0) + p_dispwidget->ai_service_text_position + = (unsigned)response->text_position; + else + p_dispwidget->ai_service_text_position = 0; + + p_dispwidget->ai_service_overlay_state = 1; + } + else + { +#endif + /* + * TODO/FIXME: Obviously this will not be as good as using widgets, + * since messages run on a timer but it's an alternative at least. + * Maybe split the message here so it fits the viewport. + */ + runloop_msg_queue_push( + response->text, 2, 180, + true, NULL, MESSAGE_QUEUE_ICON_DEFAULT, + MESSAGE_QUEUE_CATEGORY_INFO); + +#ifdef HAVE_GFX_WIDGETS + } +#endif + } + translation_speak(&response->text[0]); + free(response->text); + } +} + +/** + * Processes audio data received by the server following a translation request. + * Does nothing if the response does not contain any audio data (NULL). Audio + * data is simply played as soon as possible using the audio driver. 
+ */
+static void translation_response_sound(access_response_t *response)
+{
+#ifdef HAVE_AUDIOMIXER
+   if (response->sound)
+   {
+      audio_mixer_stream_params_t params;
-      bit24_image_prev = (uint8_t*)malloc(vp.width * vp.height * 3);
-      bit24_image      = (uint8_t*)malloc(width * height * 3);
+      params.volume               = 1.0f;
+      /* user->slot_selection_type; */
+      params.slot_selection_type  = AUDIO_MIXER_SLOT_SELECTION_MANUAL;
+      params.slot_selection_idx   = 10;
+      /* user->stream_type; */
+      params.stream_type          = AUDIO_STREAM_TYPE_SYSTEM;
+      params.type                 = AUDIO_MIXER_TYPE_WAV;
+      params.state                = AUDIO_STREAM_STATE_PLAYING;
+      params.buf                  = response->sound;
+      params.bufsize              = response->sound_size;
+      params.cb                   = NULL;
+      params.basename             = NULL;
+
+      audio_driver_mixer_add_stream(&params);
+      free(response->sound);
+   }
+#endif
+}
+
+/**
+ * Processes input data received by the server following a translation request.
+ * Does nothing if the response does not contain any input data (NULL). This
+ * method will try to forcibly press all the retropad keys listed in the input
+ * string (comma-separated).
+ */
+static void translation_response_input(access_response_t *response)
+{
+   if (response->input)
+   {
+#ifdef HAVE_ACCESSIBILITY
+      input_driver_state_t *input_st = input_state_get_ptr();
+#endif
+      char *token = strtok(response->input, ",");
+
+      while (token)
+      {
+         if (string_is_equal(token, "pause"))
+            command_event(CMD_EVENT_PAUSE, NULL);
+         else if (string_is_equal(token, "unpause"))
+            command_event(CMD_EVENT_UNPAUSE, NULL);
+#ifdef HAVE_ACCESSIBILITY
+         else
+         {
+            int i;
+
+            /* Compare each label against the current token — the previous
+             * code compared against the whole comma-separated input string,
+             * so individual key names never matched. Pressing the key inside
+             * the loop also fixes the old off-by-one: the post-increment left
+             * 'i' one past the matched label after the search loop exited. */
+            for (i = 0; i < (int)ARRAY_SIZE(ACCESS_INPUT_LABELS); i++)
+            {
+               if (string_is_equal(ACCESS_INPUT_LABELS[i], token))
+               {
+                  input_st->ai_gamepad_state[i] = 2;
+                  break;
+               }
+            }
+         }
+#endif
+         token = strtok(NULL, ",");
+      }
+      free(response->input);
+   }
+}
+
+/**
+ * Callback invoked when the server responds to our translation request.
If the + * service is still running by then, this method will parse the JSON payload + * and process the data, eventually re-invoking the translation service for + * a new request if the server allowed automatic translation. + */ +static void translation_response_cb( + retro_task_t *task, void *task_data, void *user_data, const char *error) +{ + http_transfer_data_t *data = (http_transfer_data_t*)task_data; + access_state_t *access_st = access_state_get_ptr(); + settings_t *settings = config_get_ptr(); + access_response_t *response = NULL; + bool auto_mode_prev = access_st->ai_service_auto; + unsigned service_mode = settings->uints.ai_service_mode; + + /* We asked the service to stop by calling translation_release, so bail */ + if (!access_st->last_image) + goto finish; + if (translation_user_error(error)) + goto abort; + if (!(response = parse_response_json(data))) + goto abort; + if (translation_user_error(response->error)) + goto abort; + + access_st->ai_service_auto = (response->recall == NULL) ? 0 : 1; + if (auto_mode_prev != access_st->ai_service_auto) + translation_hash_info(auto_mode_prev + ? MSG_AI_AUTO_MODE_DISABLED : MSG_AI_AUTO_MODE_ENABLED); + + /* + * We want to skip the data on auto=continue, unless automatic translation + * has just been enabled, meaning data must be displayed again to the user. 
+ */ + if ( !string_is_equal(response->recall, "continue") + || (auto_mode_prev == 0 && access_st->ai_service_auto == 1)) + { +#ifdef HAVE_GFX_WIDGETS + dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); +#endif + translation_response_text(response); + translation_response_sound(response); + translation_response_input(response); + + if (response->image) + { + retro_task_t *task = task_init(); + if (!task) + goto finish; + + task->handler = translation_response_image_hndl; + task->callback = translation_response_image_cb; + task->user_data = response; + task->mute = true; + access_st->response_task = task; + task_queue_push(task); + + /* Leave memory clean-up and auto callback to the task itself */ + return; + } + else if (access_st->ai_service_auto == 0 + && (service_mode == 0 || service_mode == 5)) + translation_hash_info(MSG_AI_NOTHING_TO_TRANSLATE); + } + goto finish; + +abort: + translation_release(true); + if (response && response->error) + free(response->error); + +finish: + if (response) + { + if (response->image) + free(response->image); + if (response->recall) + free(response->recall); + free(response); + + if (access_st->ai_service_auto != 0) + call_auto_translate_task(settings); + } +} - if (!bit24_image_prev || !bit24_image) +/* REQUEST ------------------------------------------------------------------ */ +/* -------------------------------------------------------------------------- */ + +/** + * Grabs and returns a frame from the video driver. If the frame buffer cannot + * be accessed, this method will try to obtain a capture of the viewport as a + * fallback, although this frame may be altered by any filter or shader enabled + * by the user. Returns null if both methods fail. 
+ */ +static access_frame_t* translation_grab_frame() +{ + size_t pitch; + struct video_viewport vp = {0}; + const void *data = NULL; + uint8_t *bit24_image_prev = NULL; + struct scaler_ctx *scaler = NULL; + access_frame_t *frame = NULL; + video_driver_state_t *video_st = video_state_get_ptr(); + const enum retro_pixel_format pix_fmt = video_st->pix_fmt; + + if (!(scaler = (struct scaler_ctx*)calloc(1, sizeof(struct scaler_ctx)))) + goto finish; + if (!(frame = (access_frame_t*)malloc(sizeof(access_frame_t)))) + goto finish; + + data = video_st->frame_cache_data; + frame->width = video_st->frame_cache_width; + frame->height = video_st->frame_cache_height; + pitch = video_st->frame_cache_pitch; + + if (!data) + goto finish; + + video_driver_get_viewport_info(&vp); + if (!vp.width || !vp.height) + goto finish; + + frame->content_x = vp.x; + frame->content_y = vp.y; + frame->content_width = vp.width; + frame->content_height = vp.height; + frame->viewport_width = vp.full_width; + frame->viewport_height = vp.full_height; + frame->size = frame->width * frame->height * 3; + + if (!(frame->data = (uint8_t*)malloc(frame->size))) + goto finish; + + if (data == RETRO_HW_FRAME_BUFFER_VALID) + { + /* Direct frame capture failed, fallback on viewport capture */ + if (!(bit24_image_prev = (uint8_t*)malloc(vp.width * vp.height * 3))) goto finish; if (!( video_st->current_video->read_viewport && video_st->current_video->read_viewport( video_st->data, bit24_image_prev, false))) { - RARCH_LOG("Could not read viewport for translation service...\n"); + translation_user_error("Could not read viewport."); + translation_release(true); goto finish; } @@ -891,275 +1265,535 @@ bool run_translation_service(settings_t *settings, bool paused) scaler->scaler_type = SCALER_TYPE_POINT; scaler->in_width = vp.width; scaler->in_height = vp.height; - scaler->out_width = width; - scaler->out_height = height; + scaler->out_width = frame->width; + scaler->out_height = frame->height; 
scaler_ctx_gen_filter(scaler); - scaler->in_stride = vp.width*3; - scaler->out_stride = width*3; - scaler_ctx_scale_direct(scaler, bit24_image, bit24_image_prev); + scaler->in_stride = vp.width * 3; + scaler->out_stride = frame->width * 3; + scaler_ctx_scale_direct(scaler, frame->data, bit24_image_prev); } else { - const enum retro_pixel_format - video_driver_pix_fmt = video_st->pix_fmt; - /* This is a software core, so just change the pixel format to 24-bit. */ - if (!(bit24_image = (uint8_t*)malloc(width * height * 3))) - goto finish; - - if (video_driver_pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888) + /* This is a software core, so just change the pixel format to 24-bit */ + if (pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888) scaler->in_fmt = SCALER_FMT_ARGB8888; else scaler->in_fmt = SCALER_FMT_RGB565; + video_frame_convert_to_bgr24( - scaler, - (uint8_t *)bit24_image, - (const uint8_t*)data + ((int)height - 1)*pitch, - width, height, - (int)-pitch); + scaler, frame->data, (const uint8_t*)data, + frame->width, frame->height, (int)pitch); } scaler_ctx_gen_reset(scaler); + +finish: + if (bit24_image_prev) + free(bit24_image_prev); + if (scaler) + free(scaler); - if (!bit24_image) + if (frame) { - error = true; - goto finish; + if (frame->data) + return frame; + else + free(frame); } + return NULL; +} - if (TRANSLATE_USE_BMP) +/** + * Returns true if the {frame} passed in parameter is a duplicate of the last + * frame the service was invoked on. This method effectively helps to prevent + * the service from spamming the server with the same request over and over + * again when running in automatic mode. This method will also save the image + * in the {frame} structure as the new last image for the service. 
+ */ +static bool translation_dupe_fail(access_frame_t *frame) +{ + access_state_t *access_st = access_state_get_ptr(); + bool size_equal = (frame->size == access_st->last_image_size); + bool has_failed = false; + +#ifdef HAVE_THREADS + slock_lock(access_st->image_lock); +#endif + if (access_st->last_image && access_st->ai_service_auto != 0) { - /* - At this point, we should have a screenshot in the buffer, - so allocate an array to contain the BMP image along with - the BMP header as bytes, and then covert that to a - b64 encoded array for transport in JSON. - */ - form_bmp_header(header, width, height, false); - if (!(bmp_buffer = (uint8_t*)malloc(width * height * 3 + 54))) - goto finish; - - memcpy(bmp_buffer, header, 54 * sizeof(uint8_t)); - memcpy(bmp_buffer + 54, - bit24_image, - width * height * 3 * sizeof(uint8_t)); - buffer_bytes = sizeof(uint8_t) * (width * height * 3 + 54); + if ( size_equal + && u8_array_equal(frame->data, access_st->last_image, frame->size)) + has_failed = true; } - else + + /* Init last image or reset buffer size if image size changed */ + if (!has_failed && (!access_st->last_image || !size_equal)) { - pitch = width * 3; - bmp_buffer = rpng_save_image_bgr24_string( - bit24_image + width * (height-1) * 3, - width, height, (signed)-pitch, &buffer_bytes); + if (access_st->last_image) + free(access_st->last_image); + + access_st->last_image_size = frame->size; + if (!(access_st->last_image = (uint8_t*)malloc(frame->size))) + has_failed = true; } + + if (!has_failed) + memcpy(access_st->last_image, frame->data, frame->size); - if (!(bmp64_buffer = base64((void *)bmp_buffer, - (int)(sizeof(uint8_t) * buffer_bytes), - &bmp64_length))) - goto finish; +#ifdef HAVE_THREADS + slock_unlock(access_st->image_lock); +#endif + return has_failed; +} - if (!(jsonwriter = rjsonwriter_open_memory())) +/** + * Converts and returns the {frame} as a base64 encoded PNG or BMP. 
The + * selected image type will be available in the returned object, and will + * favor PNG if possible. Returns NULL on failure. + */ +static access_base64_t* translation_frame_encode(access_frame_t *frame) +{ + uint8_t header[54]; + uint8_t *buffer = NULL; + uint64_t bytes = 0; + access_base64_t *encode = NULL; + + if (!(encode = (access_base64_t*)malloc(sizeof(access_base64_t)))) + goto finish; + +#ifdef HAVE_RPNG + strcpy(encode->format, "png"); + buffer = rpng_save_image_bgr24_string( + frame->data, frame->width, frame->height, + frame->width * 3, &bytes); +#else + strcpy(encode->format, "bmp"); + form_bmp_header(header, frame->width, frame->height, false); + if (!(buffer = (uint8_t*)malloc(frame->size + 54))) goto finish; - rjsonwriter_raw(jsonwriter, "{", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, "image"); - rjsonwriter_raw(jsonwriter, ":", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string_len(jsonwriter, bmp64_buffer, bmp64_length); + memcpy(buffer, header, 54 * sizeof(uint8_t)); + memcpy(buffer + 54, frame->data, frame->size * sizeof(uint8_t)); + bytes = sizeof(uint8_t) * (frame->size + 54); +#endif - /* Form request... 
*/ - if (sys_lbl) - { - rjsonwriter_raw(jsonwriter, ",", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, "label"); - rjsonwriter_raw(jsonwriter, ":", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, sys_lbl); - } + encode->data = base64( + (void*)buffer, (int)(bytes * sizeof(uint8_t)), &encode->length); + +finish: + if (buffer) + free(buffer); + + if (encode->data) + return encode; + else + free(encode); - rjsonwriter_raw(jsonwriter, ",", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, "state"); - rjsonwriter_raw(jsonwriter, ":", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_raw(jsonwriter, "{", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, "paused"); - rjsonwriter_raw(jsonwriter, ":", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_rawf(jsonwriter, "%u", (paused ? 1 : 0)); + return NULL; +} + +/** + * Returns a newly allocated string describing the content and core currently + * running. The string will contains the name of the core (or 'core') followed + * by a double underscore (_) and the name of the content. Returns NULL on + * failure. + */ +static char* translation_get_content_label() +{ + const char *label = NULL; + char* system_label = NULL; + core_info_t *core_info = NULL; + + core_info_get_current_core(&core_info); + if (core_info) { - static const char* state_labels[] = { "b", "y", "select", "start", "up", "down", "left", "right", "a", "x", "l", "r", "l2", "r2", "l3", "r3" }; - int i; - for (i = 0; i < (int)ARRAY_SIZE(state_labels); i++) + const struct playlist_entry *entry = NULL; + playlist_t *current_playlist = playlist_get_cached(); + const char *system_id; + size_t system_id_len; + size_t label_len; + + system_id = (core_info->system_id) ? 
core_info->system_id : "core"; + system_id_len = strlen(system_id); + + if (current_playlist) { - rjsonwriter_raw(jsonwriter, ",", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, state_labels[i]); - rjsonwriter_raw(jsonwriter, ":", 1); - rjsonwriter_raw(jsonwriter, " ", 1); -#ifdef HAVE_ACCESSIBILITY - rjsonwriter_rawf(jsonwriter, "%u", - (input_st->ai_gamepad_state[i] ? 1 : 0)); -#else - rjsonwriter_rawf(jsonwriter, "%u", 0); -#endif + playlist_get_index_by_path( + current_playlist, path_get(RARCH_PATH_CONTENT), &entry); + + if (entry && !string_is_empty(entry->label)) + label = entry->label; } + + if (!label) + label = path_basename(path_get(RARCH_PATH_BASENAME)); + + label_len = strlen(label); + if (!(system_label = (char*)malloc(label_len + system_id_len + 3))) + return NULL; + + memcpy(system_label, system_id, system_id_len); + memcpy(system_label + system_id_len, "__", 2); + memcpy(system_label + 2 + system_id_len, label, label_len); + system_label[system_id_len + 2 + label_len] = '\0'; } - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_raw(jsonwriter, "}", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_raw(jsonwriter, "}", 1); + + return system_label; +} - if (!(json_buffer = rjsonwriter_get_memory_buffer(jsonwriter, NULL))) - goto finish; /* ran out of memory */ +/** + * Creates and returns a JSON writer containing the payload to send alongside + * the translation request. {label} may be NULL, in which case no label will + * be supplied in the JSON. Returns NULL if the writer cannot be initialized. 
+ */ +static rjsonwriter_t* build_request_json( + access_base64_t *image, access_request_t *request, + access_frame_t *frame, char *label) +{ + unsigned i; + rjsonwriter_t* writer = NULL; + + if (!(writer = rjsonwriter_open_memory())) + return NULL; -#ifdef DEBUG - if (access_st->ai_service_auto != 2) - RARCH_LOG("Request size: %d\n", bmp64_length); -#endif + rjsonwriter_add_start_object(writer); { - char new_ai_service_url[PATH_MAX_LENGTH]; - char separator = '?'; - unsigned ai_service_source_lang = settings->uints.ai_service_source_lang; - unsigned ai_service_target_lang = settings->uints.ai_service_target_lang; - const char *ai_service_url = settings->arrays.ai_service_url; - size_t _len = strlcpy(new_ai_service_url, - ai_service_url, sizeof(new_ai_service_url)); - - /* if query already exists in url, then use &'s instead */ - if (strrchr(new_ai_service_url, '?')) - separator = '&'; - - /* source lang */ - if (ai_service_source_lang != TRANSLATION_LANG_DONT_CARE) + rjsonwriter_add_string(writer, "image"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_string_len(writer, image->data, image->length); + + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, "format"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_string(writer, image->format); + + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, "coords"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_start_array(writer); { - const char *lang_source = ai_service_get_str( - (enum translation_lang)ai_service_source_lang); - - if (!string_is_empty(lang_source)) - { - new_ai_service_url[ _len] = separator; - new_ai_service_url[++_len] = '\0'; - _len += strlcpy(new_ai_service_url + _len, - "source_lang=", - sizeof(new_ai_service_url) - _len); - _len += strlcpy(new_ai_service_url + _len, - lang_source, - sizeof(new_ai_service_url) - _len); - separator = '&'; - } + rjsonwriter_add_unsigned(writer, frame->content_x); + rjsonwriter_add_comma(writer); + rjsonwriter_add_unsigned(writer, 
frame->content_y); + rjsonwriter_add_comma(writer); + rjsonwriter_add_unsigned(writer, frame->content_width); + rjsonwriter_add_comma(writer); + rjsonwriter_add_unsigned(writer, frame->content_height); + } + rjsonwriter_add_end_array(writer); + + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, "viewport"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_start_array(writer); + { + rjsonwriter_add_unsigned(writer, frame->viewport_width); + rjsonwriter_add_comma(writer); + rjsonwriter_add_unsigned(writer, frame->viewport_height); } + rjsonwriter_add_end_array(writer); - /* target lang */ - if (ai_service_target_lang != TRANSLATION_LANG_DONT_CARE) + if (label) { - const char *lang_target = ai_service_get_str( - (enum translation_lang)ai_service_target_lang); + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, "label"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_string(writer, label); + } - if (!string_is_empty(lang_target)) + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, "state"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_start_object(writer); + { + rjsonwriter_add_string(writer, "paused"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_unsigned(writer, (request->paused ? 1 : 0)); + + for (i = 0; i < ARRAY_SIZE(ACCESS_INPUT_LABELS); i++) { - new_ai_service_url[ _len] = separator; - new_ai_service_url[++_len] = '\0'; - _len += strlcpy(new_ai_service_url + _len, - "target_lang=", - sizeof(new_ai_service_url) - _len); - _len += strlcpy(new_ai_service_url + _len, - lang_target, - sizeof(new_ai_service_url) - _len); - separator = '&'; - } + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, ACCESS_INPUT_LABELS[i]); + rjsonwriter_add_colon(writer); + rjsonwriter_add_unsigned(writer, request->inputs[i]); + } + rjsonwriter_add_end_object(writer); } + rjsonwriter_add_end_object(writer); + } + + return writer; +} + +/** + * Writes in the provided {buffer} the URL for the translation request. 
The + * buffer is guaranteed to contain the server URL as well as an 'output' param + * specifying the accepted data types for this service. + */ +static void build_request_url(char *buffer, size_t length, settings_t *settings) +{ + char token[2]; + size_t _len; + bool poke_supported = false; + unsigned service_source_lang = settings->uints.ai_service_source_lang; + unsigned service_target_lang = settings->uints.ai_service_target_lang; + const char *service_url = settings->arrays.ai_service_url; + unsigned ai_service_mode = settings->uints.ai_service_mode; +#ifdef HAVE_GFX_WIDGETS + video_driver_state_t *video_st = video_state_get_ptr(); + poke_supported = video_st->poke + && video_st->poke->load_texture + && video_st->poke->unload_texture; +#endif + + _len = strlcpy(buffer, service_url, length); + buffer += _len; + length -= _len; + + token[1] = '\0'; + if (strrchr(buffer, '?')) + token[0] = '&'; + else + token[0] = '?'; + + if (service_source_lang != TRANSLATION_LANG_DONT_CARE) + { + const char *lang_source + = ai_service_get_str((enum translation_lang)service_source_lang); - /* mode */ + if (!string_is_empty(lang_source)) { - unsigned ai_service_mode = settings->uints.ai_service_mode; - /*"image" is included for backwards compatability with - * vgtranslate < 1.04 */ + _len = strlcpy(buffer, token, length); + buffer += _len; + length -= _len; + + _len = strlcpy(buffer + _len, "source_lang=", length - _len); + buffer += _len; + length -= _len; + + _len = strlcpy(buffer, lang_source, length); + buffer += _len; + length -= _len; + token[0] = '&'; + } + } + + if (service_target_lang != TRANSLATION_LANG_DONT_CARE) + { + const char *lang_target + = ai_service_get_str((enum translation_lang)service_target_lang); + + if (!string_is_empty(lang_target)) + { + _len = strlcpy(buffer, token, length); + buffer += _len; + length -= _len; + + _len = strlcpy(buffer, "target_lang=", length); + buffer += _len; + length -= _len; + + _len = strlcpy(buffer, lang_target, length); + 
buffer += _len; + length -= _len; + token[0] = '&'; + } + } + + _len = strlcpy(buffer, token, length); + buffer += _len; + length -= _len; - new_ai_service_url[ _len] = separator; - new_ai_service_url[++_len] = '\0'; - _len += strlcpy(new_ai_service_url + _len, - "output=", - sizeof(new_ai_service_url) - _len); + _len = strlcpy(buffer, "output=", length); + buffer += _len; + length -= _len; - switch (ai_service_mode) + switch (ai_service_mode) + { + case 0: /* Image Mode */ + _len = strlcpy(buffer, "image,bmp", length); + buffer += _len; + length -= _len; +#ifdef HAVE_RPNG + _len = strlcpy(buffer, ",png", length); + buffer += _len; + length -= _len; + if (poke_supported) { - case 2: - strlcpy(new_ai_service_url + _len, - "text", - sizeof(new_ai_service_url) - _len); - break; - case 1: - case 3: - _len += strlcpy(new_ai_service_url + _len, - "sound,wav", - sizeof(new_ai_service_url) - _len); - if (ai_service_mode == 1) - break; - /* fall-through intentional for ai_service_mode == 3 */ - case 0: - _len += strlcpy(new_ai_service_url + _len, - "image,png", - sizeof(new_ai_service_url) - _len); -#ifdef HAVE_GFX_WIDGETS - if ( video_st->poke - && video_st->poke->load_texture - && video_st->poke->unload_texture) - strlcpy(new_ai_service_url + _len, - ",png-a", - sizeof(new_ai_service_url) - _len); + strlcpy(buffer, ",png-a", length); + buffer += _len; + length -= _len; + } #endif - break; - default: - break; + break; + + case 1: /* Speech Mode */ + _len = strlcpy(buffer, "sound,wav", length); + buffer += _len; + length -= _len; + break; + + case 2: /* Narrator Mode */ + _len = strlcpy(buffer, "text", length); + buffer += _len; + length -= _len; + break; + + case 3: /* Text Mode */ + case 4: /* Text + Narrator */ + _len = strlcpy(buffer, "text,subs", length); + buffer += _len; + length -= _len; + break; + + case 5: /* Image + Narrator */ + _len = strlcpy(buffer, "text,image,bmp", length); + buffer += _len; + length -= _len; +#ifdef HAVE_RPNG + _len = strlcpy(buffer, 
",png", length); + buffer += _len; + length -= _len; + if (poke_supported) + { + _len = strlcpy(buffer, ",png-a", length); + buffer += _len; + length -= _len; } - - } -#ifdef DEBUG - if (access_st->ai_service_auto != 2) - RARCH_LOG("SENDING... %s\n", new_ai_service_url); #endif - task_push_http_post_transfer(new_ai_service_url, - json_buffer, true, NULL, handle_translation_cb, NULL); + break; } +} - error = false; +/** + * Captures a frame from the currently running core and sends a request to the + * translation server. Processing and encoding this data comes with a cost, so + * it is offloaded to the task thread. + */ +static void translation_request_hndl(retro_task_t *task) +{ + access_request_t *request = (access_request_t*)task->user_data; + settings_t *settings = config_get_ptr(); + access_state_t *access_st = access_state_get_ptr(); + access_frame_t *frame = NULL; + access_base64_t *encode = NULL; + char *label = NULL; + rjsonwriter_t *writer = NULL; + const char *json = NULL; + bool sent = false; + char url[PATH_MAX_LENGTH]; + + if (task_get_cancelled(task)) + goto finish; + + access_st->last_call = cpu_features_get_time_usec(); + + frame = translation_grab_frame(); + if (task_get_cancelled(task) || !frame) + goto finish; + + if (translation_dupe_fail(frame)) + goto finish; + + encode = translation_frame_encode(frame); + if (task_get_cancelled(task) || !encode) + goto finish; + + label = translation_get_content_label(); + writer = build_request_json(encode, request, frame, label); + if (task_get_cancelled(task) || !writer) + goto finish; + + json = rjsonwriter_get_memory_buffer(writer, NULL); + build_request_url(url, PATH_MAX_LENGTH, settings); + if (task_get_cancelled(task) || !json) + goto finish; + +#ifdef DEBUG + if (access_st->ai_service_auto == 0) + RARCH_LOG("[Translate]: Sending request to: %s\n", url); +#endif + sent = true; + task_push_http_post_transfer( + url, json, true, NULL, translation_response_cb, NULL); + finish: - if (bit24_image_prev) - 
free(bit24_image_prev); - if (bit24_image) - free(bit24_image); - - if (scaler) - free(scaler); - - if (bmp_buffer) - free(bmp_buffer); + task_set_finished(task, true); - if (bmp64_buffer) - free(bmp64_buffer); - if (sys_lbl) - free(sys_lbl); - if (jsonwriter) - rjsonwriter_free(jsonwriter); - return !error; + if (frame && frame->data) + free(frame->data); + if (frame) + free(frame); + if (encode && encode->data) + free(encode->data); + if (encode) + free(encode); + if (label) + free(label); + if (writer) + rjsonwriter_free(writer); + if (request && request->inputs) + free(request->inputs); + if (request) + free(request); + + /* Plan next auto-request if this one was skipped */ + if (!sent && access_st->ai_service_auto != 0) + call_auto_translate_task(settings); } -#ifdef HAVE_ACCESSIBILITY -bool is_narrator_running(bool accessibility_enable) +/** + * Invokes the translation service. Captures a frame from the current content + * core and sends it over HTTP to the translation server. Once the server + * responds, the translation data is displayed accordingly to the preferences + * of the user. Returns true if the request could be built and sent. 
+ */ +bool run_translation_service(settings_t *settings, bool paused) { - access_state_t *access_st = access_state_get_ptr(); - if (is_accessibility_enabled( - accessibility_enable, - access_st->enabled)) + unsigned i; + retro_task_t *task = NULL; + access_request_t *request = NULL; + access_state_t *access_st = access_state_get_ptr(); +#ifdef HAVE_ACCESSIBILITY + input_driver_state_t *input_st = input_state_get_ptr(); +#endif + + if (!(request = (access_request_t*)malloc(sizeof(access_request_t)))) + goto failure; + +#ifdef HAVE_THREADS + if (!access_st->image_lock) { - frontend_ctx_driver_t *frontend = - frontend_state_get_ptr()->current_frontend_ctx; - if (frontend && frontend->is_narrator_running) - return frontend->is_narrator_running(); + if (!(access_st->image_lock = slock_new())) + goto failure; } +#endif + + task = task_init(); + if (!task) + goto failure; + + /* Freeze frontend state while we're still running on the main thread */ + request->paused = paused; + request->inputs = (char*)malloc( + sizeof(char) * ARRAY_SIZE(ACCESS_INPUT_LABELS)); + +#ifdef HAVE_ACCESSIBILITY + for (i = 0; i < ARRAY_SIZE(ACCESS_INPUT_LABELS); i++) + request->inputs[i] = input_st->ai_gamepad_state[i] ? 1 : 0; +#endif + + task->handler = translation_request_hndl; + task->user_data = request; + task->mute = true; + access_st->request_task = task; + task_queue_push(task); + return true; + +failure: + if (request) + free(request); + + return false; } -#endif