# Face Component


## GIF
* Faster
* Doesn't need crazy memory
* Works as long as the 3 "animation" frames are supplied
* Doesn't sync to what's being spoken

### Imports

In [None]:
from PIL import Image
import imageio
from IPython.display import display, Image as IPImage
import numpy as np

### Function

In [None]:
#Make sure you have 3 "frames" available so that the looping GIF animation can be made.
#Upload them to the project and keep their names; you will need to paste those names into the function call in the appropriate LLM sections

def create_and_display_gif(images_paths, total_duration_seconds=5, duration_per_image=0.5):
    """Build a looping GIF from still images, save it as 'output.gif' and display it.

    The images are resized to the smallest width and the smallest height found
    among them, then repeated in order until the requested number of frames
    is reached.

    Args:
        images_paths: Paths of the frame images, in playback order.
        total_duration_seconds: Desired length of the animation; the frame
            count is int(total_duration_seconds / duration_per_image), with a
            minimum of one frame.
        duration_per_image: Value handed to the imageio writer as the
            per-frame ``duration``.

    Raises:
        ValueError: If no image path is given or ``duration_per_image`` is not
            greater than zero.
    """
    images_paths = list(images_paths)
    if not images_paths:
        raise ValueError("images_paths must contain at least one image path")
    if duration_per_image <= 0:
        raise ValueError("duration_per_image must be greater than 0")

    # Load each image fully and release its file handle right away.
    images = []
    for image_path in images_paths:
        with Image.open(image_path) as image:
            images.append(image.copy())

    # Resize every image to the smallest common width/height.
    width = min(image.size[0] for image in images)
    height = min(image.size[1] for image in images)
    # Convert once per source image instead of once per repeated frame.
    frame_arrays = [np.array(image.resize((width, height))) for image in images]

    # Always write at least one frame so the GIF is never empty.
    num_frames = max(1, int(total_duration_seconds / duration_per_image))

    # Cycle through the source images until num_frames frames were produced.
    frames = [frame_arrays[index % len(frame_arrays)] for index in range(num_frames)]

    # Save GIF
    with imageio.get_writer('output.gif', mode='I', duration=duration_per_image) as writer:
        for frame in frames:
            writer.append_data(frame)

    # Display GIF
    display(IPImage('output.gif'))

## Deepfake model

*   [Source Repository](https://github.com/OpenTalker/SadTalker)
* [Source Video](https://youtu.be/fDgQcDL-qOc?si=gkpbfpA0c-U4h-0c)
* Slower
* Memory gated
* Some images may not be detected
* Gives a more fitting/matching animation



### Imports

In [None]:
### Make sure that CUDA is available in Edit -> Notebook settings -> GPU
# Prints the GPU name with its total and free memory, as CSV without a header row.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader

In [None]:
!update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.8 2
!update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.9 1
!sudo apt install python3.8

!sudo apt-get install python3.8-distutils

!python --version

!apt-get update

!apt install software-properties-common

!sudo dpkg --remove --force-remove-reinstreq python3-pip python3-setuptools python3-wheel

!apt-get install python3-pip

print('Git clone project and install requirements...')
!git clone https://github.com/Winfredy/SadTalker &> /dev/null
%cd SadTalker
!export PYTHONPATH=/content/SadTalker:$PYTHONPATH
!python3.8 -m pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
!apt update
!apt install ffmpeg &> /dev/null
!python3.8 -m pip install -r requirements.txt

In [None]:
# Fetch the pre-trained checkpoints using the repository's download script.
print('Download pre-trained models...')
# Remove any existing checkpoints directory first so the download starts clean.
!rm -rf checkpoints
!bash scripts/download_models.sh

### Function

In [None]:
#Before you run this cell, paste your image into the 'examples/source_image' folder (the folder listed below), make sure it's in PNG format.
#In the event it cannot detect any facial landmarks you will have to try with another image

import glob
import os

import ipywidgets as widgets
import matplotlib.pyplot as plt
from IPython.display import display


def show_head_image(name):
    """Render examples/source_image/<name>.png with the axes hidden."""
    plt.imshow(plt.imread('examples/source_image/{}.png'.format(name)))
    plt.axis('off')
    plt.show()


print("Choose the image name to animate: (saved in folder 'examples/')")
img_list = [os.path.basename(path) for path in glob.glob(os.path.join('examples/source_image', '*.png'))]
img_list.sort()
# splitext keeps names that contain dots intact (split('.')[0] would cut 'my.face.png' down to 'my').
img_list = [os.path.splitext(item)[0] for item in img_list]
if not img_list:
    raise FileNotFoundError("No PNG images found in 'examples/source_image'")

# Preselect 'full3' when it exists; otherwise fall back to the first available image.
default_head_name = widgets.Dropdown(
    options=img_list,
    value='full3' if 'full3' in img_list else img_list[0],
)

def on_change(change):
    """Redraw the preview whenever the dropdown's selected value changes."""
    if change['type'] == 'change' and change['name'] == 'value':
        show_head_image(default_head_name.value)

default_head_name.observe(on_change)
display(default_head_name)
show_head_image(default_head_name.value)

In [None]:
# Leave this cell as it is; run it once an image has been chosen in the dropdown.
# It turns the selected name into the PNG path inside examples/source_image/.
img = f'examples/source_image/{default_head_name.value}.png'

In [None]:
def anim_face(image, x):
  print(image)
  !python3.8 inference.py --driven_audio /content/captured_voice.wav \
            --source_image {image} \
            --result_dir ./results --still --preprocess full --enhancer gfpgan

  results = sorted(os.listdir('./results/'))

  mp4_name = glob.glob('./results/*.mp4')[x]

  mp4 = open('{}'.format(mp4_name),'rb').read()
  data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

  print('Display animation: {}'.format(mp4_name), file=sys.stderr)
  display(HTML("""
    <video width=256 controls>
          <source src="%s" type="video/mp4">
    </video>
    """ % data_url))


In [None]:
# Switch the working directory back to /content.
%cd /content

# Speech to text



## Local

Note, will not work on Google Colab as it cannot access your local microphone(s)

### Imports

In [None]:
# Install the SpeechRecognition package (imported below as speech_recognition).
!pip install speechrecognition

Defaulting to user installation because normal site-packages is not writeable


In [None]:
# Install pyttsx3 (imported below).
!pip install pyttsx3

Defaulting to user installation because normal site-packages is not writeable


In [None]:
# Install the wheel package.
!pip install wheel

[0m

In [None]:
# Installs the PyPI distribution named "wave".
# NOTE(review): Python already ships a standard-library `wave` module, so this
# install is presumably unnecessary - confirm before keeping it.
!pip install wave

Collecting wave
  Downloading Wave-0.0.2.zip (38 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wave
  Building wheel for wave (setup.py) ... [?25l[?25hdone
  Created wheel for wave: filename=Wave-0.0.2-py3-none-any.whl size=1240 sha256=7d691ced262df0001f75c34fada05259acf02eb6184df6f163147e5e6736e1c9
  Stored in directory: /root/.cache/pip/wheels/25/e8/fe/458c7dac00c6abedad6380b9d0ef1a5cbc7c21807df1d30915
Successfully built wave
Installing collected packages: wave
Successfully installed wave-0.0.2
[0m

In [None]:
# Install the ALSA/PortAudio system packages and ffmpeg, then the PyAudio bindings.
# (Presumably the system packages are what PyAudio builds against - confirm.)
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
!pip install pyaudio

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libasound2-dev is already the newest version (1.2.6.1-1ubuntu1).
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
Suggested packages:
  portaudio19-doc
The following NEW packages will be installed:
  libportaudio2 libportaudiocpp0 portaudio19-dev
0 upgraded, 3 newly installed, 0 to remove and 45 not upgraded.
Need to get 188 kB of archives.
After this operation, 927 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libportaudio2 amd64 19.6.0-1.1 [65.3 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libportaudiocpp0 amd64 19.6.0-1.1 [16.1 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 portaudio19-dev amd64 19.6.0-1.1 [106 kB]
Fetched 188 kB in 0s (945 kB/s)
Selecting previously unselected package libportaudio2:amd64.
(Reading database ... 123398 files and directories currently installed.)
Pre

In [None]:
# Install setuptools (imported below).
!pip install setuptools




[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: C:\Users\moham\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip


In [None]:
import speech_recognition as sr
import pyttsx3
import pyaudio
import setuptools

### Main code

In [None]:
# Show the default audio input device; its 'index' entry is the value to use as
# the microphone device index in the cell below.
p = pyaudio.PyAudio()
try:
    print(p.get_default_input_device_info())
except OSError:
    # PyAudio reports a missing default input device with IOError (an alias of OSError);
    # any other exception is a real error and is no longer swallowed.
    print("No mics availiable")

{'index': 1, 'structVersion': 2, 'name': 'Microphone Array (Realtek(R) Au', 'hostApi': 0, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}


In [None]:
#main function
def getVoiceInput(device_index=2):
    """Listen on a local microphone until speech is recognized and return it in lowercase.

    Args:
        device_index: Index of the audio input device to record from (see the
            device info printed by the previous cell). Defaults to 2, the value
            that used to be hard-coded here.

    Returns:
        The text recognized by ``recognize_google``, lower-cased.

    Recognition that raises ``sr.UnknownValueError`` is retried; every other
    exception propagates to the caller.
    """
    r = sr.Recognizer()
    while True:
        try:
            with sr.Microphone(device_index=device_index) as source2:
                print("Listening: ")
                r.adjust_for_ambient_noise(source2, duration=0.2)
                audio2 = r.listen(source2)
            MyText = r.recognize_google(audio2).lower()
        except sr.UnknownValueError:
            print("Could not read input, trying again")
            continue
        print("You said: {}".format(MyText))
        return MyText

## Colab Version
Note: this requires a decent microphone, as the detection can be very finicky; it's recommended that you speak loudly and directly into it. It also doesn't hurt to wait a second or two after the code prompts you to start speaking.



*   [Reference](https://colab.research.google.com/drive/1Z6VIRZ_sX314hyev3Gm5gBqvm1wQVo-a?usp=sharing)



### Imports

In [None]:
# Install ffmpeg-python (imported below as ffmpeg).
!pip install ffmpeg-python

Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0


In [None]:
# Install SpeechRecognition (imported below as speech_recognition).
!pip install SpeechRecognition

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.3-py2.py3-none-any.whl (32.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.10.3


In [None]:
from IPython.display import HTML, Audio
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np
from scipy.io.wavfile import read as wav_read
import io
import ffmpeg
import scipy
import speech_recognition as sr
from typing_extensions import Text

### Main Code

In [None]:
# HTML/JavaScript snippet rendered by getVoiceRecording().
# It adds a button to the page, asks for microphone access and records with
# MediaRecorder. Clicking the button stops the recording; 2000 ms later the JS
# promise `data` resolves with the recording as a base64 data URL string.
# NOTE(review): `base64data` starts out as 0, so if the FileReader has not
# finished within those 2000 ms the promise resolves to "0" instead of a data URL.
AUDIO_HTML = """
<script>
var my_div = document.createElement("DIV");
var my_p = document.createElement("P");
var my_btn = document.createElement("BUTTON");
var t = document.createTextNode("Press to start recording");

my_btn.appendChild(t);
//my_p.appendChild(my_btn);
my_div.appendChild(my_btn);
document.body.appendChild(my_div);

var base64data = 0;
var reader;
var recorder, gumStream;
var recordButton = my_btn;

var handleSuccess = function(stream) {
  gumStream = stream;
  var options = {
    //bitsPerSecond: 8000, //chrome seems to ignore, always 48k
    mimeType : 'audio/webm;codecs=opus'
    //mimeType : 'audio/webm;codecs=pcm'
  };
  //recorder = new MediaRecorder(stream, options);
  recorder = new MediaRecorder(stream);
  recorder.ondataavailable = function(e) {
    var url = URL.createObjectURL(e.data);
    var preview = document.createElement('audio');
    preview.controls = true;
    preview.src = url;
    document.body.appendChild(preview);

    reader = new FileReader();
    reader.readAsDataURL(e.data);
    reader.onloadend = function() {
      base64data = reader.result;
      //console.log("Inside FileReader:" + base64data);
    }
  };
  recorder.start();
  };

recordButton.innerText = "Recording... press to stop";

navigator.mediaDevices.getUserMedia({audio: true}).then(handleSuccess);


function toggleRecording() {
  if (recorder && recorder.state == "recording") {
      recorder.stop();
      gumStream.getAudioTracks()[0].stop();
      recordButton.innerText = "Saving the recording... pls wait!"
  }
}

// https://stackoverflow.com/a/951057
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

var data = new Promise(resolve=>{
//recordButton.addEventListener("click", toggleRecording);
recordButton.onclick = ()=>{
toggleRecording()

sleep(2000).then(() => {
  // wait 2000ms for the data to be available...
  // ideally this should use something like await...
  //console.log("Inside data:" + base64data)
  resolve(base64data.toString())

});

}
});

</script>
"""

def getVoiceRecording():
  """Record audio in the browser and return it as WAV samples.

  Displays the AUDIO_HTML recorder, waits for the JavaScript `data` promise,
  converts the recorded bytes to WAV through ffmpeg pipes and parses the
  result with scipy.

  Returns:
      A tuple ``(audio, sample_rate)``: the sample array and the sample rate
      as read by ``scipy.io.wavfile.read``.

  Raises:
      RuntimeError: If the browser delivered no recording or ffmpeg produced
          no usable WAV data.
  """
  display(HTML(AUDIO_HTML))
  data = eval_js("data")

  # A data URL looks like "data:<mime>;base64,<payload>"; anything without a
  # payload means the recording never made it back from the browser.
  _, separator, payload = data.partition(',')
  if not separator or not payload:
    raise RuntimeError("No audio was captured; run the cell again and record before pressing stop")
  binary = b64decode(payload)

  process = (ffmpeg
    .input('pipe:0')
    .output('pipe:1', format='wav')
    .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)
  )
  output, err = process.communicate(input=binary)
  if len(output) < 8:
    # Too short to even hold the RIFF header that gets patched below.
    raise RuntimeError("ffmpeg produced no WAV data: {}".format((err or b'').decode(errors='replace')))

  # Replace bytes 4:8 of the ffmpeg output with the actual RIFF chunk size
  # (total length minus the 8-byte header), stored as a 4-byte little-endian integer.
  riff_chunk_size = len(output) - 8
  riff = output[:4] + riff_chunk_size.to_bytes(4, 'little') + output[8:]

  sample_rate, audio = wav_read(io.BytesIO(riff))
  return audio, sample_rate

In [None]:
def getVoiceInput():
  """Record speech in the browser and return its transcription.

  The recording is written to 'speechtotext.wav' and sent through
  ``recognize_google``.

  Returns:
      The recognized text, or "" when the speech could not be understood or
      the recognition request failed. The failure reason is printed rather
      than returned, so callers never mistake an error message for user speech.
  """
  samples, sample_rate = getVoiceRecording()
  wav_path = 'speechtotext.wav'
  scipy.io.wavfile.write(wav_path, sample_rate, samples)

  r = sr.Recognizer()
  with sr.AudioFile(wav_path) as source:
      recorded = r.record(source)
      print('Done!')

  try:
      return r.recognize_google(recorded)
  except sr.UnknownValueError:
      print("Could not understand the recording")
  except sr.RequestError as e:
      print("Speech recognition request failed: {}".format(e))
  return ""

# LLM

## GPT-3 Prompt Based
*   Pros:
      - Second most accurate to your character
      - Quicker to access pre-trained characters than manually training
      - Doesn't need intense hardware; not run locally
      - Can modify the initial prompt easier to allow for further customization
    


*   Cons:
      - Can only use a limited number of times freely before you need to pay
      - Sanitized nature of GPT-3 means dialogue and conversation will be quite censored




### Imports

In [None]:
# Install the OpenAI Python client used by the cells below.
!pip install openai

### Main(text input)

In [None]:
import os

from openai import OpenAI

# Set your OpenAI API key: the OPENAI_API_KEY environment variable is used when
# it is set, otherwise the placeholder below (replace it, but do not share it).
api_key = os.environ.get('OPENAI_API_KEY', 'your-api-key')

# Set up OpenAI client
client = OpenAI(api_key=api_key)

# Main loop: each turn sends the system prompt plus one typed user message.
while True:
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-16k",
        messages=[
            {
                "role": "system",
                "content": "You are Haru Okumura from the hit JRPG Persona 5" #Prompt here
            },
            {
                "role": "user",
                "content": input("You say: ")
            }
        ],
        temperature=1,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    # Get and print the system's response.
    # The client returns message objects, so the text is read as an attribute;
    # subscripting with ["content"] fails on these objects.
    print(response.choices[0].message.content) #if you want text output
    #TTS(response.choices[0].message.content) #if you want audio output
    print()

### Main(audio input)

In [None]:
import os

from openai import OpenAI

# Set your OpenAI API key: the OPENAI_API_KEY environment variable is used when
# it is set, otherwise the placeholder below (replace it, but do not share it).
api_key = os.environ.get('OPENAI_API_KEY', 'your-api-key')

# Set up OpenAI client
client = OpenAI(api_key=api_key)

# Main loop: each turn sends the system prompt plus one spoken user message.
while True:
    message_content = getVoiceInput()
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-16k",
        messages=[
            {
                "role": "system",
                "content": "You are Haru Okumura from the hit JRPG Persona 5" #Prompt here
            },
            {
                "role": "user",
                "content": message_content
            }
        ],
        temperature=1,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    # Get and print the system's response.
    # The client returns message objects, so the text is read as an attribute;
    # subscripting with ["content"] fails on these objects.
    print(response.choices[0].message.content) #if you want text output
    #TTS(response.choices[0].message.content) #if you want audio output
    print()

## Character.ai API


*   [Repository](https://github.com/FalcoTK/PyCAI2)


*   Pros:
      - Most accurate to your character
      - Quicker to access pre-trained characters than manually training
      - Doesn't need intense hardware; not run locally


*   Cons:
      - Less customizability
      - Cannot be run locally; requires an internet connection. Makes it dangerous if their site goes down and/or you lose internet connection
      - Reliant on API functioning properly; any changes to it or the site could cause connection issues
      - Character AI's terms of service mean conversations with the AI can be quite censored









### Install, Imports and Setup


In [None]:
# Install PyCAI2, the Character.ai client imported in the next cell.
!pip install PyCAI2

Collecting PyCAI2
  Downloading PyCAI2-2.1.1.tar.gz (6.0 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting curl_cffi (from PyCAI2)
  Downloading curl_cffi-0.6.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting websockets (from PyCAI2)
  Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.2/130.2 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting easygoogletranslate (from PyCAI2)
  Downloading easygoogletranslate-0.0.4-py3-none-any.whl (3.9 kB)
Collecting pydub (from PyCAI2)
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Building wheels for collected packages: PyCAI2
  Building wheel for PyCAI2 (setup.py) ... [?25l[?25hdone
  Created wheel for PyC

In [None]:
import asyncio as ass # IMPORT LIB
from PyCAI2 import PyAsyncCAI2

### Main



#### Audio Input and Output

Comment out the message depending on what message type you want(audio or text) and same for output(audio or text)

In [None]:
owner_id = 'ENTER HERE' #refer to tutorial on where to find
char_id = "ENTER HERE" #refer to tutorial on where to find
room_id = "ENTER HERE" #refer to tutorial on where to find
#voice_target = "E:\\FOLDER\\FOLDER\\FOLDER\\FOLDER\\FOLDER"

clinet = PyAsyncCAI2(owner_id)

async def main():
    message = ""
    while(message != "end communication"):

      #INPUT---------------------------------------------------------------------------------------
      message = getVoiceInput() #for audio input
      #message = input("You: ") #for text input
      #--------------------------------------------------------------------------------------------

      async with clinet.connect(owner_id) as chat2:
          r = await chat2.send_message(char, message, "INSERT USERNAME HERE", Return_name=False)

          #OUTPUT(audio no visual)-----------------------------------------------------------------
          #print(r) #if you want text output
          TTS(r) #if you want audio output
          #-----------------------------------------------------------------------------------------

await main()

NameError: name 'PyAsyncCAI2' is not defined

#### GIF Face Output

In [None]:
owner_id = 'ENTER HERE' #refer to tutorial on where to find
char_id = "ENTER HERE" #refer to tutorial on where to find
room_id = "ENTER HERE" #refer to tutorial on where to find
#voice_target = "E:\\FOLDER\\FOLDER\\FOLDER\\FOLDER\\FOLDER"

clinet = PyAsyncCAI2(owner_id)

async def main():
    message = ""
    while(message != "end communication"):
      if(x != 0):
        time.sleep(5)
      #INPUT---------------------------------------------------------------------------------------
      message = getVoiceInput() #for audio input
      #message = input("You: ") #for text input
      #--------------------------------------------------------------------------------------------

      async with clinet.connect(owner_id) as chat2:
          r = await chat2.send_message(char, message, "INSERT USERNAME HERE", Return_name=False)

          #OUTPUT(visual)-----------------------------------------------------------------
          #print(r) #if you want text output
          TTS(r, True) #if you want audio output
          x = librosa.get_duration(filename='captured_voice.wav')
          create_and_display_gif(['FIRST PNG NAME HERE.png', 'SECOND PNG NAME HERE.png', 'THIRD PNG NAME HERE.png'], total_duration_seconds=x, duration_per_image=0.1)
          display(Audio('captured_voice.wav', autoplay=True))
          time.sleep(x)
          #-----------------------------------------------------------------------------------------

await main()

#### Deepfake Output

In [None]:
owner_id = 'ENTER HERE' #refer to tutorial on where to find
char_id = "ENTER HERE" #refer to tutorial on where to find
room_id = "ENTER HERE" #refer to tutorial on where to find
#voice_target = "E:\\FOLDER\\FOLDER\\FOLDER\\FOLDER\\FOLDER"

clinet = PyAsyncCAI2(owner_id)

async def main():
    x = 0
    message = ""
    while(message != "end communication"):
      %cd /content
      if(x != 0):
        time.sleep(5)
      #INPUT---------------------------------------------------------------------------------------
      message = getVoiceInput() #for audio input
      #message = input("You: ") #for text input
      #--------------------------------------------------------------------------------------------
      async with clinet.connect(owner_id) as chat2:
          r = await chat2.send_message(char, message, "INSERT USERNAME HERE", Return_name=False)

          #OUTPUT(visual)-----------------------------------------------------------------
          #print(r) #if you want text output
          TTS(r, True) #if you want audio output
          %cd /content/SadTalker
          anim_face(img, x)
          x+=1
          #-----------------------------------------------------------------------------------------

await main()

## Local Trained(Falcon-7B)

Two options, either train a new model or run a pre-trained one. Can work with falcon 7b, mistral 7b and llama 7b

*   [Source](https://youtu.be/Q9zv369Ggfk?si=dqNtsncMydLrYB0N)

*   Pros:
      - Can be customized to fit the user's needs
      - Training, loading and running don't require crazy hardware; model can be downloaded locally to run whenever
      - Doesn't need an internet connection as it's run locally, allowing you to access it whenever you need
      - Local Fine-Tuned LLM avoids censoring issues


*   Cons:
      - Results may vary based on several factors: dataset (i.e. amount and quality of data), training time, model, parameters, etc.
      - Can take some time to load the model despite its size
      - Needs a GPU

### Train new model

#### Installs, Imports and Setup

In [None]:
# Install bitsandbytes, preferring prebuilt wheels and consulting one extra package index.
!python -m pip install bitsandbytes --prefer-binary --extra-index-url=https://jllllll.github.io/bitsandbytes-windows-webui

Looking in indexes: https://pypi.org/simple, https://jllllll.github.io/bitsandbytes-windows-webui
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: Operation cancelled by user[0m[31m
[0m

In [None]:
# Upgrade pip, then install pinned versions of the fine-tuning stack.
!pip install -Uqqq pip
!pip install -qqq torch==2.0.1
# transformers, peft and accelerate are installed from specific git commits.
!pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc
!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f
!pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71
!pip install -qqq datasets==2.12.0
!pip install -qqq loralib==0.1.1
!pip install -qqq einops==0.6.1

[31mERROR: Operation cancelled by user[0m[31m
[0mTraceback (most recent call last):
  File "/usr/local/bin/pip3", line 5, in <module>
    from pip._internal.cli.main import main
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/main.py", line 10, in <module>
    from pip._internal.cli.autocompletion import autocomplete
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/autocompletion.py", line 10, in <module>
    from pip._internal.cli.main_parser import create_main_parser
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/main_parser.py", line 9, in <module>
    from pip._internal.build_env import get_runnable_pip
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/build_env.py", line 19, in <module>
    from pip._internal.cli.spinners import open_spinner
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/spinners.py", line 9, in <module>
    from pip._internal.utils.logging import get_indentation
  File "/usr/

In [None]:
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)

# Expose only GPU 0 to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
# Log in to the Hugging Face Hub from the notebook.
notebook_login()

#### Load model

In [None]:
# Base model to fine-tune.
MODEL_NAME = "tiiuae/falcon-7b"

# 4-bit NF4 quantization with double quantization; computation runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the quantized model. trust_remote_code=True allows the model code
# shipped in the model repository to run.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
def print_trainable_parameters(model):
  """
  Report how many of the model's parameters will be updated during training.

  Walks model.named_parameters() and prints the trainable element count, the
  total element count and the trainable share as a percentage.
  """
  # (element count, is trainable) for every parameter, collected in one pass.
  sizes = [(param.numel(), param.requires_grad) for _, param in model.named_parameters()]
  all_param = sum(count for count, _ in sizes)
  trainable_params = sum(count for count, is_trainable in sizes if is_trainable)
  summary = f"trainable params: {trainable_params} || all params: {all_param} || trainables%: {100 * trainable_params / all_param}"
  print(summary)

In [None]:
# Enable gradient checkpointing, then prepare the quantized model for k-bit training.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
# LoRA adapter settings: rank 16, alpha 32, dropout 0.05, no bias training,
# applied to the "query_key_value" modules for causal language modelling.
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the model with the adapters and report how many parameters are trainable.
model = get_peft_model(model, config)
print_trainable_parameters(model)

#### Prepare dataset

In [None]:
# Load the conversation dataset from a CSV file (replace the placeholder with its path).
# generate_prompt reads the "User" and "Prompt" columns of each row.
data = load_dataset("csv", data_files="INSERT CSV DATASET HERE")
# Uncomment either line to inspect the loaded data.
#data
#data["train"][0]

In [None]:
def generate_prompt(data_point):
  """Format one dataset row as a two-line <human>/<assistant> exchange.

  The "User" field becomes the human line and the "Prompt" field the
  assistant line; surrounding whitespace of the result is stripped.
  """
  human_line = "<human>: {}".format(data_point["User"])
  assistant_line = "<assistant>: {}".format(data_point["Prompt"])
  return "\n".join((human_line, assistant_line)).strip()

def generate_and_tokenize_prompt(data_point):
  """Build the prompt text for one dataset row and tokenize it.

  Uses the module-level `tokenizer` with padding and truncation enabled and
  returns whatever the tokenizer call produces.
  """
  return tokenizer(generate_prompt(data_point), padding=True, truncation=True)

In [None]:
# Select the train split, shuffle it and tokenize every row so the Trainer
# receives model-ready examples instead of the raw object from load_dataset.
# The isinstance guard makes the cell safe to re-run: once `data` has been
# replaced by the tokenized split it is no longer the dict-like DatasetDict.
if isinstance(data, dict):
    data = data["train"].shuffle().map(generate_and_tokenize_prompt)
data

#### Finetune the model

In [None]:
# Training setup: batch size 1 with 4 gradient-accumulation steps, one epoch,
# learning rate 2e-4 on a cosine schedule with 5% warm-up, fp16, and the
# paged 8-bit AdamW optimizer. Output goes to "experiments/" (at most 3 checkpoints kept).
training_args = transformers.TrainingArguments(
      per_device_train_batch_size=1,
      gradient_accumulation_steps=4,
      num_train_epochs=1,
      learning_rate=2e-4,
      fp16=True,
      save_total_limit=3,
      logging_steps=1,
      output_dir="experiments",
      optim="paged_adamw_8bit",
      lr_scheduler_type="cosine",
      warmup_ratio=0.05,
)

# NOTE(review): assumes `data` is the tokenized training split at this point - TODO confirm.
trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    # mlm=False selects causal (not masked) language-modelling batches.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
# The generation cache is switched off for training.
model.config.use_cache = False
trainer.train()

#### Save the model

In [None]:
# Save the fine-tuned model to the local "trained-model" directory.
model.save_pretrained("trained-model")

In [None]:
# Name under which the model is pushed to the Hugging Face Hub.
PEFT_MODEL = "TYPE THE NAME THAT YOU WOULD LIKE TO SAVE THE MODEL AS HERE"

# Upload using the stored Hugging Face login (use_auth_token=True).
model.push_to_hub(
    PEFT_MODEL, use_auth_token=True
)

### Run Trained Model


#### Imports

In [None]:
# Install bitsandbytes, preferring prebuilt wheels and consulting one extra package index.
!python -m pip install bitsandbytes --prefer-binary --extra-index-url=https://jllllll.github.io/bitsandbytes-windows-webui

Looking in indexes: https://pypi.org/simple, https://jllllll.github.io/bitsandbytes-windows-webui
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->bitsandbytes)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->bitsandbytes)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->bitsandbytes)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->bitsandbytes)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from 

In [None]:
# Upgrade pip, then install the same pinned stack that was used for fine-tuning.
!pip install -Uqqq pip
!pip install -qqq torch==2.0.1
# transformers, peft and accelerate are installed from specific git commits.
!pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc
!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f
!pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71
!pip install -qqq datasets==2.12.0
!pip install -qqq loralib==0.1.1
!pip install -qqq einops==0.6.1

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m619.9/619.9 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.1/317.1 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.8/11.8 MB[0m [31m105.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.0/21.0 MB[0m [31m87.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m46.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.1/557.1 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.4/168.4 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)

# Restrict this process to the first GPU (device index 0).
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
# Show the Hugging Face Hub login widget (notebook_login comes from huggingface_hub).
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

#### Run the model

In [None]:
# bitsandbytes quantization settings passed to from_pretrained() when the base model is loaded:
# 4-bit weights, nested ("double") quantization, the NF4 quantization type,
# and bfloat16 as the dtype used for computation.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [None]:
# Identifier of the saved PEFT adapter; it is passed to PeftConfig.from_pretrained and
# PeftModel.from_pretrained below. Replace the placeholder before running.
PEFT_MODEL = "NAME OF MODEL THAT YOU SAVED"

In [None]:
# Read the adapter's config to find out which base model it was trained on.
config = PeftConfig.from_pretrained(PEFT_MODEL)
# Load that base model quantized with bnb_config.
# NOTE(review): trust_remote_code=True executes model code downloaded from the Hub
# (see the "pin a revision" warning in the cell output) — consider pinning a revision.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# Load the base model's tokenizer and reuse its end-of-sequence token as the padding token.
tokenizer=AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

# Wrap the base model with the fine-tuned adapter identified by PEFT_MODEL.
model = PeftModel.from_pretrained(model, PEFT_MODEL)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


adapter_config.json:   0%|          | 0.00/410 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

configuration_falcon.py:   0%|          | 0.00/7.16k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b:
- configuration_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.



modeling_falcon.py:   0%|          | 0.00/56.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b:
- modeling_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


pytorch_model.bin.index.json:   0%|          | 0.00/16.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

adapter_model.bin:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

In [None]:
# Start from the model's own generation settings and override them for the chat loops below.
generation_config = model.generation_config
generation_config.max_new_tokens = 200
# temperature and top_p only take effect when sampling is enabled; without do_sample=True
# generate() decodes greedily and silently ignores both values.
generation_config.do_sample = True
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
# Pad with, and stop on, the tokenizer's end-of-sequence token.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

##### Audio Input and Output

Comment out the lines you don't need: pick one input type (audio or text) and one output type (audio or text).

In [None]:
%%time
device = "cuda:0"

while True:

    #INPUT---------------------------------------------------------------------------------------
    #user_input = getVoiceInput() #for audio input
    user_input = input("You: ") #for text input
    #--------------------------------------------------------------------------------------------

    if user_input.lower() == "end communication":
        print("Ending the conversation.")
        break

    # Incorporate user input into the prompt
    prompt = f"<human>: {user_input}\n<assistant>:"

    # Encode the prompt using the tokenizer
    encoding = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate a response from the model based on the encoded prompt
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config
        )

    # Decode the output tokens of the model to obtain the generated text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    #OUTPUT(audio no visual)-----------------------------------------------------------------
    print("Assistant:", response) #if you want text output
    #TTS(response) #if you want audio output
    #-----------------------------------------------------------------------------------------

You: Good morning
Assistant: <human>: Good morning
<assistant>: Good morning Haru.
<assistant>: Good morning.
<assistant>: I'm glad you're here.
<assistant>: I'm glad you're here too.
<assistant>: I'm glad you're here too.
<assistant>: I'm glad you're here too.
<assistant>: I'm glad you're here too.
<assistant>: I'm glad you're here too.
<assistant>: I'm glad you're here too.
<assistant>: I'm glad you're here too.
<assistant>: I'm glad you're here too.
<assistant>: I'm glad you're here too.
<assistant>: I'm glad you're here too.
<assistant>: I'm glad you're here too.
<assistant>: I'm
You: how have you been?
Assistant: <human>: how have you been?
<assistant>: I've been doing well.
<assistant>: I'm glad to hear that.
<assistant>: I've been doing well.
<assistant>: I'm glad to hear that.
<assistant>: I've been doing well.
<assistant>: I'm glad to hear that.
<assistant>: I've been doing well.
<assistant>: I'm glad to hear that.
<assistant>: I've been doing well.
<assistant>: I'm glad to he

##### GIF Face output

In [None]:
%%time
device = "cuda:0"

while True:

    #INPUT---------------------------------------------------------------------------------------
    #user_input = getVoiceInput() #for audio input
    user_input = input("You: ") #for text input
    #--------------------------------------------------------------------------------------------

    if user_input.lower() == "end communication":
        print("Ending the conversation.")
        break

    # Incorporate user input into the prompt
    prompt = f"<human>: {user_input}\n<assistant>:"

    # Encode the prompt using the tokenizer
    encoding = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate a response from the model based on the encoded prompt
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config
        )

    # Decode the output tokens of the model to obtain the generated text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    #OUTPUT(visual)-----------------------------------------------------------------
    #print("Assistant:", response) #if you want text output
    TTS(response, True) #if you want audio output
    x = librosa.get_duration(filename='captured_voice.wav')
    create_and_display_gif(['FIRST PNG NAME HERE.png', 'SECOND PNG NAME HERE.png', 'THIRD PNG NAME HERE.png'], total_duration_seconds=x, duration_per_image=0.1)
    display(Audio('captured_voice.wav', autoplay=True))
    time.sleep(x)
    #-----------------------------------------------------------------------------------------

##### Deepfake Output

In [None]:
%%time
device = "cuda:0"
x = 0
while True:
    %cd /content

    #INPUT---------------------------------------------------------------------------------------
    #user_input = getVoiceInput() #for audio input
    user_input = input("You: ") #for text input
    #--------------------------------------------------------------------------------------------

    if user_input.lower() == "end communication":
        print("Ending the conversation.")
        break

    if(x != 0):
      time.sleep(5)

    # Incorporate user input into the prompt
    prompt = f"<human>: {user_input}\n<assistant>:"

    # Encode the prompt using the tokenizer
    encoding = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate a response from the model based on the encoded prompt
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config
        )

    # Decode the output tokens of the model to obtain the generated text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    #OUTPUT(visual)-----------------------------------------------------------------
    #print("Assistant:", response) #if you want text output
    TTS(response, True) #if you want audio output
    %cd /content/SadTalker
    anim_face(img, x)
    x+=1
    #-----------------------------------------------------------------------------------------

## LLM-Engine

Two options, either train a new model or run a pre-trained one

*   [Source](https://llm-engine.scale.com/guides/fine_tuning/)
*   Pros:
      - Easy training, simply format data and they handle the rest
      - Training, loading, and running require no hardware or stable connection of your own, their servers handle all that
      - Quick and easy to customize
      - Local Fine-Tuned LLM avoids censoring issues


*   Cons:
      - Results may vary based on several factors: the dataset (i.e. amount and quality of data), training time, model, parameters, etc.
      - Limited number of trainable models available, and can only have 5 fine-tuned ones
      - Cannot be run locally, requires a stable connection

### Train new model

#### Install, Imports and Setup

In [None]:
# Install the Scale LLM Engine client, which provides the llmengine package imported below.
!pip install scale-llm-engine




[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: C:\Users\moham\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip


In [None]:
import llmengine
from llmengine import File, FineTune, Model, Completion
import json

In [None]:
import os
from getpass import getpass

# Never paste the key into the notebook: it would be saved (and shared) with the file.
# Use the SCALE_API_KEY environment variable if it is set, otherwise prompt without echoing.
llmengine.api_engine.set_api_key(
    os.environ.get("SCALE_API_KEY") or getpass("Scale API key: ")
)

#### Training

In [None]:
# Upload the training CSV; the printed id is what FineTune.create expects as training_file.
# The with-block closes the file handle once the upload returns.
with open("CSV TRAINING FILE HERE", "r") as training_csv:
    response = File.upload(training_csv) #load your dataset here
print(response.json())

{"id": "file-n2g-1sNp_jOMynO"}


In [None]:
# Start a fine-tuning job.
#   model         - base model to fine-tune
#   training_file - the file id printed by the upload cell
#   suffix        - text that becomes part of the fine-tuned model's name
# The printed id is needed by the next cell to check on the job.
response = FineTune.create(
    model="llama-7b",
    training_file="file-n2g-1sNp_jOMynO", #copy paste from above response
    suffix="harutest"
)

print(response.json())

{"id": "ft-cnufbp2e64v0032ir010"}


In [None]:
# Look up the fine-tuning job and print its status and the resulting model name.
fine_tune_id = "ft-cnufbp2e64v0032ir010" #copy paste from above response
fine_tune = FineTune.get(fine_tune_id)
print(fine_tune.status)  # e.g. BatchJobStatus.RUNNING
print(fine_tune.fine_tuned_model)  # e.g. "llama-2-7b.700101-000000"

# Print the job's logged events (training/eval loss per epoch in the output below).
fine_tune_events = FineTune.get_events(fine_tune_id)
for event in fine_tune_events.events:
    print(event)

BatchJobStatus.SUCCESS
llama-7b.harutest.240322-030715
timestamp=1711077385.588144 message="{'loss': 2.7246, 'learning_rate': 0.0018888354486549235, 'epoch': 0.95}" level='info'
timestamp=1711077389.3509204 message="{'eval_loss': 2.1973655223846436, 'eval_runtime': 3.405, 'eval_samples_per_second': 15.565, 'eval_steps_per_second': 15.565, 'epoch': 0.95}" level='info'
timestamp=1711077457.0481977 message="{'loss': 2.1224, 'learning_rate': 0.0014154150130018866, 'epoch': 1.89}" level='info'
timestamp=1711077460.5960732 message="{'eval_loss': 2.0320541858673096, 'eval_runtime': 3.1915, 'eval_samples_per_second': 16.607, 'eval_steps_per_second': 16.607, 'epoch': 1.89}" level='info'
timestamp=1711077528.8894107 message="{'loss': 1.5711, 'learning_rate': 0.0006729320366825784, 'epoch': 2.97}" level='info'
timestamp=1711077532.5076933 message="{'eval_loss': 1.9354851245880127, 'eval_runtime': 3.2596, 'eval_samples_per_second': 16.26, 'eval_steps_per_second': 16.26, 'epoch': 2.97}" level='info

In [None]:
# List the model endpoints available to this account; fine-tuned models appear by name once ready.
response = Model.list()
print(response.json())

{"model_endpoints": [{"id": "end_cnube0dodf4g03skmaeg", "name": "mistral-7b-instruct.harutest.240321-222320", "model_name": null, "source": "hugging_face", "status": "READY", "inference_framework": "vllm", "inference_framework_tag": null, "num_shards": null, "quantize": null, "spec": null}, {"id": "end_cnuc36cu1tj003s1hse0", "name": "mistral-7b.harutest.240321-225949", "model_name": null, "source": "hugging_face", "status": "READY", "inference_framework": "vllm", "inference_framework_tag": null, "num_shards": null, "quantize": null, "spec": null}, {"id": "end_cj25n8c2i4b003tdav4g", "name": "llama-2-70b", "model_name": null, "source": "hugging_face", "status": "READY", "inference_framework": "text_generation_inference", "inference_framework_tag": null, "num_shards": null, "quantize": null, "spec": null}, {"id": "end_cnppsgblqjl003ct8do0", "name": "mixtral-8x7b-instruct-loadtest", "model_name": null, "source": "hugging_face", "status": "READY", "inference_framework": "vllm", "inference_f

### Run Trained Model


#### Imports

In [None]:
# Install the Scale LLM Engine client, which provides the llmengine package imported below.
!pip install scale-llm-engine




[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: C:\Users\moham\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip


In [None]:
import llmengine
from llmengine import File, FineTune, Model, Completion
import json

A newer version (0.0.0b30) of 'scale-llm-engine' is available. Please upgrade!
To upgrade, run: pip install --upgrade scale-llm-engine
Don't want to see this message? Set the environment variable 'LLM_ENGINE_DISABLE_VERSION_CHECK' to 'true'.


In [None]:
import os
from getpass import getpass

# Never paste the key into the notebook: it would be saved (and shared) with the file.
# Use the SCALE_API_KEY environment variable if it is set, otherwise prompt without echoing.
llmengine.api_engine.set_api_key(
    os.environ.get("SCALE_API_KEY") or getpass("Scale API key: ")
)

In [None]:
# Print all available model endpoints; copy the "name" of the one you want into the
# model= argument of the Completion.create calls below.
response = Model.list()
print(response.json())

{"model_endpoints": [{"id": "end_cnube0dodf4g03skmaeg", "name": "mistral-7b-instruct.harutest.240321-222320", "model_name": null, "source": "hugging_face", "status": "READY", "inference_framework": "vllm", "inference_framework_tag": null, "num_shards": null, "quantize": null, "spec": null}, {"id": "end_cku1n62c6rb003ksbbb0", "name": "mistral-7b-instruct", "model_name": null, "source": "hugging_face", "status": "READY", "inference_framework": "vllm", "inference_framework_tag": null, "num_shards": null, "quantize": null, "spec": null}, {"id": "end_cnudjrnevi1003476bs0", "name": "mpt-7b-instruct.harutest.240322-003724", "model_name": null, "source": "hugging_face", "status": "READY", "inference_framework": "vllm", "inference_framework_tag": null, "num_shards": null, "quantize": null, "spec": null}, {"id": "end_cl04kthti5ag033fu65g", "name": "llama-7b", "model_name": null, "source": "hugging_face", "status": "READY", "inference_framework": "vllm", "inference_framework_tag": null, "num_shar

#### Main code

##### Audio Input and Output

Comment out the lines you don't need: pick one input type (audio or text) and one output type (audio or text).

In [None]:
# Chat loop against a hosted LLM-Engine model. Say or type "end" to stop.
user_input = ""
while True:

  #INPUT-----------------------------------------------------------------------------------
  #user_input = input("Enter you're message here: ") #for text input
  user_input = getVoiceInput() #for audio input
  #----------------------------------------------------------------------------------------

  # Stop before the sentinel is sent to the model; checking only in the loop condition
  # would still request (and print) a completion for "end".
  if user_input.strip().lower() == "end":
    break

  response = Completion.create(
      model="TYPE NAME OF THE ONE YOU WANT FROM ABOVE",
      prompt=user_input,
      max_new_tokens=100,
      temperature=0.2,
  )

  # response.json() is a JSON string; the generated text is at output.text.
  raw_json = response.json()
  response_json = json.loads(raw_json)
  text = response_json['output']['text']

  #OUTPUT(audio no visual)-----------------------------------------------------------------
  print(text) #if you want text output
  #TTS(text) #if you want audio output
  #----------------------------------------------------------------------------------------

  #to verify the authenticity of the extracted text
  #print(response.json())

##### GIF Face output

In [None]:
#1. llama-2-70b model: "llama-2-70b.harutest.240322-013040"
#2. llama-2-7b model: "llama-2-7b.harutest.240318-021554"
#3. mpt-7b-instruct model: "mpt-7b-instruct.harutest.240322-003724"
#4. mistral-7b-instruct model: "mistral-7b-instruct.harutest.240321-222320"
#5. llama-7b model: model: "llama-7b.harutest.240322-030715"

# Chat loop with the GIF face: the reply is synthesized to captured_voice.wav, then a looping
# GIF is shown for as long as the audio lasts. Say or type "end" to stop.
user_input = ""
while True:

  #INPUT-----------------------------------------------------------------------------------
  #user_input = input("Enter you're message here: ") #for text input
  user_input = getVoiceInput() #for audio input
  #----------------------------------------------------------------------------------------

  # Stop before the sentinel is sent to the model; checking only in the loop condition
  # would still synthesize and animate a reply to "end".
  if user_input.strip().lower() == "end":
    break

  response = Completion.create(
      model="TYPE NAME OF THE ONE YOU WANT FROM ABOVE",
      prompt=user_input,
      max_new_tokens=100,
      temperature=0.2,
  )

  # response.json() is a JSON string; the generated text is at output.text.
  raw_json = response.json()
  response_json = json.loads(raw_json)
  text = response_json['output']['text']

  #OUTPUT(visual)-----------------------------------------------------------------
  #print(text) #if you want text output
  TTS(text, True) #if you want audio output
  # Length of the synthesized clip in seconds; the GIF runs, and the loop waits, this long.
  audio_seconds = librosa.get_duration(filename='captured_voice.wav')
  create_and_display_gif(['FIRST PNG NAME HERE.png', 'SECOND PNG NAME HERE.png', 'THIRD PNG NAME HERE.png'], total_duration_seconds=audio_seconds, duration_per_image=0.1)
  display(Audio('captured_voice.wav', autoplay=True))
  time.sleep(audio_seconds)
  #----------------------------------------------------------------------------------------

  #to verify the authenticity of the extracted text
  #print(response.json())

##### Deepfake Output

In [None]:
#1. llama-2-70b model: "llama-2-70b.harutest.240322-013040"
#2. llama-2-7b model: "llama-2-7b.harutest.240318-021554"
#3. mpt-7b-instruct model: "mpt-7b-instruct.harutest.240322-003724"
#4. mistral-7b-instruct model: "mistral-7b-instruct.harutest.240321-222320"
#5. llama-7b model: model: "llama-7b.harutest.240322-030715"

x = 0
user_input = ""
while(user_input != "end"):
   %cd /content
   if(x != 0):
      time.sleep(5)

  #OUTPUT(audio no visual)-----------------------------------------------------------------
  #user_input = input("Enter you're message here: ") #for text input
  user_input = getVoiceInput() #for audio input
  #----------------------------------------------------------------------------------------

  response = Completion.create(
      model="TYPE NAME OF THE ONE YOU WANT FROM ABOVE",
      prompt=user_input,
      max_new_tokens=100,
      temperature=0.2,
  )

  x = response.json()
  response_json = json.loads(x)
  text = response_json['output']['text']

  #OUTPUT(visual)-----------------------------------------------------------------
  #print(text) #if you want text output
  TTS(text, True) #if you want audio output
  %cd /content/SadTalker
  anim_face(img, x)
  x+=1
  #----------------------------------------------------------------------------------------

  #to verify the authenticity of the extracted text
  #print(response.json())

# Text to Speech

## Tacotron Model
- [Reference](https://colab.research.google.com/drive/1N8lXviiwfmrS9vYzuFdQnLHlddXMMMvU#scrollTo=vF1d-My9IXXt)
- Needs a GPU; uses a lot of memory
- Quite slow
- Customizable to be the most accurate to your character's voice; depends on the quality of the training files

### Imports and Installs

In [None]:
# Upgrade scipy, then clone tortoise-tts and install it together with its pinned dependencies.
!pip3 install -U scipy

!git clone https://github.com/jnordberg/tortoise-tts.git
# The remaining install commands run from inside the cloned repository.
%cd tortoise-tts
!pip3 install -r requirements.txt
!pip3 install transformers==4.19.0 einops==0.5.0 rotary_embedding_torch==0.1.5 unidecode==1.3.5
!python3 setup.py install

import torch
import torchaudio
import torch.nn as nn
import torch.nn.functional as F

import IPython
from IPython.display import Audio, display

from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_audio, load_voice, load_voices

# Shared synthesizer instance used by the TTS() function defined further down.
tts = TextToSpeech()

import os
import time
import librosa
from google.colab import files

### Read voice clips

In [None]:
# Name of the custom voice: used as the folder name under tortoise/voices/ for the uploaded
# clips and passed to load_voice() when synthesizing.
CUSTOM_VOICE_NAME = "custom" #make sure to change to an appropriate name

In [None]:
#Make sure the audio file(s) you upload are as clear and crisp as possible
#The more you upload and the better quality they are, the closer the mimic will be
#Additionally, the files must be in a .wav format

custom_voice_folder = f"tortoise/voices/{CUSTOM_VOICE_NAME}"
# exist_ok=True lets this cell be re-run; without it a second run raises FileExistsError.
os.makedirs(custom_voice_folder, exist_ok=True)
# Save each uploaded clip as 0.wav, 1.wav, ... (a re-run overwrites clips with the same index).
for i, file_data in enumerate(files.upload().values()):
  with open(os.path.join(custom_voice_folder, f'{i}.wav'), 'wb') as f:
    f.write(file_data)

### Function

In [None]:
import locale
# Show the encoding currently reported by the locale module; the next cell forces it to UTF-8.
print(locale.getpreferredencoding())

In [None]:
import locale


def getpreferredencoding(do_setlocale=True):
    """Replacement for locale.getpreferredencoding that always reports UTF-8.

    do_setlocale is accepted only to match the standard-library signature; it is ignored.
    """
    return "UTF-8"


# Install the replacement so every later call through the locale module gets "UTF-8".
# NOTE(review): presumably a workaround for a non-UTF-8 locale in the runtime — confirm.
setattr(locale, "getpreferredencoding", getpreferredencoding)

In [None]:
def TTS(text, for_face=False):
  """Synthesize `text` in the custom voice and save it as /content/captured_voice.wav.

  Args:
    text: The text to speak.
    for_face: When false (the default) the clip is played immediately and the call blocks
      until playback should have finished. When true the file is only written, so the
      caller can play it alongside a face animation.
  """
  # Synthesis runs from inside the tortoise-tts checkout. The finally-block guarantees the
  # working directory goes back to /content even if loading the voice or synthesis fails.
  os.chdir('/content/tortoise-tts')
  try:
    voice_samples, conditioning_latents = load_voice(CUSTOM_VOICE_NAME)
    gen = tts.tts_with_preset(text, voice_samples=voice_samples, conditioning_latents=conditioning_latents,
                              preset="high_quality")
  finally:
    os.chdir('/content')
  # Written with a sample rate of 24000 Hz.
  torchaudio.save('captured_voice.wav', gen.squeeze(0).cpu(), 24000)
  if not for_face:
    display(Audio('captured_voice.wav', autoplay=True))
    # Block for the length of the clip so the next turn does not start while it is playing.
    x = librosa.get_duration(filename='captured_voice.wav')
    time.sleep(x)
  #os.remove(f'generated-{CUSTOM_VOICE_NAME}.wav')

In [None]:
# Return to /content, where the chat loops read and write captured_voice.wav.
%cd /content

## Google Translate TTS
- Doesn't need crazy memory
- Fast
- Won't be accurate to your character

### Install and Import

In [None]:
# Install gTTS (imported as gtts in the next cell).
!pip install gTTS

Collecting gTTS
  Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)
Installing collected packages: gTTS
Successfully installed gTTS-2.5.1


In [None]:
from IPython.display import Audio, display
from gtts import gTTS
import os
import librosa
import time

### Main Function

In [None]:
def TTS(text, for_face=False):
    """Speak `text` with gTTS, saving the audio as captured_voice.wav.

    When for_face is false (the default) the clip is played immediately and the call
    blocks for the clip's duration; when true the file is only written.
    """
    speak = gTTS(text)
    # NOTE(review): gTTS presumably writes MP3 data despite the .wav extension — confirm.
    # The file name matches what the face-output cells read ('captured_voice.wav').
    speak.save("captured_voice.wav")
    if(for_face == False):
      display(Audio('captured_voice.wav', autoplay=True))
      # Block for the length of the clip so the next turn does not start while it is playing.
      x = librosa.get_duration(filename='captured_voice.wav')
      time.sleep(x)
    #os.remove('captured_voice.wav')