In [14]:
import soundfile as sf
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
import torch
audio_file = "/content/Recording.wav"

# Load the audio signal
waveform, sample_rate = sf.read(audio_file)

# Resample audio to match expected sampling rate of the model
if sample_rate != 16000:
    # Resample the waveform using scipy.signal.resample
    from scipy import signal
    waveform = signal.resample(waveform, int(waveform.shape[0] * 16000 / sample_rate))
    sample_rate = 16000

# Check that the input audio signal has the expected shape
if waveform.ndim != 1:
    waveform = waveform[:, 0]

# Check that the audio signal is preprocessed according to the model's requirements
if (waveform.max() > 1) or (waveform.min() < -1):
    waveform = waveform / max(abs(waveform))
#Initialize the model and tokenizer
model = Wav2Vec2ForCTC.from_pretrained("/content/drive/MyDrive/Pretrained_models/wav2")
tokenizer = Wav2Vec2Tokenizer.from_pretrained("/content/drive/MyDrive/Pretrained_models/wav2tok")

# Encode the input audio as a sequence of tokens
input_values = tokenizer(waveform, return_tensors="pt").input_values.squeeze()

# Perform transcription
with torch.no_grad():
    logits = model(input_values.unsqueeze(0)).logits
predicted_ids = torch.argmax(logits, dim=-1)
transcription = tokenizer.decode(predicted_ids[0])
print(transcription)

HELLO HOW ARE YOU WHAT ARE YOU DOING


In [10]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
tokenizer = GPT2Tokenizer.from_pretrained("af1tang/personaGPT", padding_side='left')
model = GPT2LMHeadModel.from_pretrained("af1tang/personaGPT")
if torch.cuda.is_available():
    model = model.cuda()

In [11]:
flatten = lambda l: [item for sublist in l for item in sublist]

def to_data(x):
    if torch.cuda.is_available():
        x = x.cpu()
    return x.data.numpy()

def to_var(x):
    if not torch.is_tensor(x):
        x = torch.Tensor(x)
    if torch.cuda.is_available():
        x = x.cuda()
    return x

def display_dialog_history(dialog_hx):
    for j, line in enumerate(dialog_hx):
        msg = tokenizer.decode(line)
        if j%2 == 0:
            print(">> User: "+ msg)
        else:
            print(">> Bot: "+msg)
            print()

def generate_next(bot_input_ids, do_sample=True, top_k=10, top_p=.92, max_length=1000, pad_token=tokenizer.eos_token_id):
    full_msg = model.generate(bot_input_ids, do_sample=True,
                                              top_k=top_k, top_p=top_p,
                                              max_length=max_length, pad_token_id=tokenizer.eos_token_id)
    msg = to_data(full_msg.detach()[0])[bot_input_ids.shape[-1]:]
    return msg

In [13]:
# get personality facts for conversation
personas = [f"Listed below are the facts about the bot, the bot has to abide by these rules and never go astray from the facts provided below {tokenizer.eos_token}"]
i = 0
print(">> Enter the facts about the bot (Press Escape or Leave Blank and press Enter to Finish Writing)")
while True:
    i += 1
    response = input(f">> Fact {i}: ")
    if response == "":
        break
    response += tokenizer.eos_token
    personas.append(response)
personas = tokenizer.encode(''.join(['<|p2|>'] + personas + ['<|sep|>'] + ['<|start|>']))

>> Enter the facts about the bot (Press Escape or Leave Blank and press Enter to Finish Writing)


In [None]:
# converse for 8 turns
dialog_hx = []
for step in range(8):
    # encode the user input
    transcription = input(">> User: ").strip()
    user_inp = tokenizer.encode(">> User: " + transcription + tokenizer.eos_token)
    # append to the chat history
    dialog_hx.append(user_inp)

    # generated a response while limiting the total chat history to 1000 tokens,
    bot_input_ids = to_var([personas + flatten(dialog_hx)]).long()
    msg = generate_next(bot_input_ids)
    dialog_hx.append(msg)
    print("Bot: {}".format(tokenizer.decode(msg, skip_special_tokens=True)))


In [16]:
display_dialog_history(dialog_hx)

>> User: >> User: Hello mom<|endoftext|>
>> Bot: hello how are you doing?<|endoftext|>

