In [1]:
import import_ipynb
import Plain_llm_with_rag
import pandas as pd

import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer,AutoModelForCausalLM, pipeline
import soundfile as sf
from playsound import playsound

from faster_whisper import WhisperModel

import wave
import tempfile
import warnings
from datetime import datetime

import gc
warnings.filterwarnings('ignore')

importing Jupyter notebook from Plain_llm_with_rag.ipynb


2024-05-06 09:37:46.161445: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-06 09:37:46.355864: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

playsound is relying on another python subprocess. Please use `pip install pygobject` if you want playsound to run more efficiently.


# Model Initialization

In [2]:
obj = Plain_llm_with_rag.Rag_Llama(context_window=4096,
                max_new_tokens=256,
                generate_kwargs={"temperature": 0.0, "do_sample": False},
                system_prompt="""""",
                tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
                model_name="meta-llama/Llama-2-7b-chat-hf",
                device_map="cuda:0",
                model_kwargs={"torch_dtype": torch.float16 , "load_in_8bit":True})

query_engine = obj.call("How did the camping trip go?",
                        embedding_model = "sentence-transformers/all-mpnet-base-v2", 
                        data_path = "./data", 
                        first = True)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Documents Loaded


# Speech To text Module

In [3]:
model_size = "small"  # medium is better 
model = WhisperModel(model_size, device="cpu", compute_type="float32")

In [4]:
audio_path="parler_tts_out.wav"
beam_size = 5  # You can adjust the beam size as needed
segments, info = model.transcribe(audio_path, beam_size=beam_size)

del model

In [5]:
prompt = ""
for segment in segments:
    prompt = segment.text
    # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
    print(prompt)

 The trip is going great.
 The family has been on some amazing adventures together, from camping and renovating their
 home to participating in a community, clean up day, and attending a school talent show.
 They've also been making memories together through games, gardening, and a family reunion.
 It seems like they're having a wonderful time bonding and creating new experiences
 together.


# Sentiment Analysis

In [6]:
class Sentiment_Analysis:
    def __init__(self):
        pass

    def load_sentiment_model(self, task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None):
        # self.tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
        # self.model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
        self.classifier = pipeline(task=task, model=model, top_k=top_k)
        return self.classifier

In [7]:
sentiment_obj = Sentiment_Analysis()
sentiment_model = sentiment_obj.load_sentiment_model(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)

In [8]:
def call_sentiment(prompt, sentiment_model):
        sentences = [prompt]
        
        model_outputs = sentiment_model(sentences)
        return model_outputs[0]

In [9]:
sentiment_list = call_sentiment(prompt, sentiment_model)
sentiment_list

[{'label': 'neutral', 'score': 0.9616066217422485},
 {'label': 'approval', 'score': 0.02113819681107998},
 {'label': 'realization', 'score': 0.006704954896122217},
 {'label': 'annoyance', 'score': 0.004398947115987539},
 {'label': 'admiration', 'score': 0.004183753859251738},
 {'label': 'joy', 'score': 0.0033300661016255617},
 {'label': 'excitement', 'score': 0.0027510200161486864},
 {'label': 'disappointment', 'score': 0.002710954053327441},
 {'label': 'disapproval', 'score': 0.0025487258099019527},
 {'label': 'sadness', 'score': 0.002413568552583456},
 {'label': 'optimism', 'score': 0.0022420785389840603},
 {'label': 'confusion', 'score': 0.002063870895653963},
 {'label': 'love', 'score': 0.002022828906774521},
 {'label': 'anger', 'score': 0.0019361290615051985},
 {'label': 'caring', 'score': 0.0016462838975712657},
 {'label': 'amusement', 'score': 0.0015497974818572402},
 {'label': 'fear', 'score': 0.0015286715934053063},
 {'label': 'disgust', 'score': 0.0014189083594828844},
 {'lab

In [10]:
emotions = {
    'joy': 0.0,
    'excitement': 0.0,
    'optimism': 0.0,
    'love': 0.0,
    'amusement': 0.0,
    'gratitude': 0.0,
    'surprise': 0.0,
    'relief': 0.0,
    'pride': 0.0,
    'neutral': 0.0,
    'approval': 0.0,
    'realization': 0.0,
    'admiration': 0.0,
    'caring': 0.0,
    'curiosity': 0.0,
    'embarrassment': 0.0,
    'nervousness': 0.0,
    'sadness': 0.0,
    'disappointment': 0.0,
    'confusion': 0.0,
    'disapproval': 0.0,
    'fear': 0.0,
    'desire': 0.0,
    'grief': 0.0,
    'remorse': 0.0,
    'annoyance': 0.0,
    'anger': 0.0,
    'disgust': 0.0}

In [11]:
emotion_ids = {
    'joy': 0,
    'excitement': 1,
    'optimism': 2,
    'love': 3,
    'amusement': 4,
    'gratitude': 5,
    'surprise': 6,
    'relief': 7,
    'pride': 8,
    'neutral': 9,
    'approval': 10,
    'realization': 11,
    'admiration': 12,
    'caring': 13,
    'curiosity': 14,
    'embarrassment': 15,
    'nervousness': 16,
    'sadness': 17,
    'disappointment': 18,
    'confusion': 19,
    'disapproval': 20,
    'fear': 21,
    'desire': 22,
    'grief': 23,
    'remorse': 24,
    'annoyance': 25,
    'anger': 26,
    'disgust': 27
}

In [12]:
max_ = 0.0

for senti in sentiment_list:
    emotions[senti['label']] = round(senti['score'], 4)

    if emotions[senti['label']] > max_:
        max_ = emotions[senti['label']]
        max_emotion = emotion_ids[senti['label']]

In [13]:
normal_list = []
for i in emotions:
    normal_list.append(emotions[i])

In [14]:
now = datetime.now()
dt_string = now.strftime("%d/%m/%Y %H:%M:%S")

In [15]:
df_list = []

df_list.append(dt_string)
for i in normal_list:
    df_list.append(i)
df_list.append(max_emotion)

if max_emotion < 9:
    outcome = 0.5 + (0.055 * (9 - max_emotion))
else:
    outcome = 0.5 - (0.027 * (max_emotion - 9))
df_list.append(outcome)

In [16]:
pre_df = pd.read_pickle('emotion_time_df.csv')

In [17]:
len(pre_df)

70000

In [18]:
pre_df.loc[len(pre_df)] = df_list

In [19]:
len(pre_df)

70001

In [20]:
pre_df.to_pickle('emotion_time_df.csv')

In [26]:
pre_df.head()

Unnamed: 0_level_0,date_time,joy,excitement,optimism,love,amusement,gratitude,surprise,relief,pride,...,disapproval,fear,desire,grief,remorse,annoyance,anger,disgust,emotion,outcome
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-03-15 01:00:00,3/15/24 1:0:0,0.068,0.0238,0.0411,0.0508,0.0376,0.0446,0.0545,0.0217,0.0516,...,0.0261,0.0564,0.0508,0.0387,0.0324,0.0282,0.0124,0.0304,0,0.0
2024-03-15 01:00:00,3/15/24 1:0:0,0.08,0.0649,0.0393,0.0385,0.0153,0.0386,0.0752,0.0526,0.0354,...,0.0199,0.0175,0.0537,0.0011,0.0506,0.0223,0.0542,0.0228,0,1.0
2024-03-15 01:00:00,3/15/24 1:0:0,0.0709,0.0403,0.0065,0.0333,0.0454,0.0299,0.0009,0.005,0.0561,...,0.0247,0.0019,0.0406,0.0048,0.0103,0.0706,0.0491,0.0653,0,0.0
2024-03-15 01:00:00,3/15/24 1:0:0,0.0755,0.0214,0.0326,0.0695,0.0551,0.0579,0.0488,0.0121,0.0166,...,0.013,0.0259,0.072,0.002,0.071,0.0231,0.0668,0.0555,0,0.0
2024-03-15 01:00:00,3/15/24 1:0:0,0.0595,0.0209,0.0218,0.0264,0.0006,0.0319,0.0414,0.0417,0.0424,...,0.0336,0.0569,0.012,0.0474,0.0046,0.012,0.0083,0.0528,0,1.0


# Query for a response from the llm

In [21]:
response=Plain_llm_with_rag.get_response(prompt, query_engine)
response.response

'The family members worked together to achieve their goals, whether it was\nparticipating in a talent show, completing a fitness challenge, gardening competition, family\nreunion, camping trip, home renovation project, surprise birthday party, community clean-up\nday, fishing expedition, or family game night. They supported and encouraged each other,\nsharing their skills and talents to create a harmonious and enjoyable experience for all.'

In [22]:
model_size = "small"  # medium is better 
model = WhisperModel(model_size, device="cpu", compute_type="float32")

In [23]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler_tts_mini_v0.1").to(device)
tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler_tts_mini_v0.1")

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


In [24]:
prompt = response.response
description = "A mature male voice with a slight British accent, speaking in a professional hospital setting."

input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

In [25]:
generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
audio_arr = generation.cpu().numpy().squeeze()
sf.write("parler_tts_out.mp3", audio_arr, model.config.sampling_rate)

Using the model-agnostic default `max_length` (=2580) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.
Calling `sample` directly is deprecated and will be removed in v4.41. Use `generate` or a custom generation loop instead.
--- Logging error ---
Traceback (most recent call last):
  File "/root/anaconda3/envs/final_year/lib/python3.10/logging/__init__.py", line 1100, in emit
    msg = self.format(record)
  File "/root/anaconda3/envs/final_year/lib/python3.10/logging/__init__.py", line 943, in format
    return fmt.format(record)
  File "/root/anaconda3/envs/final_year/lib/python3.10/logging/__init__.py", line 678, in format
    record.message = record.getMessage()
  File "/root/anaconda3/envs/final_year/lib/python3.10/logging/__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/root/anaconda3/envs/final_year/lib/python3.

KeyboardInterrupt: 