## Libraries And Modules

In [1]:
import whisper
import numpy as np
import transformers
from transformers import pipeline
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer

## Function Definitions

In [None]:
def free_gpu_memory():
    """Release memory that is no longer referenced, on the host and on the GPU.

    Runs Python's garbage collector first, then asks PyTorch to empty its CUDA
    cache. The CUDA step is skipped entirely when no CUDA device is available.

    Returns:
        None.
    """
    # Collect first: emptying the CUDA cache before unreachable objects are
    # destroyed cannot release the memory those objects still hold.
    gc.collect()

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

In [None]:
# Device index: 0 when CUDA is available, otherwise -1.
# NOTE(review): this looks like the transformers `pipeline(device=...)`
# convention (-1 = CPU, 0 = first GPU) — confirm where `device` is consumed.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

## Whisper Transcription (Speech-to-Text)

In [4]:
def transcribe_audio(file_path, model_type, language):
    """Transcribe an audio file with Whisper and return the recognised text.

    Args:
        file_path: Path of the audio file handed to the model's ``transcribe``.
        model_type: Model name passed to ``whisper.load_model`` (e.g. "medium").
        language: Language code forwarded to ``transcribe`` (e.g. "en").

    Returns:
        The value stored under the ``"text"`` key of the transcription result.
    """
    # The model is loaded on every call; nothing is cached between calls.
    asr_model = whisper.load_model(model_type)

    decode_options = {"language": language, "task": "transcribe"}
    transcription = asr_model.transcribe(file_path, **decode_options)

    return transcription["text"]

In [5]:
# Transcribe the recorded question in English with the "medium" Whisper model.
file_path = "Tourist_Place.m4a"
en_transcribed_text = transcribe_audio(file_path, model_type="medium", language="en")
print("English :", en_transcribed_text)

# Release memory before the next stage of the notebook runs.
free_gpu_memory()

English :  Tell me about a tourist place near Teenagar.


## LLM Loading (DeepSeek-R1)

In [12]:
# Load the DeepSeek-R1 distilled checkpoint and its tokenizer from the Hugging Face Hub.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Use the GPU in half precision when CUDA is available (the original behaviour);
# otherwise fall back to CPU in full precision instead of failing on
# device_map="cuda".
_use_cuda = torch.cuda.is_available()
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if _use_cuda else torch.float32,
    device_map="cuda" if _use_cuda else "cpu",
)

In [13]:
# def get_ai_response(prompt):
#     """Generates a response using DeepSeek-R1-Distill-Qwen-1.5B."""
#     print("Generating AI response...")
#     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
#     output = model.generate(
#     **inputs,
#     max_new_tokens=300,
#     eos_token_id=tokenizer.eos_token_id,
#     do_sample=True,  # Enable sampling for variability
#     top_p=0.9,       # Controls the probability mass considered
#     temperature=0.7   # Controls randomness (lower = more deterministic)
#     )

#     response = tokenizer.decode(output[0], skip_special_tokens=True)
#     return response

In [None]:

# Use the end-of-sequence token as the padding token, so the tokenizer has a
# pad token available for the `padding=True` call in get_deepseek_response.
tokenizer.pad_token = tokenizer.eos_token  

def get_deepseek_response(prompt):
    """Generate a short answer to ``prompt`` with the loaded DeepSeek model.

    The prompt is wrapped in a fixed instruction preamble, tokenized with the
    module-level ``tokenizer`` and passed to the module-level ``model``. Only
    the tokens generated after the prompt are decoded and returned.

    Args:
        prompt: The user's question as plain text.

    Returns:
        The generated continuation as a string, with special tokens removed
        and surrounding whitespace stripped.
    """
    system_prompt = ("You are an AI assistant that provides concise and direct answers. "
                     "Do not include unnecessary thoughts or self-reflections. "
                     "Give factual and to-the-point responses.")

    full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAI:"

    inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True)
    input_ids = inputs.input_ids.to(model.device)
    attention_mask = inputs.attention_mask.to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,  # Explicitly pass attention mask
            max_new_tokens=100,  # Limits response length
            do_sample=True,  # temperature/top_k only take effect when sampling
            temperature=0.3,  # Low temperature keeps sampling close to greedy
            top_k=40,  # Sample only from the 40 most likely tokens
            repetition_penalty=1.05,  # Mildly discourages repetition
            eos_token_id=tokenizer.eos_token_id,  # Stops generation at the end
            # Passed explicitly so generate() does not fall back to it with a warning.
            pad_token_id=tokenizer.eos_token_id,
        )

    # The output sequence begins with the prompt tokens. Slice them off by
    # length rather than string-replacing the decoded prompt, which fails
    # whenever decoding does not reproduce the prompt text exactly.
    prompt_length = input_ids.shape[-1]
    new_token_ids = output_ids[0][prompt_length:]

    return tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()


## DeepSeek Output

In [15]:
# Ask the model a fixed question and show its answer; `deepseek_response` is
# read again by the text-to-speech cell.
prompt = "Tell me about a tourist spot near T-Nagar, Chennai"
deepseek_response = get_deepseek_response(prompt=prompt)

print(deepseek_response)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


There are several tourist spots near T-Nagar in Chennai. One popular one is the "Chennai Zoo & Botanical Garden," which offers a variety of exhibits and is a great place for both children and adults. Another option is the "Chennai National Park," which is known for its natural beauty and wildlife habitats. Additionally, there's the "Chennai Zoo," which is more kid-friendly and has interactive exhibits. Each of these places offers unique experiences that can make your visit memorable.


## pyttsx3 (Text-to-Speech)

In [12]:
import pyttsx3 as tts

# Speak the LLM answer aloud with the offline pyttsx3 engine.
engine = tts.init()  # object creation

# --- Rate ---
rate = engine.getProperty('rate')   # speaking rate before it is changed below
print ("Rate : ",rate)              # printing the rate read above
target_rate = 125
engine.setProperty('rate', target_rate)  # setting up new voice rate

# --- Volume ---
volume = engine.getProperty('volume')   # current volume level (min=0 and max=1)
print ("Volume : ",volume)              # printing current volume level
engine.setProperty('volume',1.0)        # setting up volume level between 0 and 1

# --- Voice ---
voices = engine.getProperty('voices')   # list of voices the engine reports
# Changing the index changes the voice. Which voice sits at which index
# depends on what is installed (the original notes suggested 0 = male,
# 1 = female). Guarded so an empty list keeps the engine's default voice
# instead of raising IndexError.
if voices:
    engine.setProperty('voice', voices[0].id)

engine.say(deepseek_response)
# Announce the rate actually in use; `rate` holds the value from before the change.
engine.say('My current speaking rate is ' + str(target_rate))
engine.runAndWait()
# NOTE(review): relies on pyttsx3's private `_inLoop` flag to end a loop that is
# still marked as running — presumably a workaround for re-running this cell
# in a notebook; confirm it is still needed.
if engine._inLoop:
    engine.endLoop()
engine.stop()

# """Saving Voice to a file"""

# engine.save_to_file('Hello World', 'test.mp3')
# engine.runAndWait()

# if engine._inLoop:
#     engine.endLoop()

Rate :  140
Volume :  1.0


In [2]:
 # temp="""There are several tourist spots near T-Nagar in Chennai. One popular one is the "Chennai Zoo & Botanical Garden," which offers a variety of exhibits and is a great place for both children and adults. Another option is the "Chennai National Park," which is known for its natural beauty and wildlife habitats. Additionally, there's the "Chennai Zoo," which is more kid-friendly and has interactive exhibits. Each of these places offers unique experiences that can make your visit memorable."""