## Libraries And Modules

In [1]:
import whisper
import numpy as np
import transformers
from transformers import pipeline
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer
import googlemaps

## Functions&Definitions

In [2]:
#GARBAGE_COLLECTION
def free_gpu_memory():
  """
  Releases GPU memory occupied by PyTorch tensors and cached allocations.
  """
  # Delete any unused PyTorch tensors
  if torch.cuda.is_available():
    torch.cuda.empty_cache()
  # Trigger garbage collection to potentially release unreferenced objects
  gc.collect()
  torch.cuda.empty_cache()

In [3]:
#Google_API_KEY
GOOGLE_MAPS_API_KEY = "YOURAPIKEY"
gmaps = googlemaps.Client(key=GOOGLE_MAPS_API_KEY)

In [4]:
#DEVICE_SELECTION
device = 0 if torch.cuda.is_available() else -1  # 0 for GPU, -1 for CPU

In [5]:
# Function to extract shortest route using Google Maps API
def get_shortest_distance(origin, destination):
    try:
        directions = gmaps.directions(origin, destination, mode="driving", alternatives=False)
        if directions:
            route = directions[0]['legs'][0]
            return route['distance']['text'], route['duration']['text']
        else:
            return None, None
    except Exception as e:
        return None, None  # Prevents API errors from crashing the script


In [6]:
# Function to generate AI response using DeepSeek
def get_deepseek_response(prompt):
    system_prompt = ("You are an AI assistant that provides concise and direct answers. "
                     "Avoid unnecessary self-reflections. Provide factual responses.")

    full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAI:"

    inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True)
    input_ids = inputs.input_ids.to(model.device)
    attention_mask = inputs.attention_mask.to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,  
            max_new_tokens=100,  
            temperature=0.3,  
            top_k=40,  
            repetition_penalty=1.05,  
            eos_token_id=tokenizer.eos_token_id  
        )

    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return response.replace(full_prompt, "").strip()

In [7]:
# Function to decide if Google Maps API should be used or DeepSeek AI
def process_user_prompt(user_prompt):
    travel_keywords = ["shortest distance", "shortest route", "how far", "travel time", "directions to", "get to"]

    if any(keyword in user_prompt.lower() for keyword in travel_keywords):
        # Extracting locations intelligently
        words = user_prompt.lower().replace("to", "|").replace("from", "|").split("|")
        if len(words) >= 2:
            origin, destination = words[-2].strip(), words[-1].strip()
        else:
            return "I need both a starting point and a destination."

        distance, duration = get_shortest_distance(origin, destination)

        if distance:
            return f"The shortest driving distance from {origin} to {destination} is {distance} and takes around {duration}."
        else:
            return f"Sorry, I couldn't find the shortest route from {origin} to {destination}."
    else:
        return get_deepseek_response(user_prompt)


## WhisperTranscription(SpeechToText)

In [8]:
def transcribe_audio(file_path, model_type, language):
    model = whisper.load_model(model_type)
    result = model.transcribe(file_path,
                              language=language,
                              task="transcribe",
                              )
    return result["text"]

In [9]:
file_path = "AnnaToTambaram.m4a"
en_transcribed_text = transcribe_audio(file_path, "medium", language="en")
print("English :", en_transcribed_text)

free_gpu_memory()

English :  How far is Annanagar from Tambaram?


## LLMLoading(DeepSeekR1)

In [10]:
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="cuda")

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


In [11]:
# Initialize DeepSeek Model
MODEL_NAME = "deepseek-ai/deepseek-r1-distill-qwen-1.5b"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="cuda")
# Ensure PAD token is set correctly
tokenizer.pad_token = tokenizer.eos_token  

## DeepSeek Output

In [12]:
# Example Usage
# user_input = "What is the shortest distance from T-Nagar to Marina Beach?"
user_input=en_transcribed_text
ai_response = process_user_prompt(user_input)
print(ai_response)

The shortest driving distance from how far is annanagar to tambaram? is 27.9 km and takes around 45 mins.


## PyTTS(TextToSpeech)

In [13]:
import pyttsx3 as tts
engine = tts.init() # object creation

""" RATE"""
rate = engine.getProperty('rate')   # getting details of current speaking rate
print ("Rate : ",rate)                        #printing current voice rate
engine.setProperty('rate', 125)     # setting up new voice rate


"""VOLUME"""
volume = engine.getProperty('volume')   #getting to know current volume level (min=0 and max=1)
print ("Volume : ",volume)                          #printing current volume level
engine.setProperty('volume',1.0)    # setting up volume level  between 0 and 1

"""VOICE"""
voices = engine.getProperty('voices')       #getting details of current voice
#engine.setProperty('voice', voices[0].id)  #changing index, changes voices. o for male
engine.setProperty('voice', voices[0].id)   #changing index, changes voices. 1 for female

engine.say(ai_response)
engine.say('My current speaking rate is ' + str(rate))
engine.runAndWait()
if engine._inLoop:
    engine.endLoop()
engine.stop()

Rate :  200
Volume :  1.0
