**STT engine**
-

In [None]:
!pip install speechRecognition

In [2]:
import speech_recognition as sr

# Initialize recognizer
recognizer = sr.Recognizer()


# Function to record and transcribe audio
def record_and_transcribe():
    """Record one phrase from the default microphone and print its transcription.

    The recognizer is tuned first, then calibrated against ambient noise, then
    used to capture a single phrase. The captured audio is sent to the Google
    Web Speech API via ``recognize_google`` and the result (or a failure
    message) is printed. Nothing is returned.
    """
    # Set the tunables BEFORE calibration. Assigning energy_threshold after
    # adjust_for_ambient_noise() would overwrite the value that calibration
    # just measured, so 300 here is only the starting point.
    recognizer.pause_threshold = 1.0
    recognizer.energy_threshold = 300

    with sr.Microphone() as source:
        print("Adjusting for ambient noise, please wait...")
        recognizer.adjust_for_ambient_noise(source)
        print("Recording... Speak now.")

        audio = recognizer.listen(source, timeout=None, phrase_time_limit=None)
        print("Finished recording.")

    # Recognition is a network call and does not need the microphone, so it
    # runs after the device has been released.
    try:
        print("Recognizing...")
        text = recognizer.recognize_google(audio)
        print("Transcribed Text: ", text)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))


# Record and transcribe audio
record_and_transcribe()

ALSA lib conf.c:4028:(snd_config_hooks_call) Cannot open shared library libasound_module_conf_pulse.so (/home/d1e/miniconda3/envs/elib/lib/alsa-lib/libasound_module_conf_pulse.so: cannot open shared object file: No such file or directory)
ALSA lib control.c:1570:(snd_ctl_open_noupdate) Invalid CTL hw:0
ALSA lib pcm.c:2722:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.rear
ALSA lib pcm.c:2722:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.center_lfe
ALSA lib pcm.c:2722:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.side
ALSA lib pcm_route.c:878:(find_matching_chmap) Found no matching channel map
ALSA lib pcm_route.c:878:(find_matching_chmap) Found no matching channel map
ALSA lib pcm_route.c:878:(find_matching_chmap) Found no matching channel map
ALSA lib pcm_route.c:878:(find_matching_chmap) Found no matching channel map
ALSA lib dlmisc.c:339:(snd_dlobj_cache_get0) Cannot open shared library libasound_module_rate_lavrate.so (/home/d1e/miniconda3/envs/elib/lib/alsa-lib/libasound_module_ra

Adjusting for ambient noise, please wait...


: 

**DOCX to JSON**
-

In [None]:
!pip install python-docx

In [3]:
import docx
import re
import json


def extract_text_from_docx(file_path):
    """Return the text of a .docx file, one paragraph per line.

    Args:
        file_path: Location of the document, passed straight to ``docx.Document``.

    Returns:
        str: The ``text`` of every paragraph in ``doc.paragraphs``, joined
        with newline characters in document order.
    """
    document = docx.Document(file_path)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)


def extract_museum_info(text):
    """Pull the museum's basic facts and ticket prices out of the document text.

    Args:
        text: Plain text of the museum document.

    Returns:
        dict: Keys ``name``, ``location``, ``contact``, ``website`` and
        ``opening_hours`` hold the text following the matching ``Label: ``
        (whitespace-trimmed), or ``None`` when the label is absent from
        ``text``. ``ticket_prices`` maps each ticket category to its integer
        dollar price and is empty when no price line matches.
    """
    # (output key, pattern) pairs; group 1 is the rest of the line after the label.
    field_patterns = (
        ('name', r'Name: (.+)'),
        ('location', r'Location: (.+)'),
        ('contact', r'Contact: (.+)'),
        ('website', r'Website: (.+)'),
        ('opening_hours', r'Opening Hours: (.+)'),
    )

    museum_info = {}
    for key, pattern in field_patterns:
        match = re.search(pattern, text)
        # A missing label yields None rather than an AttributeError from
        # calling .group() on a failed search.
        museum_info[key] = match.group(1).strip() if match else None

    # Extract ticket prices, e.g. "Adults (18+): $15".
    ticket_prices = re.findall(r'([A-Za-z ]+) \([\w\s\+]+\): \$(\d+)', text)
    # The category class allows spaces, so a category that follows other text
    # on the same line is captured with a leading space; strip it so keys are
    # "Adults" rather than " Adults".
    museum_info['ticket_prices'] = {
        category.strip(): int(price) for category, price in ticket_prices
    }

    return museum_info


def extract_gallery_info(text):
    """Collect each gallery's title and its list of key artifacts.

    A gallery section starts at ``Gallery <LETTER>: <title>``, contains a
    ``Key Artifacts:`` marker, and runs until the next occurrence of the word
    ``Gallery`` or the end of the text.

    Args:
        text: Plain text of the museum document.

    Returns:
        dict: ``"Gallery <LETTER>"`` mapped to ``{'name': ..., 'artifacts': [...]}``,
        where ``artifacts`` holds the non-blank, trimmed lines that follow
        the ``Key Artifacts:`` marker.
    """
    section_pattern = re.compile(
        r'Gallery ([A-Z]): (.+?)Key Artifacts:(.*?)(?=Gallery|\Z)', re.DOTALL
    )

    galleries = {}
    for section in section_pattern.finditer(text):
        letter, title, artifact_block = section.groups()

        artifact_names = []
        for line in artifact_block.split('\n'):
            cleaned = line.strip()
            if cleaned:
                artifact_names.append(cleaned)

        galleries[f'Gallery {letter}'] = {
            'name': title.strip(),
            'artifacts': artifact_names,
        }

    return galleries


def extract_artifact_details(text):
    """Build a lookup of artifact name -> location, description and significance.

    An artifact entry is a block that follows a blank line and consists of a
    name, then ``Location: Gallery <LETTER>``, ``Description: ...`` and
    ``Significance: ...`` lines; it ends at the next blank line or at the end
    of the text.

    Args:
        text: Plain text of the museum document.

    Returns:
        dict: Trimmed artifact name mapped to a dict with the trimmed
        ``location``, ``description`` and ``significance`` strings. When a
        name occurs more than once, the last entry wins.
    """
    entry_pattern = re.compile(
        r'(?<=\n\n)([\w\s\'\-]+)\nLocation: (Gallery [A-Z])\nDescription: (.+?)\nSignificance: (.+?)(?=\n\n|$)',
        re.DOTALL,
    )
    return {
        entry.group(1).strip(): {
            'location': entry.group(2).strip(),
            'description': entry.group(3).strip(),
            'significance': entry.group(4).strip(),
        }
        for entry in entry_pattern.finditer(text)
    }
def create_museum_json(file_path, output_file):
    """Convert a museum .docx description into a JSON file.

    Reads the document text, runs the three extractors over it, and writes
    the combined result to ``output_file`` as 4-space-indented JSON with the
    top-level keys ``museum_info``, ``galleries`` and ``artifacts``. A
    confirmation line is printed once the file has been written.

    Args:
        file_path: Path of the source .docx document.
        output_file: Path of the JSON file to create or overwrite.
    """
    document_text = extract_text_from_docx(file_path)

    payload = {
        "museum_info": extract_museum_info(document_text),
        "galleries": extract_gallery_info(document_text),
        "artifacts": extract_artifact_details(document_text),
    }

    with open(output_file, 'w') as handle:
        json.dump(payload, handle, indent=4)

    print(f"Museum data has been written to {output_file}")

# Usage: convert the museum description document into a JSON knowledge base.
# NOTE(review): this writes "museum_data1.json", but the preprocessing cell
# further down opens 'museum_data.json' — confirm which file is intended.
file_path = "Museum.docx"  # source document, resolved relative to the working directory
output_file = "museum_data1.json"  # created or overwritten by create_museum_json
create_museum_json(file_path, output_file)

Museum data has been written to museum_data1.json


**Preprocessing and Model training**
-

In [4]:
import json
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

# Download necessary NLTK data (resources for the word_tokenize / pos_tag
# calls made in preprocess_question below).
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Load the JSON file into the module-level `museum_data` that answer_question searches.
# NOTE(review): the DOCX-to-JSON cell above writes "museum_data1.json", not
# 'museum_data.json' — confirm this file exists on a fresh checkout.
with open('museum_data.json', 'r') as f:
    museum_data = json.load(f)

def preprocess_question(question):
    """Reduce a question to the words used as search keywords.

    The question is lower-cased, tokenized and part-of-speech tagged. A token
    is kept when its tag starts with ``N``, ``V``, ``J`` or ``PRP``.

    Args:
        question: The user's question as free text.

    Returns:
        list[str]: The kept tokens, lower-cased, in their original order.
    """
    kept_tag_prefixes = ('N', 'V', 'J', 'PRP')
    tagged_tokens = pos_tag(word_tokenize(question.lower()))

    keywords = []
    for token, tag in tagged_tokens:
        if tag.startswith(kept_tag_prefixes):
            keywords.append(token)
    return keywords

def search_json(data, keywords, current_path=None):
    """Walk nested dicts/lists and yield every dict entry whose key matches a keyword.

    A key matches when any keyword is a substring of the lower-cased key.
    Only dict keys are tested; list items are never matched themselves, but
    dicts and lists nested inside a list are still searched.

    Args:
        data: The structure to search (typically parsed JSON).
        keywords: Iterable of lower-case strings to look for in keys. It is
            iterated once per key, so pass a list or tuple, not a generator.
        current_path: Path of keys leading to ``data``; omit it on the
            top-level call. It is never modified.

    Yields:
        tuple[list[str], object]: ``(path, value)``, where ``path`` is the
        list of keys (list positions rendered as strings) from the root to
        the matching key and ``value`` is what that key maps to. A matching
        dict/list value is yielded and then also searched.
    """
    # A None sentinel replaces the original mutable `[]` default so that no
    # list object is shared between calls.
    path_so_far = [] if current_path is None else current_path

    if isinstance(data, dict):
        for key, value in data.items():
            new_path = path_so_far + [key]
            if any(keyword in key.lower() for keyword in keywords):
                yield new_path, value
            if isinstance(value, (dict, list)):
                yield from search_json(value, keywords, new_path)
    elif isinstance(data, list):
        for i, item in enumerate(data):
            new_path = path_so_far + [str(i)]
            if isinstance(item, (dict, list)):
                yield from search_json(item, keywords, new_path)

def answer_question(question):
    """Answer a question by looking up the best-matching entry in ``museum_data``.

    Keywords come from ``preprocess_question`` and candidate entries from
    ``search_json``. The candidate whose path has the most components
    containing a keyword is chosen; ties go to the candidate found first.

    Args:
        question: The user's question as free text.

    Returns:
        str | None: An apology message when nothing matches. Otherwise the
        chosen value rendered as text: scalars via ``str``, dicts as
        2-space-indented JSON, lists as comma-separated items. ``None`` if
        the value is of any other type.
    """
    keywords = preprocess_question(question)
    candidates = list(search_json(museum_data, keywords))

    if not candidates:
        return "I'm sorry, I couldn't find information related to your question."

    def matching_parts(candidate):
        # Count the path components that contain at least one keyword.
        path, _ = candidate
        return sum(any(keyword in part.lower() for keyword in keywords) for part in path)

    # max() returns the first candidate with the highest score.
    _, value = max(candidates, key=matching_parts)

    # Construct the answer
    if isinstance(value, (str, int, float)):
        return str(value)
    if isinstance(value, dict):
        return json.dumps(value, indent=2)
    if isinstance(value, list):
        return ', '.join(str(item) for item in value)
    return None

[nltk_data] Downloading package punkt to /home/d1e/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/d1e/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [5]:
# Example usage: run a few sample questions through answer_question and print
# each question with its answer. Commented-out entries are alternative sample
# questions that are currently disabled.
questions = [
    # "World war description?",
    # "Tell about the Egyptian Sarcophagus.",
    "When does the museum close?",
    # "How much are tickets for students?",
    "What are the artifacts can find in gallery A?",
    # "What about apollo?"

]

# Answers come from the module-level museum_data loaded in the previous cell.
for question in questions:
    print(f"Question: {question}")
    answer = answer_question(question)
    print(f"Answer: {answer}\n")

Question: When does the museum close?
Answer: {
  "name": "Grand Heritage Museum",
  "location": "123 Museum Avenue, Historical City, Country",
  "contact": "+123-456-7890",
  "website": "www.grandheritagemuseum.com",
  "opening_hours": "9AM - 5PM",
  "ticket_prices": {
    " Adults": 15,
    "Seniors": 12,
    "Students": 10
  }
}

Question: What are the artifacts can find in gallery A?
Answer: Mesopotamian Clay Tablet, Egyptian Sarcophagus, Indus Valley Terracotta Figures, Chinese Bronze Vessels



**Natural Language Generation**
-

In [None]:
!pip install pyttsx3

In [None]:
import os
import google.generativeai as genai

# Set up the Google API key.
# SECURITY: the key must never be written into the notebook. It is read from
# the GOOGLE_API_KEY environment variable; if that is unset, it is requested
# interactively without echoing. The key that was previously hardcoded here
# has been exposed and should be revoked.
if not os.environ.get('GOOGLE_API_KEY'):
    from getpass import getpass
    os.environ['GOOGLE_API_KEY'] = getpass("Enter your Google API key: ")

# Configure the generative AI client and open one chat session that
# generate_ai_response reuses for every message.
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
model = genai.GenerativeModel("gemini-pro")
chat = model.start_chat(history=[])

def generate_ai_response(text):
    """Send ``text`` to the shared chat session and return the reply's first sentence.

    The reply is requested as a stream and its chunks are concatenated. The
    result is everything before the first ``". "`` in the reply (the whole
    reply when that separator never occurs), with surrounding whitespace
    removed.

    Args:
        text: The user's message.

    Returns:
        str: The first sentence of the model's reply, without its final period
        when a further sentence follows.
    """
    reply_stream = chat.send_message(text, stream=True)
    full_reply = "".join(chunk.text for chunk in reply_stream)
    leading_sentence, _, _ = full_reply.partition(". ")
    return leading_sentence.strip()

def main():
    """Run an interactive console chat until the user types "thank you".

    Prints a greeting, then repeatedly reads a line from the user, passes it
    to ``generate_ai_response`` and prints the reply. Input is trimmed, and
    the exit phrase is matched case-insensitively.
    """
    print("What's up, How may I help you?")

    user_input = input("You: ").strip()
    while user_input.lower() != "thank you":
        print(f"Assistant: {generate_ai_response(user_input)}")
        user_input = input("You: ").strip()

    print("Assistant: You're welcome! Exiting...")


if __name__ == "__main__":
    main()

**TTS**
-

In [None]:
from gtts import gTTS
import os

def text_to_speech(text):
    """Speak ``text`` aloud.

    The text is synthesised to ``output.mp3`` in the working directory with
    gTTS (English, normal speed), played through the ``mpg321`` command-line
    player, and the file is then deleted.

    Args:
        text: The text to speak.
    """
    tts = gTTS(text=text, lang="en", slow=False)
    filename = "output.mp3"
    try:
        tts.save(filename)
        os.system(f"mpg321 {filename}")
    finally:
        # Always clean up: interrupting the kernel during playback, or a
        # failure part-way through save(), must not leave the file behind.
        if os.path.exists(filename):
            os.remove(filename)


if __name__ == "__main__":
    text = input("Enter the text you want to convert to speech: ")
    text_to_speech(text)
