In [None]:
!pip install gtts pydub
from gtts import gTTS
from pydub import AudioSegment
from pydub.playback import play

# Define the script for each speaker.
# Each key is a speaker name and each value is that speaker's lines in speaking order.
# The lists are later interleaved by index (Alex line 0, Jordan line 0, Alex line 1, ...).
# Alex has 8 lines and Jordan has 7, so Alex both opens and closes the conversation.
script = {
    "Alex": [
        "Hey Jordan, thanks for taking the time to meet. I wanted to tell you about our new programming course. It’s a comprehensive course designed for all skill levels, from beginners to advanced programmers. Are you interested in learning more?",
        "Absolutely! The course runs for 12 weeks and covers a range of subjects, including Python, JavaScript, and web development. Each week, there are 3 live sessions and 2 hands-on coding labs. You’ll also get access to recorded sessions in case you miss anything.",
        "The full course is priced at 999 dollars, but we currently have an early-bird discount running, so you can get it for 799 dollars if you sign up before the end of the month.",
        "Good point. We keep the batch size small—around 20 students per batch—so you get more one-on-one time with the instructors and personalized guidance.",
        "In addition to those, we dive into data structures, algorithms, database management, and even API development. There’s also a project week where you’ll build a full-stack application.",
        "Yes, you’ll receive a certificate of completion at the end of the course, which you can add to your portfolio. Plus, we offer job placement assistance once you finish.",
        "Until the end of this month. After that, the price goes back to 999 dollars. So if you’re interested, now’s the best time to sign up!",
        "No problem at all! I’ll send over the details via email. Let me know if you have any other questions."
    ],
    "Jordan": [
        "Definitely! I'm actually looking to enhance my skills. Can you tell me about the key features of the course?",
        "That sounds great! What’s the price?",
        "Oh, nice! What about batch sizes? I prefer smaller groups for better interaction.",
        "What subjects are covered, apart from Python and JavaScript?",
        "That’s impressive. Does the course offer any certification?",
        "Awesome. And the discount—you said it’s available until when?",
        "I’ll definitely think about it. This seems like a great opportunity. Thanks for all the info, Alex!"
    ]
}

# Function to convert text to speech using gTTS and save as an audio file
def text_to_speech(text, file_name, lang='en'):
    """Synthesize ``text`` with gTTS and save the result to ``file_name``.

    Args:
        text: The text to be spoken.
        file_name: Destination path handed to ``gTTS.save``.
        lang: Language code passed to gTTS. Defaults to ``'en'``, the value
            that was previously hard-coded, so existing two-argument calls
            behave exactly as before.
    """
    tts = gTTS(text=text, lang=lang)
    tts.save(file_name)

# Generate audio for each speaker through one shared helper instead of two copy-pasted loops
def _synthesize_speaker_lines(speaker):
    """Convert every line of ``script[speaker]`` to speech and load the clips.

    Each line is written to ``<speaker lower-cased>_line_<index>.mp3`` via
    ``text_to_speech`` and then read back with ``AudioSegment.from_mp3``.

    Args:
        speaker: A key of ``script`` (e.g. ``"Alex"``).

    Returns:
        list of ``AudioSegment`` objects, one per line, in script order.
    """
    prefix = speaker.lower()
    segments = []
    for idx, line in enumerate(script[speaker]):
        file_name = f"{prefix}_line_{idx}.mp3"
        text_to_speech(line, file_name)
        segments.append(AudioSegment.from_mp3(file_name))
    return segments


alex_audio = _synthesize_speaker_lines("Alex")
jordan_audio = _synthesize_speaker_lines("Jordan")

# Stitch the clips together turn by turn: Alex speaks, then Jordan replies (when
# Jordan still has a line for that turn), with a half-second gap after every clip.
gap = AudioSegment.silent(duration=500)
conversation = AudioSegment.silent(duration=500)  # Lead-in pause before the first line
for turn, alex_clip in enumerate(alex_audio):
    conversation = conversation + (alex_clip + gap)
    if turn >= len(jordan_audio):
        continue
    conversation = conversation + (jordan_audio[turn] + gap)

# Save the combined conversation to a single file.
# export() hands back the output file object still open, so close it explicitly
# instead of leaving the handle for the garbage collector.
conversation.export("conversation.mp3", format="mp3").close()

# Optionally, play the final conversation
play(conversation)

print("Conversation audio has been generated and saved as 'conversation.mp3'.")


Collecting gtts
  Downloading gTTS-2.5.3-py3-none-any.whl.metadata (4.1 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading gTTS-2.5.3-py3-none-any.whl (29 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub, gtts
Successfully installed gtts-2.5.3 pydub-0.25.1
Conversation audio has been generated and saved as 'conversation.mp3'.


# Audio Playback and Inspection

In [None]:
# Replay the combined conversation. `conversation` is built in the first code cell,
# so that cell must already have been run in this kernel.
play(conversation)

In [None]:
from pydub import AudioSegment

# Load the MP3 file
audio = AudioSegment.from_mp3("conversation.mp3")

# Get basic information about the audio file
duration_in_ms = len(audio)  # Duration of the audio in milliseconds
channels = audio.channels  # Number of channels (1 for mono, 2 for stereo)
frame_rate = audio.frame_rate  # Sample rate of the audio

# Print information about the audio
print(f"Duration: {duration_in_ms / 1000} seconds")
print(f"Channels: {channels}")
print(f"Frame rate: {frame_rate} Hz")

# Export a portion of the audio, for example, the first 10 seconds
# (slicing an AudioSegment is done in milliseconds, hence 10000).
first_10_seconds = audio[:10000]
# export() returns the open output file object; closing it releases the handle
# and keeps the file object's repr from being echoed as the cell's output.
first_10_seconds.export("first_10_seconds.mp3", format="mp3").close()

Duration: 163.998 seconds
Channels: 1
Frame rate: 24000 Hz


<_io.BufferedRandom name='first_10_seconds.mp3'>

In [None]:
pip install nltk




QUERY HANDLING


In [None]:
import csv
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Ensure necessary NLTK data is downloaded.
# 'punkt' serves older NLTK releases; newer releases load the tokenizer data
# from 'punkt_tab' instead, so request both to keep word_tokenize usable.
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource)

# Load FAQ dataset from CSV
def load_faq_from_csv(csv_file_path, encoding='utf-8'):
    """Read question/answer pairs from a CSV file into a dict.

    The first row is treated as a header and skipped. In every remaining row
    the second column is the question and the third column is the answer. A
    question that occurs more than once keeps the answer from its last row.
    Rows with fewer than three columns (for example blank lines) are skipped.

    Args:
        csv_file_path: Path of the CSV file to read.
        encoding: Text encoding used to open the file.

    Returns:
        dict mapping question text to answer text; empty when the file has no
        usable data rows.
    """
    faq_data = {}
    # newline='' leaves line-ending handling to the csv module, so quoted
    # fields that contain line breaks are parsed correctly.
    with open(csv_file_path, mode='r', newline='', encoding=encoding) as file:
        csv_reader = csv.reader(file)
        next(csv_reader, None)  # Skip the header row; tolerate a completely empty file
        for row in csv_reader:
            if len(row) < 3:
                # Blank or truncated row: there is no question/answer pair to read
                continue
            question, answer = row[1], row[2]
            faq_data[question] = answer
    return faq_data

# Preprocess and tokenize the input
def preprocess_input(user_input):
    """Lower-case ``user_input`` and return the result of ``nltk.word_tokenize`` on it."""
    lowered = user_input.lower()
    tokens = nltk.word_tokenize(lowered)
    return tokens

# Respond to user input
def get_response(user_input, faq_data, threshold=0.5):
    """Return the answer of the FAQ question most similar to ``user_input``.

    The FAQ questions and the user input are turned into token-count vectors
    with ``CountVectorizer`` (fitted on all of them together), and the user
    vector is compared with every question vector by cosine similarity.

    Args:
        user_input: The text typed by the user.
        faq_data: dict mapping question text to answer text.
        threshold: Similarity the best match must exceed for its answer to be
            returned. Defaults to 0.5, the value that was previously hard-coded.

    Returns:
        The answer belonging to the best-matching question, or a fixed
        "please rephrase" message when ``faq_data`` is empty or no question
        scores above ``threshold``.
    """
    fallback = "I'm sorry, I don't have information about that. Could you please rephrase?"

    # With no questions there is nothing to compare against (and argmax of an
    # empty score array would raise), so answer with the fallback right away.
    if not faq_data:
        return fallback

    questions = list(faq_data.keys())
    responses = list(faq_data.values())

    # Fit on the questions plus the user input so they share one vocabulary;
    # the user input ends up as the last row of the count matrix.
    count_matrix = CountVectorizer().fit_transform(questions + [user_input])

    # Compare only the user row with the question rows instead of building the
    # full pairwise matrix. Slices keep both operands two-dimensional.
    scores = cosine_similarity(count_matrix[-1:], count_matrix[:-1])[0]

    # Index of the most similar question and its score
    most_similar_idx = int(np.argmax(scores))
    similarity_score = scores[most_similar_idx]

    if similarity_score > threshold:
        return responses[most_similar_idx]
    return fallback

# Main loop for bot interaction
def customer_support_bot(faq_data):
    """Run an interactive question/answer loop on standard input and output.

    Prints a greeting, then repeatedly reads a line from the user and prints
    the reply produced by ``get_response``. The loop ends when the user types
    "exit", "quit" or "bye" (case-insensitive, surrounding whitespace ignored),
    or when the prompt is ended by end-of-input or a keyboard interrupt.

    Args:
        faq_data: dict mapping question text to answer text, passed through to
            ``get_response``.
    """
    farewell = "Customer Support Bot: Thank you for using our service. Have a great day!"
    print("Customer Support Bot: Hello! How can I assist you today?")

    while True:
        try:
            # Strip so that inputs such as "exit " are still recognised below
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or prompt interrupted: finish cleanly rather
            # than surfacing a traceback.
            print()
            print(farewell)
            break

        if user_input.lower() in ["exit", "quit", "bye"]:
            print(farewell)
            break

        response = get_response(user_input, faq_data)
        print(f"Customer Support Bot: {response}")

# Run the customer support bot.
# The guard is true when this code runs as the top-level module; the outputs
# recorded below the cell show the bot starting when the cell was executed.
if __name__ == "__main__":
    # Load FAQ data from CSV file into a question -> answer dict
    csv_file_path = 'faq_dataset.csv'  # Specify the path to your CSV file
    faq_data = load_faq_from_csv(csv_file_path)

    # Start the customer support bot; this blocks on input() until the user exits
    customer_support_bot(faq_data)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Customer Support Bot: Hello! How can I assist you today?
You: how can i reset my password
Customer Support Bot: To reset your password, go to the login page and click on 'Forgot Password.' Follow the instructions sent to your registered email to reset your password.
You: exit
Customer Support Bot: Thank you for using our service. Have a great day!
