<a href="https://colab.research.google.com/github/AkshataKurane/Shark-Tank/blob/main/Welcome_To_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install azure-cognitiveservices-speech
!pip install pydub
!pip install reportlab
!pip install pandas

Collecting azure-cognitiveservices-speech
  Downloading azure_cognitiveservices_speech-1.41.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Downloading azure_cognitiveservices_speech-1.41.1-py3-none-manylinux1_x86_64.whl (39.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.7/39.7 MB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: azure-cognitiveservices-speech
Successfully installed azure-cognitiveservices-speech-1.41.1
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Collecting reportlab
  Downloading reportlab-4.2.5-py3-none-any.whl.metadata (1.5 kB)
Downloading reportlab-4.2.5-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: reportlab
Succ

In [2]:
import os
import time
import azure.cognitiveservices.speech as speechsdk
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib import utils
from reportlab.lib import colors

class ConversationTranscriberWithFixedGuests:
    """Transcribe a multi-speaker audio file and export the transcript to PDF.

    Wraps an Azure Speech ``ConversationTranscriber`` that reads ``en-US``
    audio from a file. Every final utterance is stored in
    ``transcription_results`` as ``"<label>: <text>"``, where the label is a
    friendly name ("Guest 1", "Guest 2", ...) assigned to the service's
    speaker id in order of first appearance.
    """

    # Label for utterances that could not be attributed to a speaker.
    UNKNOWN_LABEL = "Unknown"
    # Output location used when save_to_pdf() is called without a path.
    DEFAULT_PDF_PATH = "/content/SharkTank/TranscriptionOutput.pdf"

    def __init__(self, subscription_key, region, audio_filename):
        """Configure the transcriber and wire up its event callbacks.

        Args:
            subscription_key: Azure Speech resource key.
            region: Azure region of the Speech resource.
            audio_filename: Path of the audio file to transcribe.
        """
        # Initialize Speech Configuration
        self.speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=region)
        self.speech_config.speech_recognition_language = "en-US"

        # Ask the service to attach speaker ids to intermediate results too.
        self.speech_config.set_property(
            property_id=speechsdk.PropertyId.SpeechServiceResponse_DiarizeIntermediateResults,
            value='true'
        )

        # Initialize Audio Configuration
        self.audio_config = speechsdk.audio.AudioConfig(filename=audio_filename)

        # Initialize Conversation Transcriber
        self.conversation_transcriber = speechsdk.transcription.ConversationTranscriber(
            speech_config=self.speech_config,
            audio_config=self.audio_config
        )

        # speaker id -> friendly label, filled lazily by get_guest_label()
        self.speaker_mapping = {}
        # Labels handed out to the first speakers, in order of appearance
        self.guest_labels = ["Guest 1", "Guest 2"]
        # Set by stop_cb(); polled by recognize_from_file()
        self.transcribing_stop = False

        # Final utterances, one "<label>: <text>" string each
        self.transcription_results = []

        # Connect event callbacks
        self.conversation_transcriber.transcribed.connect(self.transcribed_cb)
        self.conversation_transcriber.transcribing.connect(self.transcribing_cb)
        self.conversation_transcriber.session_started.connect(self.session_started_cb)
        self.conversation_transcriber.session_stopped.connect(self.session_stopped_cb)
        self.conversation_transcriber.canceled.connect(self.canceled_cb)
        self.conversation_transcriber.session_stopped.connect(self.stop_cb)
        self.conversation_transcriber.canceled.connect(self.stop_cb)

    def get_guest_label(self, speaker_id):
        """Return the friendly label for ``speaker_id``, assigning one if new.

        The first speakers receive the entries of ``guest_labels``; any
        further speaker receives ``"Guest <n>"`` with consecutive numbering.
        An empty/``None`` id, or the id ``"Unknown"``, always yields
        ``"Unknown"`` and never consumes a guest slot.

        Args:
            speaker_id: Speaker id reported by the service (may be empty).

        Returns:
            The label string for that speaker.
        """
        # NOTE(review): the service is expected to report the literal id
        # "Unknown" for unattributed speech - confirm against the SDK docs.
        if not speaker_id or str(speaker_id).strip().lower() == "unknown":
            return self.UNKNOWN_LABEL

        if speaker_id not in self.speaker_mapping:
            position = len(self.speaker_mapping)
            if position < len(self.guest_labels):
                label = self.guest_labels[position]
            else:
                label = f"Guest {position + 1}"
            self.speaker_mapping[speaker_id] = label

        return self.speaker_mapping[speaker_id]

    @staticmethod
    def _speaker_of(line):
        """Return the label part of a stored ``"<label>: <text>"`` line."""
        return line.partition(":")[0]

    def transcribed_cb(self, evt: "speechsdk.transcription.ConversationTranscriptionEventArgs"):
        """Record a final utterance; NoMatch and other results are ignored."""
        if evt.result.reason != speechsdk.ResultReason.RecognizedSpeech:
            return
        text = evt.result.text
        if not text or not text.strip():
            # Nothing audible was recognized; avoid "Guest 1: " filler lines.
            return
        result_text = f"{self.get_guest_label(evt.result.speaker_id)}: {text}"
        print(result_text)
        self.transcription_results.append(result_text)

    def transcribing_cb(self, evt: "speechsdk.transcription.ConversationTranscriptionEventArgs"):
        """Ignore intermediate (partial) hypotheses.

        Partial results are superseded by the final result delivered to
        ``transcribed_cb``; storing them would duplicate text in the PDF.
        """
        return

    def session_started_cb(self, evt: "speechsdk.SessionEventArgs"):
        """Callback for session start; intentionally silent."""
        return

    def session_stopped_cb(self, evt: "speechsdk.SessionEventArgs"):
        """Callback for session stop; intentionally silent."""
        return

    def canceled_cb(self, evt: "speechsdk.SessionEventArgs"):
        """Report the error text when a cancellation carries one.

        A cancellation without error text stays silent so normal runs keep
        their clean output.
        """
        # NOTE(review): assumes the canceled event exposes
        # `cancellation_details.error_details`; getattr keeps this safe if not.
        details = getattr(evt, "cancellation_details", None)
        error_text = getattr(details, "error_details", None)
        if error_text:
            print(f"Transcription canceled: {error_text}")

    def stop_cb(self, evt: "speechsdk.SessionEventArgs"):
        """Signal recognize_from_file() that transcription has ended."""
        self.transcribing_stop = True

    def recognize_from_file(self):
        """Run the transcription to completion, then write the PDF."""
        # .get() waits for the asynchronous start/stop request to finish.
        self.conversation_transcriber.start_transcribing_async().get()

        # Wait until a session_stopped or canceled event flips the flag.
        while not self.transcribing_stop:
            time.sleep(0.5)

        self.conversation_transcriber.stop_transcribing_async().get()

        # Save results to PDF
        self.save_to_pdf()

    def save_to_pdf(self, pdf_filename=None):
        """Write the collected utterances to a PDF, one color per guest.

        Each utterance is word-wrapped to the page width and followed by a
        blank line. A new page is started whenever the next line would fall
        below the bottom margin, including in the middle of an utterance.

        Args:
            pdf_filename: Output path; defaults to ``DEFAULT_PDF_PATH``.
        """
        if pdf_filename is None:
            pdf_filename = self.DEFAULT_PDF_PATH

        out_dir = os.path.dirname(pdf_filename)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        body_font, body_size = "Helvetica", 12
        left_margin = 40
        line_spacing = 20    # vertical distance between text lines
        bottom_padding = 50  # keep this much space free at the page bottom

        pdf = canvas.Canvas(pdf_filename, pagesize=letter)
        width, height = letter
        top_y = height - 60  # first text line, below the title

        # Add title
        pdf.setFont("Helvetica-Bold", 16)
        pdf.drawString(left_margin, height - 40, "Transcription")
        pdf.setFont(body_font, body_size)

        # Speakers without an entry here are drawn in black.
        guest_colors = {
            "Guest 1": colors.navy,
            "Guest 2": colors.teal,
            "Unknown": colors.red,
        }

        y_position = top_y
        for line in self.transcription_results:
            fill = guest_colors.get(self._speaker_of(line), colors.black)
            pdf.setFillColor(fill)

            for wrap_line in utils.simpleSplit(line, body_font, body_size, width - 2 * left_margin):
                if y_position < bottom_padding:
                    pdf.showPage()
                    # showPage() resets the graphics state, so restore it.
                    pdf.setFont(body_font, body_size)
                    pdf.setFillColor(fill)
                    y_position = top_y
                pdf.drawString(left_margin, y_position, wrap_line)
                y_position -= line_spacing

            # Blank line after each utterance
            y_position -= line_spacing

        pdf.save()
        print(f"Transcription saved to {pdf_filename}")
def main():
    """Transcribe the podcast audio file and save the transcript as a PDF.

    Credentials come from the environment so that no secret lives in the
    notebook: ``AZURE_SPEECH_KEY`` (required) and ``AZURE_SPEECH_REGION``
    (optional, defaults to ``centralindia``). Problems are reported on stdout
    and the function returns ``None`` in every case.
    """
    # Surrounding whitespace in a pasted key makes authentication fail.
    subscription_key = os.environ.get("AZURE_SPEECH_KEY", "").strip()
    region = os.environ.get("AZURE_SPEECH_REGION", "centralindia").strip()
    if not subscription_key:
        print("Azure Speech key not set; define the AZURE_SPEECH_KEY environment variable.")
        return

    # Path to your audio file
    audio_filename = "/content/SharkTank/ABsPodcastLarge.wav"

    # Verify that the audio file exists
    if not os.path.exists(audio_filename):
        print(f"Audio file not found at path: {audio_filename}")
        return

    try:
        # Initialize the transcriber with your credentials and audio file
        transcriber = ConversationTranscriberWithFixedGuests(subscription_key, region, audio_filename)

        # Start the transcription process
        transcriber.recognize_from_file()
    except Exception as err:
        # NOTE(review): `CognitiveServicesSpeechException` may not be exported
        # by the SDK; naming it directly in an `except` clause would then raise
        # AttributeError and hide the real error. Look it up defensively.
        sdk_error = getattr(speechsdk, "CognitiveServicesSpeechException", None)
        if sdk_error is not None and isinstance(err, sdk_error):
            print(f"Speech SDK Error: {err}")
        else:
            print(f"Encountered exception: {err}")


if __name__ == "__main__":
    main()

Guest 1: Welcome everyone to another edition of the Tony Siriani Podcast. I am thrilled to have owner Erzan with us. He is the head of Alliance Bernstein Private Wealth. So thanks for being with us, owner.
Guest 2: Thank you, Tony. Thanks for having me.
Guest 1: Well, my pleasure. Say, you know, Alliance Bernstein has an old, you know, well known name on Wall Street, right? So it and it has.
Guest 1: History. Just do me a favor, give me an update on on on where it is today.
Guest 2: Sure. I always joke with my colleagues that sometimes we are a well kept secret on Wall Street because we have always the global reach and scale of a large asset manager. We manage close to $800 billion, but then we still have an intimate private wealth management business. It's a entrepreneurial part of our culture and.
Guest 2: Our integrated asset and wealth management business and I call it entrepreneurial and intimate because we operate out of 20 offices and we have 300 advisors. So you have a little b

# Summarization

In [4]:
!pip install PyPDF2 openai fpdf

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting openai
  Downloading openai-1.51.2-py3-none-any.whl.metadata (24 kB)
Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading openai-1.51.2-py3-none-any.whl (383 kB)


In [19]:
import os
import re
from collections import Counter
import PyPDF2
from fpdf import FPDF
import matplotlib.pyplot as plt
import seaborn as sns
from openai import AzureOpenAI

OPENAI_API_VERSION = "2024-02-01"
AZURE_OPENAI_ENDPOINT = "https://hexavarsity-secureapi.azurewebsites.net/api/azureai"
# The key is read from the environment so it is never stored in the notebook.
AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY", "").strip()
if not AZURE_OPENAI_API_KEY:
    raise RuntimeError("Set the AZURE_OPENAI_API_KEY environment variable before running this cell.")

client = AzureOpenAI(api_version=OPENAI_API_VERSION, azure_endpoint=AZURE_OPENAI_ENDPOINT, api_key=AZURE_OPENAI_API_KEY)

# Read the content of the PDF file using PyPDF2
file_path = "/content/SharkTank/TranscriptionOutput.pdf"
with open(file_path, 'rb') as file:  # PDFs must be opened in binary mode
    pdf_reader = PyPDF2.PdfReader(file)
    # A page that yields no text contributes an empty string.
    file_content = "".join(page.extract_text() or "" for page in pdf_reader.pages)

# Extract dynamic data
def extract_speaker_turns_and_word_count(text):
    """Count turns and spoken words per speaker in a transcript.

    A turn starts at every ``"Guest <n>:"`` label and runs until the next
    label or the end of ``text``, so utterances that wrap over several lines
    and a final utterance without a trailing newline are counted in full.
    Text before the first label (for example a title) is ignored.

    Args:
        text: Transcript text containing ``"Guest <n>: ..."`` utterances.

    Returns:
        A ``(speaker_turns, word_count)`` pair of ``Counter`` objects keyed
        by speaker label.
    """
    speaker_turns = Counter()
    word_count = Counter()

    labels = list(re.finditer(r'(Guest \d+):', text))
    for index, label in enumerate(labels):
        speaker = label.group(1)
        # The utterance ends where the next speaker label begins.
        end = labels[index + 1].start() if index + 1 < len(labels) else len(text)
        speaker_turns[speaker] += 1
        word_count[speaker] += len(text[label.end():end].split())

    return speaker_turns, word_count

def extract_topic_segmentation(text):
    """Map each token that follows a number to that number.

    Scans ``text`` for ``<digits><whitespace><token><whitespace>`` and
    returns ``{token: int(digits)}``. When a token occurs more than once,
    the last occurrence wins.
    """
    segmentation = {}
    for found in re.finditer(r'(\d+)\s(.*?)\s', text):
        stamp, label = found.groups()
        segmentation[label] = int(stamp)
    return segmentation

# Derive per-speaker statistics and the number->token mapping from the
# text extracted from the transcript PDF.
speaker_turn_count, total_word_count = extract_speaker_turns_and_word_count(file_content)
# NOTE(review): topic_segmentation is not referenced again in the visible
# part of this notebook - confirm whether it is still needed.
topic_segmentation = extract_topic_segmentation(file_content)

# Create the prompt: the full transcript text followed by the instructions
# for the model (summary, question/answer list, company list, sentiment).
prompt = f"""
You are a helpful assistant. Here is a text:

{file_content}

Print the whole content from pdf first then,

Utterance is the Main conversation that occurred, having said that give the title SUMMARY and Give a combined detailed summary of the utterances from pdf(the pdf is likely to be on finance) make sure the summary is readable so every line should only contain certain words and then use next line but give summary in a paragraph, Give the title Question and Answer,Identify and list out all the questions and Answers asked form the pdf
give a title LIST OF COMPANIES mentioned in the pdf. (please dont hallucinate and stick to pdf.)

Sentiment Over Time:
Sentiment analysis plotted over the duration of the conversation to show how sentiment changes.
"""
# Doubles every apostrophe in the prompt, transcript text included.
# NOTE(review): this looks like SQL-style quoting; the request body is sent
# by the client library, so confirm the endpoint really needs it - otherwise
# the model sees "don''t" instead of "don't".
prompt = prompt.replace("'", "''")

# Make the API call
# NOTE(review): the prompt asks for the whole transcript to be echoed, but
# the reply is capped at max_tokens=900 - check for truncated output.
res = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant give answers for the questions asked by user"},
        {"role": "user", "content": prompt}
    ],
    temperature=0.7,
    max_tokens=900,
    top_p=0.6,
    frequency_penalty=0.7
)

# Get the response content (text of the first returned choice)
response_content = res.choices[0].message.content

# Create a new PDF with the response content
class PDF(FPDF):
    """FPDF document with helpers for a section title and a body paragraph.

    The built-in Helvetica font used here can only render Latin-1 text, so
    all text is passed through ``_latin1`` before it is drawn.
    """

    # Common typographic characters mapped to Latin-1-safe equivalents.
    _PUNCTUATION = str.maketrans({
        "\u2018": "'", "\u2019": "'",
        "\u201c": '"', "\u201d": '"',
        "\u2013": "-", "\u2014": "-",
        "\u2026": "...", "\u2022": "*",
    })

    @classmethod
    def _latin1(cls, text):
        """Return ``text`` reduced to characters representable in Latin-1.

        Known typographic punctuation is replaced by an ASCII equivalent;
        any other unsupported character becomes ``?``. ``None`` is treated
        as an empty string.
        """
        cleaned = (text or "").translate(cls._PUNCTUATION)
        return cleaned.encode('latin-1', 'replace').decode('latin-1')

    def chapter_title(self, title):
        """Draw ``title`` as a bold, left-aligned heading followed by a gap."""
        self.set_font("Helvetica", "B", 14)
        self.cell(0, 10, self._latin1(title), 0, 1, "L")
        self.ln(10)

    def chapter_body(self, body):
        """Draw ``body`` as wrapped body text followed by a line break."""
        self.set_font("Helvetica", "", 12)
        self.multi_cell(0, 10, self._latin1(body))
        self.ln()

# Render the model's reply into a standalone PDF with a single titled
# section; the file is written to the current working directory.
new_pdf_path = "new_content.pdf"
pdf = PDF()
pdf.add_page()  # a page must exist before anything can be drawn
pdf.chapter_title("Transcript Analysis")
pdf.chapter_body(response_content)  # text returned by the chat completion
pdf.output(new_pdf_path)

# Visualization part
# Pie chart: share of speaking turns per speaker, saved as a PNG
turns_fig, turns_ax = plt.subplots(figsize=(10, 7))
turns_ax.pie(
    speaker_turn_count.values(),
    labels=speaker_turn_count.keys(),
    autopct='%1.1f%%',
    startangle=140,
)
turns_ax.set_title('Speaker Turn Count')
turns_fig.savefig('speaker_turn_count.png')
plt.close(turns_fig)

# Bar chart: total words spoken per speaker, saved as a PNG
words_fig, words_ax = plt.subplots(figsize=(10, 7))
sns.barplot(
    x=list(total_word_count.keys()),
    y=list(total_word_count.values()),
    ax=words_ax,
)
words_ax.set_title('Total Word Count')
words_ax.set_xlabel('Speakers')
words_ax.set_ylabel('Word Count')
words_fig.savefig('total_word_count.png')
plt.close(words_fig)

# Merge the new PDF with the existing PDF and add images
output_pdf_path = "merged_output.pdf"
visualizations_pdf_path = "visualizations.pdf"

# Build the one-page visualizations PDF first so that all three inputs
# exist before merging starts. A chart that was not produced is skipped.
pdf_with_images = PDF()
pdf_with_images.add_page()
pdf_with_images.chapter_title("Visualizations")
if os.path.exists('speaker_turn_count.png'):
    pdf_with_images.image('speaker_turn_count.png', x=10, y=30, w=90, h=60)
if os.path.exists('total_word_count.png'):
    pdf_with_images.image('total_word_count.png', x=110, y=30, w=90, h=60)
pdf_with_images.output(visualizations_pdf_path)

# Concatenate transcript, analysis and visualizations, in that order.
# The merged file is written while the source streams are still open, so
# the result does not depend on the PDF library having copied page data
# eagerly when add_page() was called.
pdf_writer = PyPDF2.PdfWriter()
with open(file_path, 'rb') as existing_file, \
        open(new_pdf_path, 'rb') as new_file, \
        open(visualizations_pdf_path, 'rb') as visualizations_file:
    existing_pdf = PyPDF2.PdfReader(existing_file)
    new_pdf = PyPDF2.PdfReader(new_file)
    visualizations_pdf = PyPDF2.PdfReader(visualizations_file)

    for reader in (existing_pdf, new_pdf, visualizations_pdf):
        for page in reader.pages:
            pdf_writer.add_page(page)

    with open(output_pdf_path, 'wb') as output_file:
        pdf_writer.write(output_file)

print(f"Merged PDF saved to {output_pdf_path}")


Merged PDF saved to merged_output.pdf
