<a href="https://colab.research.google.com/github/AkshataKurane/Shark-Tank/blob/main/MultipleAudioFiles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prerequisites

In [1]:
!pip install git+https://github.com/openai/whisper.git
!pip install ffmpeg
!pip install pydub
!pip install -qq https://github.com/pyannote/pyannote-audio/archive/refs/heads/develop.zip
!pip install -qq ipython==7.34.0
!pip install SpeechRecognition
!pip install pocketsphinx
!pip install reportlab
!pip install PyMuPDF
!pip install openai
!pip install PyPDF2 fpdf

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-0xkzen3c
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-0xkzen3c
  Resolved https://github.com/openai/whisper.git to commit 25639fc17ddc013d56c594bfbf7644f2185fad84
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper==20240930)
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting triton>=2.0.0 (from openai-whisper==20240930)
  Downloading triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)
Downloading triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (209.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

# Transcription

In [2]:
# Uses an if/else check to skip audio files that have already been processed
from pyannote.audio import Pipeline
from pydub import AudioSegment
import pandas as pd
import whisper
import tempfile
import os
import pandas as pd
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph
from reportlab.lib.styles import getSampleStyleSheet

def convert_mp3_to_wav(mp3_file_path, wav_file_path):
    """Decode the MP3 at ``mp3_file_path`` and export it in WAV format.

    The exported audio is written to ``wav_file_path``. Nothing is returned.
    """
    AudioSegment.from_mp3(mp3_file_path).export(wav_file_path, format="wav")

def rttm_to_dataframe(rttm_file_path):
    """Load an RTTM file into a DataFrame of start time, duration and speaker.

    Every non-blank line is split on whitespace into ten fields, of which only
    "Start Time", "Duration" and "Speaker" are kept. All values are returned
    as strings; converting the numeric columns is left to the caller.

    Blank lines (for example a trailing newline at the end of the file) are
    skipped so that they do not turn into all-empty rows.

    Args:
        rttm_file_path: Path of the RTTM file to read.

    Returns:
        A pandas DataFrame with the columns "Start Time", "Duration" and
        "Speaker", holding one row per non-blank line of the file.
    """
    columns = ["Type", "File ID", "Channel", "Start Time", "Duration", "Orthography", "Confidence", "Speaker", 'x', 'y']
    kept_columns = ["Start Time", "Duration", "Speaker"]
    with open(rttm_file_path, 'r') as rttm_file:
        # str.split() with no arguments already discards surrounding
        # whitespace, and yields an empty list for a blank line.
        rows = [fields for fields in (line.split() for line in rttm_file) if fields]
    return pd.DataFrame(rows, columns=columns)[kept_columns]

# Whisper models already loaded in this process, keyed by model name, so that
# transcribing many segments does not reload the weights for each one.
_WHISPER_MODELS = {}


def extract_text_from_audio_segment(audio_segment, model_name="base"):
    """Transcribe an audio segment with Whisper and return the recognised text.

    The segment is exported to a temporary WAV file, that file is passed to
    the Whisper model's ``transcribe`` method, and the file is removed again
    whether or not transcription succeeds.

    Args:
        audio_segment: Object with an ``export(path, format=...)`` method
            (the callers in this file pass pydub ``AudioSegment`` slices).
        model_name: Name handed to ``whisper.load_model``. Defaults to
            "base", the model this function has always used.

    Returns:
        The value stored under the ``'text'`` key of the transcription result.
    """
    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        model = _WHISPER_MODELS[model_name] = whisper.load_model(model_name)

    # Only reserve a unique file name here; the handle is closed before the
    # export so the file is not written to while another handle is open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        temp_path = temp_file.name
    try:
        audio_segment.export(temp_path, format="wav")
        result = model.transcribe(temp_path)
    finally:
        # delete=False means nothing else cleans this file up.
        os.remove(temp_path)
    return result['text']

def get_audio_segment(audio_file_path, start_time, end_time):
    """Load a WAV file and return the slice between two time offsets.

    ``start_time`` and ``end_time`` are each multiplied by 1000 and truncated
    with ``int()`` to form the millisecond bounds of the slice.
    """
    recording = AudioSegment.from_wav(audio_file_path)
    bounds_ms = slice(int(start_time * 1000), int(end_time * 1000))
    return recording[bounds_ms]

def _write_transcription_pdf(df, pdf_path, title_text):
    """Render ``df`` as one gridded table under a title in a letter-size PDF."""
    # Local import: transcript text must be escaped because Paragraph parses
    # its input as XML-like markup, so a stray "<" or "&" could break the build.
    from xml.sax.saxutils import escape

    styles = getSampleStyleSheet()
    body_style = styles['BodyText']
    title = Paragraph(title_text, styles['Title'])

    header = df.columns.to_list()
    rows = [[Paragraph(escape(str(cell)), body_style) for cell in row] for row in df.values]
    # One width per column: Start Time, Duration, Speaker, Utterance, End Time.
    table = Table([header] + rows, colWidths=[75, 60, 100, 300, 62])
    table.setStyle(TableStyle([
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 12),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
        ('RIGHTPADDING', (0, 0), (-1, -1), 6),
        ('LEFTPADDING', (0, 0), (-1, -1), 6),
    ]))
    SimpleDocTemplate(pdf_path, pagesize=letter).build([title, table])


def process_audio_file(mp3_file_path, auth_token=None, num_speakers=2):
    """Diarize and transcribe one MP3 file, saving the result as CSV and PDF.

    Steps, all written next to the input file and sharing its base name:
      1. convert the MP3 to ``<base>.wav``;
      2. run the pyannote speaker-diarization pipeline and write ``<base>.rttm``;
      3. transcribe every diarized segment with Whisper;
      4. save the table to ``<base>.csv`` and ``<base>_Transcription.pdf``.

    A segment whose transcription raises is reported on stdout and gets the
    utterance text 'Error'; the remaining segments are still processed.

    Args:
        mp3_file_path: Path of the MP3 file to process.
        auth_token: Hugging Face access token for the diarization pipeline.
            When omitted, the ``HF_TOKEN`` environment variable is used, so
            no credential has to be stored in the notebook.
        num_speakers: Number of speakers passed to the diarization pipeline.

    Raises:
        RuntimeError: If the diarization pipeline could not be loaded.

    Side effects:
        Sets the module-level global ``output_csv_path`` to the CSV path.
    """
    # splitext only touches the real extension, unlike str.replace, which
    # would also rewrite a ".mp3" appearing elsewhere in the path.
    base_path = os.path.splitext(mp3_file_path)[0]
    wav_file_path = base_path + '.wav'
    rttm_file_path = base_path + '.rttm'

    # Convert MP3 to WAV
    convert_mp3_to_wav(mp3_file_path, wav_file_path)

    # Speaker diarization
    token = auth_token or os.environ.get("HF_TOKEN")
    pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization@2.1", use_auth_token=token)
    # NOTE(review): from_pretrained appears to return None rather than raise
    # when the model cannot be fetched (e.g. missing or unauthorised token);
    # confirm against the installed pyannote.audio version.
    if pipeline is None:
        raise RuntimeError(
            "Could not load the pyannote speaker-diarization pipeline. Set the HF_TOKEN "
            "environment variable (or pass auth_token) to a Hugging Face token that has "
            "access to the model."
        )
    diarization = pipeline(wav_file_path, num_speakers=num_speakers)

    with open(rttm_file_path, "w") as rttm:
        diarization.write_rttm(rttm)

    # Process RTTM to DataFrame
    df = rttm_to_dataframe(rttm_file_path)
    df = df.astype({'Start Time': 'float', 'Duration': 'float'})
    df['Utterance'] = None
    df['End Time'] = df['Start Time'] + df['Duration']

    # Transcribe audio segments. The WAV is decoded once here instead of
    # once per segment; each segment is then a millisecond slice of it.
    audio = AudioSegment.from_wav(wav_file_path)
    for ind in df.index:
        start_ms = int(df.loc[ind, 'Start Time'] * 1000)
        end_ms = int(df.loc[ind, 'End Time'] * 1000)
        try:
            df.loc[ind, 'Utterance'] = extract_text_from_audio_segment(audio[start_ms:end_ms])
        except Exception as e:
            # Best effort: one failing segment must not lose the whole file.
            print(f"Error processing index {ind}: {e}")
            df.loc[ind, 'Utterance'] = 'Error'
    print(df)

    # Save DataFrame to CSV
    global output_csv_path
    output_csv_path = base_path + '.csv'
    df.to_csv(output_csv_path, index=False)

    # Save the same table as a PDF
    save_to_pdf_file_path = base_path + '_Transcription.pdf'
    _write_transcription_pdf(df, save_to_pdf_file_path, f"Audio Transcription of '{base_path}'\n")
    print(f"PDF created successfully at: {save_to_pdf_file_path}")
'''
    file_format = input("\nYour output is saved in .csv format. Do you want in any other format? (e.g. csv, text, excel) : ")
    if file_format == "text":
      output_csv_path = mp3_file_path.replace('.mp3', '.txt')
      df.to_csv(output_csv_path, index=False)
    elif file_format == "excel":
      output_csv_path = mp3_file_path.replace('.mp3', '.xlsx')
      df.to_excel(output_csv_path, index=False)
    else:
      print("\nInvalid file format")
    print(f"\nProcessed {mp3_file_path}. Results saved to {output_csv_path}\n")'''

# Folder scanned for MP3 recordings to transcribe.
folder_path = '/content/SharkTank'

if os.path.isdir(folder_path):
    mp3_files = [f for f in os.listdir(folder_path) if f.endswith('.mp3')]
else:
    # Report a missing folder instead of failing with a traceback from os.listdir.
    print(f"Folder path '{folder_path}' does not exist.")
    mp3_files = []
mp3_file_paths = [os.path.join(folder_path, f) for f in mp3_files]

for mp3_file_path in mp3_file_paths:
    # process_audio_file writes a CSV next to the MP3, so an existing CSV
    # marks the recording as already handled.
    check = os.path.splitext(mp3_file_path)[0] + '.csv'
    if os.path.exists(check):
        print(f"{mp3_file_path} is already processed!!!!")
    else:
        print(f"\nProcessing {mp3_file_path}...")
        process_audio_file(mp3_file_path)
        print(f"{mp3_file_path} is processed successfully!!!!\n\n")


Processing /content/SharkTank/videoplaybackshort.mp3...


config.yaml:   0%|          | 0.00/500 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/17.7M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/318 [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.4.0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/torch/pyannote/models--pyannote--segmentation/snapshots/c4c8ceafcbb3a7a280c2d357aee9fbc9b0be7f9b/pytorch_model.bin`


Model was trained with pyannote.audio 0.0.1, yours is 3.3.2. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.10.0+cu102, yours is 2.4.1+cu121. Bad things might happen unless you revert torch to 1.x.


hyperparams.yaml:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

  wrapped_fwd = torch.cuda.amp.custom_fwd(fwd, cast_inputs=cast_inputs)


embedding_model.ckpt:   0%|          | 0.00/83.3M [00:00<?, ?B/s]

mean_var_norm_emb.ckpt:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

classifier.ckpt:   0%|          | 0.00/5.53M [00:00<?, ?B/s]

label_encoder.txt:   0%|          | 0.00/129k [00:00<?, ?B/s]

  state_dict = torch.load(path, map_location=device)
  stats = torch.load(path, map_location=device)
100%|███████████████████████████████████████| 139M/139M [00:04<00:00, 35.0MiB/s]
  checkpoint = torch.load(fp, map_location=device)
  checkpoint = torch.load(fp, map_location=device)
  checkpoint = torch.load(fp, map_location=device)
  checkpoint = torch.load(fp, map_location=device)
  checkpoint = torch.load(fp, map_location=device)


   Start Time  Duration     Speaker  \
0       0.031    13.298  SPEAKER_01   
1      13.328     3.324  SPEAKER_00   
2      16.653     3.679  SPEAKER_01   
3      19.471     8.387  SPEAKER_00   
4      28.482    63.990  SPEAKER_00   

                                           Utterance  End Time  
0   One thing which I've taken away for the wealt...    13.329  
1   People are running out of money because of ba...    16.652  
2   No, they're in fact, they will live so long. ...    20.332  
3   What do you think Adani and Mukesham money do...    27.858  
4   Now there's countless content pieces on YouTu...    92.472  
PDF created successfully at: /content/SharkTank/videoplaybackshort_Transcription.pdf
/content/SharkTank/videoplaybackshort.mp3 is processed successfully!!!!



Processing /content/SharkTank/videoplaybacknew.mp3...


INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.4.0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/torch/pyannote/models--pyannote--segmentation/snapshots/c4c8ceafcbb3a7a280c2d357aee9fbc9b0be7f9b/pytorch_model.bin`


Model was trained with pyannote.audio 0.0.1, yours is 3.3.2. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.10.0+cu102, yours is 2.4.1+cu121. Bad things might happen unless you revert torch to 1.x.


  state_dict = torch.load(path, map_location=device)
  stats = torch.load(path, map_location=device)
  checkpoint = torch.load(fp, map_location=device)
  checkpoint = torch.load(fp, map_location=device)
  checkpoint = torch.load(fp, map_location=device)
  checkpoint = torch.load(fp, map_location=device)
  checkpoint = torch.load(fp, map_location=device)


   Start Time  Duration     Speaker  \
0       0.031    29.059  SPEAKER_00   
1      29.090    15.981  SPEAKER_01   
2      33.882     0.203  SPEAKER_00   
3      40.565     0.641  SPEAKER_00   
4      45.070    23.912  SPEAKER_00   

                                           Utterance  End Time  
0   There's a large cap, mid cap, small cap, many...    29.090  
1   So let's start with the 50 lakh rupee a month...    45.071  
2                                              more.    34.085  
3                                           To their    41.206  
4   Yeah. So, you know, somebody was 50 lakh rupe...    68.982  
PDF created successfully at: /content/SharkTank/videoplaybacknew_Transcription.pdf
/content/SharkTank/videoplaybacknew.mp3 is processed successfully!!!!




# Summarization

In [None]:
import os
from openai import AzureOpenAI
import PyPDF2
from fpdf import FPDF

def sanitize_text(text):
    """Swap typographic dashes and curly quotes for their plain ASCII forms.

    Em and en dashes become "-", curly single quotes become "'", and curly
    double quotes become '"'. Every other character is returned unchanged.
    """
    ascii_for = str.maketrans({
        '\u2014': '-',
        '\u2013': '-',
        '\u2018': "'",
        '\u2019': "'",
        '\u201C': '"',
        '\u201D': '"',
    })
    return text.translate(ascii_for)

def summary_of_pdf(pdf_file_path):
    """Summarise a transcription PDF with Azure OpenAI and save the result.

    The text of every page is extracted with PyPDF2 and sent to the chat
    completions API with a prompt asking for a summary and a question/answer
    list. The reply is written to ``<base>_Summary.pdf``, and a second file,
    ``<base>_CombinedOutput.pdf``, holds the original pages followed by the
    summary pages.

    Every failure is reported on stdout and ends the function early; no
    exception is raised to the caller for the handled steps.

    Configuration is read from the environment:
        AZURE_OPENAI_API_KEY: API key (required; it is a credential and is
            therefore not stored in the notebook).
        AZURE_OPENAI_ENDPOINT: Endpoint URL (optional).
        OPENAI_API_VERSION: API version (optional, defaults to "2024-02-01").

    Args:
        pdf_file_path: Path of the PDF whose text should be summarised.

    Returns:
        None.
    """
    api_version = os.environ.get("OPENAI_API_VERSION", "2024-02-01")
    azure_endpoint = os.environ.get(
        "AZURE_OPENAI_ENDPOINT",
        "https://hexavarsity-secureapi.azurewebsites.net/api/azureai",
    )
    api_key = os.environ.get("AZURE_OPENAI_API_KEY")
    if not api_key:
        print("Error initializing Azure OpenAI client: the AZURE_OPENAI_API_KEY environment variable is not set.")
        return

    # splitext only touches the real extension, unlike str.replace, which
    # would also rewrite a ".pdf" appearing elsewhere in the path.
    base_path, extension = os.path.splitext(pdf_file_path)
    output_pdf_path = f"{base_path}_Summary{extension}"
    combined_pdf_path = f"{base_path}_CombinedOutput{extension}"

    try:
        client = AzureOpenAI(
            api_version=api_version,
            azure_endpoint=azure_endpoint,
            api_key=api_key
        )
    except Exception as e:
        print(f"Error initializing Azure OpenAI client: {e}")
        return

    try:
        with open(pdf_file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            page_texts = [page.extract_text() for page in pdf_reader.pages]
    except Exception as e:
        print(f"Error reading PDF file {pdf_file_path}: {e}")
        return

    # Pages without extractable text are skipped.
    file_content = "".join(text + "\n" for text in page_texts if text)
    if not file_content.strip():
        # Nothing to summarise, so do not spend an API call on an empty prompt.
        print(f"Error reading PDF file {pdf_file_path}: no text could be extracted")
        return

    # NOTE(review): the last prompt line contains the literal words
    # "output_pdf_path" (it is not interpolated); left unchanged so the
    # request sent to the model stays the same.
    prompt = f"""
You are a helpful assistant. Here is a text:
{file_content}
Utterance is the Main conversation that occurred. Having said that, give the bold title as Summary and give a combined summary of the utterances from the PDF (the PDF is likely to be on finance). Give the bold title as Question and Answer, identify and list out all the questions asked and answers given to those questions like-
Question 1:
Answer 1:
Question 2:
Answer 2:
Make the output look similar to output_pdf_path (with respect to font, page size, text size, text-wrapping, vertical spacing)
"""

    try:
        res = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant who provides answers to user questions."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7,
            max_tokens=900,
            top_p=0.6,
            frequency_penalty=0.7
        )
    except Exception as e:
        print(f"Error during OpenAI API call: {e}")
        return

    try:
        response_content = res.choices[0].message.content
    except (IndexError, AttributeError) as e:
        print(f"Error retrieving response content: {e}")
        return
    if not response_content:
        # The content can be missing or empty; sanitising None would raise.
        print("Error retrieving response content: the model returned an empty response")
        return

    sanitized_content = sanitize_text(response_content)
    # The built-in "Arial" font is written as latin-1; replace anything that
    # still falls outside it (e.g. a currency symbol) instead of losing the
    # whole summary to an encoding error.
    printable_content = sanitized_content.encode('latin-1', 'replace').decode('latin-1')

    try:
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=10)

        for line in printable_content.split('\n'):
            pdf.multi_cell(0, 10, line)

        pdf.output(output_pdf_path)
        print(f"Summary PDF saved successfully at: {output_pdf_path}")
    except Exception as e:
        print(f"Error creating summary PDF {output_pdf_path}: {e}")
        return

    try:
        with open(output_pdf_path, 'rb') as output_pdf_file, open(pdf_file_path, 'rb') as existing_pdf_file:
            output_pdf_reader = PyPDF2.PdfReader(output_pdf_file)
            existing_pdf_reader = PyPDF2.PdfReader(existing_pdf_file)

            pdf_writer = PyPDF2.PdfWriter()

            # Original transcription pages first, then the summary pages.
            for page in existing_pdf_reader.pages:
                pdf_writer.add_page(page)

            for page in output_pdf_reader.pages:
                pdf_writer.add_page(page)

            # Written while both source files are still open.
            with open(combined_pdf_path, 'wb') as combined_pdf_file:
                pdf_writer.write(combined_pdf_file)

        print(f"Combined PDF saved successfully at: {combined_pdf_path}")
    except Exception as e:
        print(f"Error during PDF combination: {e}")

def main(folder_path='/content/SharkTank'):
    """Summarise every transcription PDF in ``folder_path``.

    Only files whose name ends in ``_Transcription.pdf`` are considered. A
    file is skipped as already processed when its ``_Summary.pdf`` or
    ``_CombinedOutput.pdf`` companion is present in the folder listing;
    otherwise it is handed to ``summary_of_pdf``.

    Success is reported only if one of those companion files exists after
    the call, because ``summary_of_pdf`` reports its own failures by printing
    and returning rather than by raising.

    Args:
        folder_path: Folder to scan. Defaults to the folder this notebook
            has always used.

    Returns:
        None.
    """
    if not os.path.isdir(folder_path):
        print(f"Folder path '{folder_path}' does not exist.")
        return

    try:
        pdf_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.pdf')]
    except OSError as e:
        print(f"Error listing files in folder '{folder_path}': {e}")
        return

    pdf_file_paths = [os.path.join(folder_path, f) for f in pdf_files]

    for pdf_file_path in pdf_file_paths:
        if not pdf_file_path.endswith('_Transcription.pdf'):
            continue

        # Strip only the trailing extension; str.replace would also rewrite
        # a ".pdf" appearing earlier in the path.
        stem = pdf_file_path[:-len('.pdf')]
        summary = stem + '_Summary.pdf'
        combined_output = stem + '_CombinedOutput.pdf'

        if summary in pdf_file_paths or combined_output in pdf_file_paths:
            print(f"{pdf_file_path} is already processed!!!!")
            continue

        print(f"\nProcessing {pdf_file_path}...")
        summary_of_pdf(pdf_file_path)
        if os.path.exists(summary) or os.path.exists(combined_output):
            print(f"{pdf_file_path} is processed successfully!!!!\n\n")
        else:
            print(f"{pdf_file_path} could not be processed.\n\n")


if __name__ == "__main__":
    main()


/content/SharkTank/videoplaybackshort_Transcription.pdf is already processed!!!!


# Sentiment