In [8]:
!pip install openai-whisper torch
!pip install pydub
!pip install pyannote.audio
!pip install openai
!pip install --upgrade openai



In [None]:
import os
import time
from getpass import getpass

import openai
import torch
import whisper
from google.colab import drive
from google.colab import files
from openai import OpenAI
from openai.types import ChatModel
from pyannote.audio import Pipeline
from pyannote.core import Segment
from pydub import AudioSegment

In [None]:
# Mount Google Drive (uncomment the line below to enable)
#drive.mount('/content/drive')

In [None]:
# Project directory setting; the value is a placeholder that must be replaced
# with the real location before it is used.
project_dir = '((Google-Drive-Adress))'

In [None]:
# OpenAI credentials.
# The key is read from the OPENAI_API_KEY environment variable; when it is not
# set the user is prompted for it, so the secret is never stored in the notebook.
api_key = os.getenv("OPENAI_API_KEY") or getpass("OpenAI API key: ")
if not api_key:
    raise ValueError("An OpenAI API key is required.")

# Optional project id; kept as a module-level name but not passed to the client here.
project_id = os.getenv("OPENAI_PROJECT_ID")

# Also set the module-level default so code that uses the `openai` module
# directly shares the same key as the explicit client below.
openai.api_key = api_key

client = openai.OpenAI(api_key=api_key)

# File upload: opens the Colab upload widget and uses the first uploaded file.
uploaded = files.upload()
if not uploaded:
    # Presumably the mapping is empty when the upload dialog is cancelled;
    # fail with a clear message instead of an IndexError on the lookup below.
    raise ValueError("No audio file was uploaded.")
file_name = next(iter(uploaded))

# Decode the upload (this also verifies that it is readable audio) and convert
# it to WAV unless it already carries a .wav extension.
audio = AudioSegment.from_file(file_name)
file_extension = os.path.splitext(file_name)[1].lower()
if file_extension != '.wav':
    audio.export("converted_audio.wav", format="wav")
    # From here on the pipeline works on the converted copy.
    file_name = "converted_audio.wav"

print("Audio File Identified")

# Text extraction with the Whisper model (GPU when available, otherwise CPU).
device = "cuda" if torch.cuda.is_available() else "cpu"
model = whisper.load_model("medium", device=device)

# Decoding settings: Turkish audio, beam search width 3, 3 sampling candidates.
options = whisper.DecodingOptions(language="tr", beam_size=3, best_of=3)
# Forward only the settings chosen above. Unpacking `options.__dict__` would
# also pass every default field of the dataclass (e.g. `temperature`) and so
# override transcribe()'s own defaults.
result = model.transcribe(
    file_name,
    language=options.language,
    beam_size=options.beam_size,
    best_of=options.best_of,
)
text = result["text"]

print("Text:", text)

# Write the plain transcription to a file.
with open("output.txt", "w", encoding="utf-8") as out_file:
    out_file.write(text)

print("Text file created")

# Initialise the pyannote.audio speaker-diarization pipeline.
# The Hugging Face access token is read from the HF_TOKEN environment variable,
# or prompted for when it is not set. Never commit a token to the notebook:
# any token that has been committed must be treated as leaked and revoked.
hf_token = os.getenv("HF_TOKEN") or getpass("Hugging Face access token: ")
try:
    pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=hf_token)
    # Guard against a missing pipeline object so the failure surfaces here
    # rather than at the first call.
    if pipeline is None:
        raise ValueError("Pipeline yüklenemedi.")
except Exception as e:
    # Report the reason, then re-raise so the cell stops.
    print(f"Pipeline başlatılamadı: {e}")
    raise

num_speakers = 2  # number of speakers expected in the recording; adjust per file

# Run speaker diarization and collect one (start, end, speaker) tuple per track.
try:
    diarization = pipeline(file_name, num_speakers=num_speakers)
    if diarization is None:
        raise ValueError("Diarization operation failed.")

    # Filled in track order; read later when speakers are matched to the transcript.
    speaker_segments = []

    for turn, _track, label in diarization.itertracks(yield_label=True):
        begin, finish = turn.start, turn.end
        speaker_segments.append((begin, finish, label))
        # Echo every detected turn so the result can be inspected in the output.
        print(f"Beginning={begin:.2f}s Finish={finish:.2f}s Speaker_{label}")
except Exception as err:
    # Report the reason, then re-raise so the cell stops.
    print(f"Speaker recognition failed: {err}")
    raise

# Dividing transcription segments into smaller pieces
def split_segment(segment, chunk_size=0.5):
    """Split one transcription segment into consecutive fixed-length time chunks.

    Args:
        segment: Mapping with numeric 'start' and 'end' entries and a 'text'
            entry (presumably times in seconds, as produced by the
            transcription step).
        chunk_size: Length of each chunk, in the same unit as 'start'/'end'.
            Must be positive.

    Returns:
        A list of dicts with 'start', 'end' and 'text'. The chunks cover
        [start, end) back to back, the last one may be shorter than
        ``chunk_size``, and every chunk carries the segment's full text.
        The list is empty when ``start >= end``.

    Raises:
        ValueError: If ``chunk_size`` is not positive (the loop below would
            otherwise never terminate).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")

    start = segment['start']
    end = segment['end']
    text = segment['text']

    chunks = []
    current_start = start
    while current_start < end:
        # Clamp the final chunk so it never runs past the end of the segment.
        current_end = min(current_start + chunk_size, end)
        chunks.append({'start': current_start, 'end': current_end, 'text': text})
        current_start += chunk_size
    return chunks

# Break every transcription segment into short chunks and flatten them into one list.
small_segments = [
    chunk
    for whisper_segment in result['segments']
    for chunk in split_segment(whisper_segment)
]

# Label every transcription segment with a speaker.
# Each segment is matched to the diarization turn it shares the most time with.
# This also covers a transcription segment that fully contains a turn, and it
# prefers the best match over the first one. A segment that intersects no turn
# is kept with the label "Unknown" instead of being dropped from the transcript.
final_transcription = []
for segment in result['segments']:
    start = segment['start']
    end = segment['end']
    text_segment = segment['text']

    best_speaker = "Unknown"
    best_overlap = None
    for seg_start, seg_end, speaker in speaker_segments:
        # Length of the shared interval; negative when the two do not intersect.
        overlap = min(end, seg_end) - max(start, seg_start)
        if overlap >= 0 and (best_overlap is None or overlap > best_overlap):
            best_overlap = overlap
            best_speaker = speaker

    final_transcription.append(f"Speaker {best_speaker}: {text_segment}")

# Persist the speaker-labelled transcript, one entry per line.
with open("final_output.txt", "w", encoding="utf-8") as transcript_out:
    transcript_out.writelines(entry + '\n' for entry in final_transcription)

# Read the saved transcript back; it becomes the chatbot's system message.
with open("final_output.txt", "r", encoding="utf-8") as transcript_in:
    final_transcription_text = transcript_in.read()

system_message = f"Transcription of this audio file: {final_transcription_text}"

print("Text file and speaker recognition results created")

def chat_with_gpt3(system_message, user_message, model="gpt-3.5-turbo", max_tokens=150, temperature=0.7, top_p=0.9, max_retries=3, retry_delay=60):
    """Send one system + user message pair to the chat completions API.

    Args:
        system_message: Content of the system message.
        user_message: Content of the user message.
        model: Chat model name.
        max_tokens: Upper bound on the length of the reply.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling parameter.
        max_retries: How many times to retry after a rate-limit error before
            giving up and re-raising it.
        retry_delay: Seconds to wait before each retry.

    Returns:
        The reply text with surrounding whitespace removed; an empty string
        when the reply has no text content.

    Raises:
        openai.RateLimitError: When the rate limit is still hit after
            ``max_retries`` retries.
        openai.OpenAIError: Any other API error is propagated unchanged.
    """
    retries_left = max_retries
    # Bounded loop instead of recursion: a persistent rate limit can neither
    # block forever nor grow the call stack.
    while True:
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": user_message}
                ],
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
            )
        except openai.RateLimitError as e:
            if retries_left <= 0:
                raise
            retries_left -= 1
            print(f"Rate limit exceeded: {e}")
            time.sleep(retry_delay)
            continue

        content = response.choices[0].message.content
        # `content` may be None; treat that as an empty reply.
        return (content or "").strip()

def is_question_relevant(user_message, context):
    """Ask the model whether a question can be answered from the given context.

    Args:
        user_message: The user's question.
        context: Text sent as the system message (here, the transcript).

    Returns:
        True when the model's reply starts with "yes" (case-insensitive,
        ignoring surrounding quotes and punctuation), otherwise False.
    """
    # Name the question explicitly and constrain the reply format so the
    # yes/no check below does not depend on free-form wording.
    prompt = (
        f"Question: '{user_message}'\n"
        "Can this question be answered using the text you were given? "
        "Reply in English with only 'Yes' or 'No'."
    )
    # Go through the shared helper so this call gets the same rate-limit
    # handling as every other request; temperature 0 keeps the verdict stable.
    relevance_check = chat_with_gpt3(
        context,
        prompt,
        model="gpt-3.5-turbo",
        max_tokens=50,
        temperature=0,
        top_p=1,
    )
    normalized = relevance_check.strip().strip("'\".").lower()
    return normalized.startswith("yes")

print("Chatbot ready to use..")

# Interactive question/answer loop; type "exit" or "quit" to stop.
while True:
    # Strip surrounding whitespace so inputs such as "exit " are still recognised.
    user_input = input("Ask your question: ").strip()
    if user_input.lower() in ["exit", "quit"]:
        break
    if not user_input:
        # Nothing was asked: prompt again without spending API calls.
        continue

    # Answer only questions that the transcript can actually answer.
    if is_question_relevant(user_input, system_message):
        response = chat_with_gpt3(system_message, user_input, model="gpt-3.5-turbo")
        print(f"AI: {response}")
    else:
        print("Sorry, I don't know about this question.")
    # Short pause between questions to space out API requests.
    time.sleep(5)