In [23]:
import whisper
import pyaudio
import numpy as np
import pandas as pd
import threading
import time
import wave
import model
from datetime import datetime
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import TextVectorization

In [33]:
# Speech-to-text model: the "base" Whisper checkpoint
whisper_model = whisper.load_model("base")

# Capture settings, shared by the microphone stream and the saved WAV file
CHUNK, CHANNELS, RATE = 1024, 1, 16000
FORMAT = pyaudio.paInt16

# PyAudio handle, plus the state shared between the main thread and the
# recorder thread: the chunk buffer, its lock, and the stop signal
p = pyaudio.PyAudio()
all_audio_data = []
recording_lock = threading.Lock()
stop_recording = threading.Event()

# Timestamp reused in every output filename of this run
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
audio_filename = f"recording_{timestamp}.wav"

# Open the input (microphone) stream with the settings above
stream = p.open(
    input=True,
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    frames_per_buffer=CHUNK,
)

def record_audio():
    """Append microphone chunks to ``all_audio_data`` until told to stop.

    Reads ``CHUNK`` frames at a time from the module-level ``stream`` and
    stores each raw buffer under ``recording_lock``. The loop ends when
    ``stop_recording`` is set, or when a read raises, in which case the
    error is printed and the function returns.
    """
    print("* Recording... Press Ctrl+C to stop and transcribe")

    try:
        while not stop_recording.is_set():
            # exception_on_overflow=False: do not raise on input overflow
            chunk = stream.read(CHUNK, exception_on_overflow=False)
            with recording_lock:
                all_audio_data.append(chunk)
    except Exception as err:
        # Any failure ends the recording loop; the caller keeps what was captured
        print(f"Audio recording error: {err}")

# Start recording thread
recording_thread = threading.Thread(target=record_audio, daemon=True)
recording_thread.start()

try:
    # Keep the main thread alive while the recorder is running. Polling
    # is_alive() (instead of looping forever) lets the cell move on by itself
    # if the recorder thread exits early, e.g. after a read error.
    while recording_thread.is_alive():
        time.sleep(0.1)
except KeyboardInterrupt:
    # Ctrl+C / kernel interrupt is the normal way to end the recording
    print("\n* Stopping recording...")
finally:
    # Always signal the recorder and release the audio device, whichever way
    # the wait loop was left.
    stop_recording.set()

    # Wait for recording thread to finish
    recording_thread.join(timeout=1)

    # Clean up audio stream
    stream.stop_stream()
    stream.close()
    p.terminate()

# Save audio file, transcribe it, then classify the transcription.
# The recording is joined exactly once, under the same lock the recorder thread
# uses, so the WAV file and the Whisper input are built from identical bytes.
with recording_lock:
    combined_audio = b''.join(all_audio_data)

if combined_audio:
    print(f"* Saving audio to {audio_filename}...")
    try:
        with wave.open(audio_filename, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            # Module-level helper: avoids calling a method on the PyAudio
            # instance `p`, which has already been terminated by this point.
            wf.setsampwidth(pyaudio.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(combined_audio)
        print(f"* Audio saved as {audio_filename}")
    except Exception as e:
        # Best effort: a failed save does not prevent transcription
        print(f"* Error saving audio: {e}")

    # Convert audio data for Whisper
    print("* Processing audio with Whisper...")
    try:
        # int16 samples -> float32 scaled into [-1.0, 1.0)
        audio_np = np.frombuffer(combined_audio, dtype=np.int16).astype(np.float32) / 32768.0

        # Transcribe the entire recording
        result = whisper_model.transcribe(audio_np)
        transcribed_text = result['text']

        # Display results
        print("\n" + "="*50)
        print("FULL TRANSCRIPTION:")
        print("="*50)
        print(transcribed_text)
        print("="*50)

        # Save transcription to text file
        txt_filename = f"transcription_{timestamp}.txt"
        with open(txt_filename, 'w', encoding='utf-8') as f:
            f.write(transcribed_text)
        print(f"* Transcription saved as {txt_filename}")
    except Exception as e:
        print(f"* Error during transcription: {e}")
    else:
        # Text Classification (only reached when transcription succeeded)
        if transcribed_text.strip():  # Only classify if there's actual text
            print("\n* Starting text classification...")
            # Separate handler so classification failures are not reported
            # as transcription errors.
            try:
                # Setup text vectorization
                max_vocab = 20000
                sequence_len = 300

                vectorizer = TextVectorization(
                    max_tokens=max_vocab,
                    output_mode='int',
                    output_sequence_length=sequence_len
                )

                # NOTE(review): adapt() builds the vocabulary from the data it
                # is given, so the token ids below come from this one
                # transcript. Presumably the .h5 models were trained with a
                # different vocabulary; if so, the ids do not line up and the
                # predictions are not meaningful.
                # TODO: restore the training vocabulary (e.g. with
                # vectorizer.set_vocabulary) instead of adapting here.
                vectorizer.adapt([transcribed_text])
                print("* Warning: vectorizer was adapted on this transcript only; "
                      "classification may be unreliable")

                # Load classification models
                class_models = model.create_ensemble_model('best_model_binary.h5', 'best_model_multi.h5')

                if class_models:
                    # Make predictions
                    final_classification = model.predict_ensemble_model(class_models, [transcribed_text], vectorizer)

                    if final_classification:
                        print("\n" + "="*50)
                        print("CLASSIFICATION RESULTS:")
                        print("="*50)
                        for key, value in final_classification.items():
                            print(f"{key}: {value}")
                        print("="*50)

                        # Save classification results
                        results_filename = f"classification_{timestamp}.txt"
                        with open(results_filename, 'w', encoding='utf-8') as f:
                            f.write("TRANSCRIPTION:\n")
                            f.write(transcribed_text + "\n\n")
                            f.write("CLASSIFICATION RESULTS:\n")
                            for key, value in final_classification.items():
                                f.write(f"{key}: {value}\n")
                        print(f"* Classification results saved as {results_filename}")
                else:
                    print("* Could not load classification models")
            except Exception as e:
                print(f"* Error during classification: {e}")
        else:
            print("* No text to classify")
else:
    print("* No audio data recorded")

print("* Process completed")



* Recording... Press Ctrl+C to stop and transcribe

* Stopping recording...
* Saving audio to recording_20250522_130344.wav...
* Audio saved as recording_20250522_130344.wav
* Processing audio with Whisper...





FULL TRANSCRIPTION:
 This movie was really really bad. I never saw something as shitty as this.
* Transcription saved as transcription_20250522_130344.txt

* Starting text classification...




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step

CLASSIFICATION RESULTS:
sentiment: positive
emotion: ['approval']
* Classification results saved as classification_20250522_130344.txt
* Process completed
