## Import Libraries

In [2]:
pip install jiwer

Collecting jiwer
  Downloading jiwer-3.0.4-py3-none-any.whl.metadata (2.6 kB)
Collecting rapidfuzz<4,>=3 (from jiwer)
  Downloading rapidfuzz-3.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading jiwer-3.0.4-py3-none-any.whl (21 kB)
Downloading rapidfuzz-3.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, jiwer
Successfully installed jiwer-3.0.4 rapidfuzz-3.9.4


In [3]:
import os
import torch
import pandas as pd
import torchaudio
import pandas as pd
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperConfig, WhisperFeatureExtractor
from jiwer import wer
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [4]:
# Set device
# device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
device = torch.device("cpu")
print(device)

cpu


## Utils

In [5]:
def create_file_mapping(transcript_dir, audio_dir):
    """
    Creates a DataFrame mapping transcript files to their corresponding audio files based on filenames.

    Args:
        transcript_dir (str): Directory containing transcript text files.
        audio_dir (str): Directory containing audio files.

    Returns:
        pd.DataFrame: DataFrame with columns 'Interview', 'Transcript Path', and 'Audio Path'.
    """
    # Get lists of file paths
    transcript_files = [f for f in os.listdir(transcript_dir) if f.endswith('.txt')]
    audio_files = [f for f in os.listdir(audio_dir) if f.endswith('.wav')]

    # Extract file identifiers from filenames
    transcript_ids = {os.path.splitext(f)[0] for f in transcript_files}
    audio_ids = {os.path.splitext(f)[0] for f in audio_files}

    # Determine the intersection of transcript and audio IDs
    common_ids = transcript_ids.intersection(audio_ids)

    # Create the mapping dictionary
    file_mapping = {
        'Interview': [],
        'Transcript Path': [],
        'Audio Path': []
    }

    for file_id in common_ids:
        transcript_path = os.path.join(transcript_dir, file_id + '.txt')
        audio_path = os.path.join(audio_dir, file_id + '.wav')
        file_mapping['Interview'].append(file_id)
        file_mapping['Transcript Path'].append(transcript_path)
        file_mapping['Audio Path'].append(audio_path)

    # Convert the dictionary to a DataFrame
    return pd.DataFrame(file_mapping)


def process_transcripts(paths_df):
    """
    Processes transcript files to extract and format relevant data, and combines them into a single DataFrame.

    Args:
        paths_df (pd.DataFrame): DataFrame with columns 'Interview', 'Transcript Path', and 'Audio Path'.

    Returns:
        pd.DataFrame: DataFrame containing concatenated and processed transcript data.
    """
    all_transcript_data = []

    for _, row in paths_df.iterrows():
        transcript_path = row['Transcript Path']

        # Load transcript data with appropriate delimiter and columns
        transcript_df = pd.read_csv(transcript_path, delimiter="\t", usecols=['StTime', 'EnTime', 'Content'])

        # Convert times from minutes to milliseconds
        transcript_df['StTime'] *= 1000
        transcript_df['EnTime'] *= 1000

        # Standardize content to uppercase
        transcript_df['Content'] = transcript_df['Content'].str.upper()

        # Add the identifier column
        transcript_df['Interview'] = row['Interview']

        # Append to the list
        all_transcript_data.append(transcript_df)

    # Combine all transcript data into a single DataFrame
    return pd.concat(all_transcript_data, ignore_index=True)


In [6]:
class AudioDataset(Dataset):
    def __init__(self, df, transcript_dir, audio_dir, processor, target_sample_rate=16000, target_length_ms=15000, padding_value=0):
        """
        Audio Dataset with Padding for ASR tasks.

        Parameters:
        - df (pd.DataFrame): DataFrame containing the dataset information.
        - transcript_dir (str): Directory containing transcript files.
        - audio_dir (str): Directory containing audio files.
        - processor: Processor for audio and text processing.
        - target_sample_rate (int): Target sample rate for audio segments.
        - target_length_ms (int): Target length for audio segments in milliseconds.
        - padding_value (float): Value to use for padding audio segments.
        """
        self.df = df
        self.transcript_dir = transcript_dir
        self.audio_dir = audio_dir
        self.processor = processor
        self.target_sample_rate = target_sample_rate
        self.target_length_samples = int(target_length_ms * target_sample_rate / 1000)
        self.padding_value = padding_value

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        start_tm = row['StTime']
        end_tm = row['EnTime']
        content = row['Content']
        interview = row['Interview']
        audio_path = os.path.join(self.audio_dir, interview + '.wav')

        # Extract and process audio segment
        audio_segment = self.extract_audio_segment(audio_path, start_tm, end_tm)
        audio_tensor = self.processor(audio_segment.squeeze().numpy(), sampling_rate=self.target_sample_rate, return_tensors="pt").input_features

        # Tokenize and pad transcription
        encodings = self.processor.tokenizer(
            content,
            return_tensors="pt",
            padding='max_length',
            truncation=True,
            max_length=448
        )

        labels = encodings.input_ids.squeeze()
        attention_mask = encodings.attention_mask.squeeze()

        return {
            'input_features': audio_tensor.squeeze(),
            'labels': labels,
            'attention_mask': attention_mask
        }

    def extract_audio_segment(self, audio_file_path, start_time_ms, end_time_ms):
        """
        Extracts and pads a segment from an audio file based on start and end times.

        Parameters:
        - audio_file_path (str): Path to the audio file.
        - start_time_ms (int): Start time in milliseconds.
        - end_time_ms (int): End time in milliseconds.

        Returns:
        - torch.Tensor: Extracted and padded audio segment.
        """
        waveform, sr = torchaudio.load(audio_file_path)

        # Convert milliseconds to sample indices
        start_sample = int(start_time_ms * sr / 1000)
        end_sample = int(end_time_ms * sr / 1000)

        # Extract segment and resample if necessary
        segment = waveform[:, start_sample:end_sample]
        if sr != self.target_sample_rate:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=self.target_sample_rate)
            segment = resampler(segment)

        # Pad or truncate segment
        return self.pad_audio_segment(segment)

    def pad_audio_segment(self, segment):
        """
        Pads or truncates an audio segment to the target length.

        Parameters:
        - segment (torch.Tensor): The audio segment to be padded.

        Returns:
        - torch.Tensor: Padded or truncated audio segment.
        """
        current_length = segment.size(1)
        if current_length < self.target_length_samples:
            padding_size = self.target_length_samples - current_length
            padding = torch.full((segment.size(0), padding_size), self.padding_value)
            padded_segment = torch.cat((segment, padding), dim=1)
        else:
            padded_segment = segment[:, :self.target_length_samples]
        return padded_segment

In [7]:
import os

# Define the path to the text file containing the URLs and the destination folder
url_file_path = '/content/urls_coraal.txt'
destination_folder = '/content/data/CORAAL'

# Create the destination folder if it doesn't exist
os.makedirs(destination_folder, exist_ok=True)

# Read the URLs from the text file
with open(url_file_path, 'r') as file:
    urls = file.readlines()

# Download each file using wget
for url in urls:
    url = url.strip()  # Remove any leading/trailing whitespace
    if url:
        os.system(f'wget -P {destination_folder} {url}')

In [8]:
import shutil

sources_list = ["ATL", "DCA", "DCB", "DTA", "LES", "PRV", "ROC", "VLD"]

data_path = '/content/data/CORAAL'
destination_path = [f'/content/data/CORAAL/{i}' for i in sources_list]

for idx in range(len(destination_path)):
    for file in sorted(os.listdir(data_path)):
      if file.startswith(f'{sources_list[idx]}_'):
        shutil.move(f"{data_path}/{file}", destination_path[idx])

In [9]:
import tarfile

file = tarfile.open('/content/data/CORAAL/ATL/ATL_audio_part01_2020.05.tar.gz')
file.extractall('/content/data/CORAAL/ATL')

In [10]:
# Set the path to your directory
data_directory = '/content/data/CORAAL/ATL/audio'

# Create a list of file paths
file_paths = [os.path.join(data_directory, filename) for filename in os.listdir(data_directory) if filename.endswith('.wav')]


In [13]:
# Load your data
transcript_dir = '/content/data/CORAAL/text'
audio_dir = '/content/data/CORAAL/audio/'

paths_df = create_file_mapping(transcript_dir, audio_dir)
combined_transcript_df = process_transcripts(paths_df)
display(combined_transcript_df.sample(10))

Unnamed: 0,StTime,Content,EnTime,Interview
3217,1749707.1,IS THAT A-,1750364.6,ATL_se0_ag1_f_03_1
1184,1695826.3,"I DON'T KNOW, BRUH. THIS SHIT THAT I'M DOING, ...",1699155.3,ATL_se0_ag1_m_01_1
259,366140.6,(PAUSE 0.12),366265.5,ATL_se0_ag1_m_01_1
2166,361692.6,OKAY.,362126.8,ATL_se0_ag1_f_03_1
2339,607157.3,"AND THAT'S MY FAVORITE TIME LIKE,",608613.0,ATL_se0_ag1_f_03_1
6403,1719321.1,(PAUSE 1.20),1720518.3,ATL_se0_ag1_f_01_1
3044,1530757.2,IN BROOKLYN AND STUFF?,1531929.6,ATL_se0_ag1_f_03_1
6056,1152410.0,(PAUSE 1.00),1153414.9,ATL_se0_ag1_f_01_1
1809,2627182.3,I ROCK SOME ONES.,2628279.7,ATL_se0_ag1_m_01_1
3814,550778.0,(PAUSE 0.13),550906.6,ATL_se0_ag1_f_02_1


In [14]:
# Filter out rows with unwanted characters in the 'Content' column
pattern = r'[\(\)\[\]/<>]'
filtered_transcript_df = combined_transcript_df[~combined_transcript_df['Content'].str.contains(pattern)].reset_index(drop=True)
display(filtered_transcript_df.sample(10))

Unnamed: 0,StTime,Content,EnTime,Interview
2633,545570.2,STUFF LIKE THAT. LITERATURE AND STUFF LIKE THAT,547703.0,ATL_se0_ag1_f_01_1
2482,115858.9,"FRIEND OF FAMILY, UH",117750.8,ATL_se0_ag1_f_01_1
390,1218084.4,FUCKING CUSTODIAN LIKE SERVICE.,1220146.1,ATL_se0_ag1_m_01_1
2161,1381969.2,YOU- YOU- YOU DIDDY BOPPING.,1383861.4,ATL_se0_ag1_f_02_1
204,627724.7,"MY COUSIN WENT UP THERE TOO,",629045.1,ATL_se0_ag1_m_01_1
1298,1142424.6,"AS FAR AS POWER, WHAT",1144118.4,ATL_se0_ag1_f_03_1
1305,1173274.4,"UM,",1173962.9,ATL_se0_ag1_f_03_1
1091,540515.2,OKAY.,541023.8,ATL_se0_ag1_f_03_1
2424,2154805.6,THEY HAD THE BRUH.,2155753.0,ATL_se0_ag1_f_02_1
1232,950163.7,OR REGGAE.,950908.0,ATL_se0_ag1_f_03_1


In [15]:
# subset_size = int(len(filtered_transcript_df)*.25)
subset_size = 150
data_subset = filtered_transcript_df.sample(subset_size)
print(len(data_subset))

150


In [16]:
# Split the data into training and testing sets
# train_df, test_df = train_test_split(filtered_transcript_df, test_size=0.2, random_state=42)
train_df, test_df = train_test_split(data_subset, test_size=0.2, random_state=42)

In [17]:
print(f"Train Dataset: {len(train_df)}\t Test Dataset: {len(test_df)}")

Train Dataset: 120	 Test Dataset: 30


In [18]:
# Initialize processor and model
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny.en")
config = WhisperConfig.from_pretrained("openai/whisper-tiny.en")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en").to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/185k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/805 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.41M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

normalizer.json:   0%|          | 0.00/52.7k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/34.6k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.83k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.94k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/151M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

In [19]:
# Create datasets and dataloaders
train_dataset = AudioDataset(train_df, transcript_dir, audio_dir, processor)
test_dataset = AudioDataset(test_df, transcript_dir, audio_dir, processor)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [20]:
# Training function
def train(model, train_loader, processor, optimizer, scheduler, device):
    model.train()
    total_loss = 0
    true_transcriptions = []
    predicted_transcriptions = []

    for batch in tqdm(train_loader, total=len(train_loader), desc='Training'):
        inputs = batch['input_features'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_features=inputs, labels=labels)
        loss = outputs.loss

        outputs = model.generate(input_features=inputs)
        transcriptions = processor.batch_decode(outputs, skip_special_tokens=True)
        true_transcriptions.extend(processor.batch_decode(labels, skip_special_tokens=True))
        predicted_transcriptions.extend(transcriptions)

        predicted_transcriptions = [x.upper() for x in predicted_transcriptions]
        wer_score = wer(true_transcriptions, predicted_transcriptions)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()
    total_loss = total_loss / len(train_loader)

    return total_loss, wer_score

# Evaluation function
def test(model, test_loader, processor, device):
    model.eval()
    total_loss = 0
    true_transcriptions = []
    predicted_transcriptions = []

    with torch.no_grad():
        # for batch in tqdm(test_loader, total=len(test_loader), desc='Testing'):
        for batch in test_loader:
            inputs = batch['input_features'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_features=inputs, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()

            outputs = model.generate(input_features=inputs)
            transcriptions = processor.batch_decode(outputs, skip_special_tokens=True)
            true_transcriptions.extend(processor.batch_decode(labels, skip_special_tokens=True))
            predicted_transcriptions.extend(transcriptions)

    total_loss = total_loss / len(train_loader)
    predicted_transcriptions = [x.upper() for x in predicted_transcriptions]
    wer_score = wer(true_transcriptions, predicted_transcriptions)

    return total_loss, wer_score

In [22]:
import torch
from sklearn.model_selection import train_test_split
from transformers import WhisperProcessor, WhisperConfig, WhisperForConditionalGeneration
from torch.utils.data import DataLoader
from tqdm import tqdm
from jiwer import wer
from datetime import datetime

# Define your AudioDataset class if not already defined
# Define your `train` and `test` functions as provided

def grid_search(params, train_loader, test_loader, processor, device):
    best_params = None
    best_wer = float('inf')
    results = []

    for lr in params['learning_rate']:
        for batch_size in params['batch_size']:
            for num_epochs in params['num_epochs']:
                for weight_decay in params['weight_decay']:
                    for scheduler_gamma in params['scheduler_gamma']:
                        print(f"Training with lr={lr}, batch_size={batch_size}, num_epochs={num_epochs}, weight_decay={weight_decay}, scheduler_gamma={scheduler_gamma}")

                        model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en").to(device)
                        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
                        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=scheduler_gamma)

                        for epoch in range(num_epochs):
                            train_loss, train_wer = train(model, train_loader, processor, optimizer, scheduler, device)
                            print(f"Epoch {epoch+1}/{num_epochs}\t Training Loss: {train_loss:.4f}, Word Error Rate (WER): {train_wer:.4f}")

                            test_loss, test_wer = test(model, test_loader, processor, device)
                            print(f"Epoch {epoch+1}/{num_epochs}\t Test Loss: {test_loss:.4f}, Word Error Rate (WER): {test_wer:.4f}")

                        results.append({
                            'learning_rate': lr,
                            'batch_size': batch_size,
                            'num_epochs': num_epochs,
                            'weight_decay': weight_decay,
                            'scheduler_gamma': scheduler_gamma,
                            'train_loss': train_loss,
                            'train_wer': train_wer,
                            'test_loss': test_loss,
                            'test_wer': test_wer
                        })

                        if test_wer < best_wer:
                            best_wer = test_wer
                            best_params = {
                                'learning_rate': lr,
                                'batch_size': batch_size,
                                'num_epochs': num_epochs,
                                'weight_decay': weight_decay,
                                'scheduler_gamma': scheduler_gamma
                            }
                            # Save the best model
                            timestamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
                            best_model_path = f"../weights/best_whisper_model_{timestamp}.pt"
                            torch.save(model.state_dict(), best_model_path)

    return best_params, results, best_model_path

# Example usage
params = {
    'learning_rate': [1e-5, 1e-4, 1e-3],
    'batch_size': [16, 32, 64],
    'num_epochs': [5, 10, 15],
    'weight_decay': [0, 1e-4, 1e-3],
    'scheduler_gamma': [0.9, 0.8, 0.7]
}

# Initialize processor and model
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny.en")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Split the data into training and testing sets
train_df, test_df = train_test_split(data_subset, test_size=0.2, random_state=42)

# Create datasets and dataloaders
train_dataset = AudioDataset(train_df, transcript_dir, audio_dir, processor)
test_dataset = AudioDataset(test_df, transcript_dir, audio_dir, processor)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

best_params, results, best_model_path = grid_search(params, train_loader, test_loader, processor, device)

print(f"Best parameters: {best_params}")
print(f"Results: {results}")
print(f"Best model saved to: {best_model_path}")

# Fine-tuning with best parameters
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en").to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=best_params['learning_rate'], weight_decay=best_params['weight_decay'])
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=best_params['scheduler_gamma'])
num_epochs = best_params['num_epochs']
train_loss_per_epoch = []
train_wer_per_epoch = []
test_loss_per_epoch = []
test_wer_per_epoch = []

for epoch in range(num_epochs):
    train_loss, train_wer = train(model, train_loader, processor, optimizer, scheduler, device)
    print(f"Epoch {epoch+1}/{num_epochs}\t Training Loss: {train_loss:.4f}, Word Error Rate (WER): {train_wer:.4f}")
    train_loss_per_epoch.append(train_loss)
    train_wer_per_epoch.append(train_wer)

    test_loss, test_wer = test(model, test_loader, processor, device)
    print(f"Epoch {epoch+1}/{num_epochs}\t Test Loss: {test_loss:.4f}, Word Error Rate (WER): {test_wer:.4f}")
    test_loss_per_epoch.append(test_loss)
    test_wer_per_epoch.append(test_wer)

timestamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')  # Format: YYYY-MM-DD_HH:MM:SS
weights_filepath = "../weights/whisper-weights_"

torch.save(model.state_dict(), weights_filepath + timestamp)
print(f"Model weights saved to {weights_filepath + timestamp}")


Training with lr=1e-05, batch_size=16, num_epochs=5, weight_decay=0, scheduler_gamma=0.9


Training:   0%|          | 0/8 [00:00<?, ?it/s]The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Training: 100%|██████████| 8/8 [21:30<00:00, 161.32s/it]


Epoch 1/5	 Training Loss: 0.5612, Word Error Rate (WER): 0.5526
Epoch 1/5	 Test Loss: 0.0372, Word Error Rate (WER): 0.6197


Training: 100%|██████████| 8/8 [21:09<00:00, 158.75s/it]


Epoch 2/5	 Training Loss: 0.1267, Word Error Rate (WER): 0.5561
Epoch 2/5	 Test Loss: 0.0332, Word Error Rate (WER): 0.6056


Training: 100%|██████████| 8/8 [17:21<00:00, 130.14s/it]


Epoch 3/5	 Training Loss: 0.1183, Word Error Rate (WER): 0.5544
Epoch 3/5	 Test Loss: 0.0324, Word Error Rate (WER): 0.6056


Training: 100%|██████████| 8/8 [17:21<00:00, 130.23s/it]


Epoch 4/5	 Training Loss: 0.1165, Word Error Rate (WER): 0.5544
Epoch 4/5	 Test Loss: 0.0322, Word Error Rate (WER): 0.6056


Training: 100%|██████████| 8/8 [17:33<00:00, 131.73s/it]


Epoch 5/5	 Training Loss: 0.1151, Word Error Rate (WER): 0.5509
Epoch 5/5	 Test Loss: 0.0321, Word Error Rate (WER): 0.6056


RuntimeError: Parent directory ../weights does not exist.

In [23]:
# Fine-tuning
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
num_epochs = 10
train_loss_per_epoch = []
train_wer_per_epoch = []

for epoch in range(num_epochs):
    train_loss, train_wer = train(model, train_loader, processor, optimizer, scheduler, device)
    print(f"Epoch {epoch+1}/{num_epochs}\t Training Loss: {train_loss:.4f}, Word Error Rate (WER): {train_wer:.4f}")
    train_loss_per_epoch.append(train_loss)
    train_wer_per_epoch.append(train_wer)

Training:   0%|          | 0/8 [00:05<?, ?it/s]


KeyboardInterrupt: 

In [None]:
test_loss, test_wer = test(model, test_loader, processor, device)
print(f"Running inference...\t Test Loss: {test_loss:.4f}, Word Error Rate (WER): {test_wer:.4f}")

Running inference...	 Test Loss: 0.0305, Word Error Rate (WER): 0.4022


In [None]:
from datetime import datetime

timestamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S') # Format: YYYY-MM-DD_HH:MM:SS
weights_filepath = "../weights/whisper-weights_"

torch.save(model.state_dict(), weights_filepath+timestamp)
print(f"Model weights saved to {weights_filepath+timestamp}")

Model weights saved to ../weights/whisper-weights_2024-07-23_16:16:07
