## Step 1: Set Up Python Packages

In [1]:
pip install gtts librosa noisereduce pandas numpy

Collecting gtts
  Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)
Collecting noisereduce
  Downloading noisereduce-3.0.2-py3-none-any.whl (22 kB)
Installing collected packages: gtts, noisereduce
Successfully installed gtts-2.5.1 noisereduce-3.0.2


In [2]:
import os

import pandas as pd
import numpy as np

from gtts import gTTS
import librosa
import noisereduce as nr

from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer

## Step 2: Load Dataset

In [3]:
# Read the dataset CSV (path is relative to the working directory) into a DataFrame
df = pd.read_csv(filepath_or_buffer='Online Content.csv')

In [4]:
# Preview the first five rows of the loaded dataset
df.head(n=5)

Unnamed: 0,id,title,length,popularity,liked,disliked,shared
0,1,10 Effective Marketing Strategies,15:32,1500,120,10,50
1,2,The Power of Social Media Marketing,12:45,2500,180,20,80
2,3,Content Marketing Tips for Success,18:20,1800,150,15,60
3,4,How to Optimize Your Website for SEO,22:15,2100,160,10,70
4,5,The Art of Influencer Marketing,14:50,1900,140,20,50


## Step 3: Audio Directory Setup

In [5]:
# Directory that receives the synthesized audio files.
# NOTE(review): absolute path; looks specific to a hosted runtime — confirm before running elsewhere.
output_folder = '/content/AudioData/'
# exist_ok=True makes the cell safe to re-run and removes the check-then-create race
os.makedirs(output_folder, exist_ok=True)

## Step 4: Convert Text to Speech

In [6]:
def text_to_speech(text, filename):
    """Synthesize ``text`` as English speech and save it inside ``output_folder``.

    Args:
        text: The text to convert to speech.
        filename: Name of the audio file to create inside ``output_folder``.

    Returns:
        The path of the saved file (``output_folder`` joined with ``filename``).
    """
    speech = gTTS(text=text, lang='en')
    destination = os.path.join(output_folder, filename)
    speech.save(destination)
    return destination

## Step 5: Feature Extraction

In [7]:
def extract_mfcc(file_path):
    """Load an audio file and return its noise-reduced, normalized MFCCs.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The transposed, normalized MFCC matrix computed with ``n_mfcc=13``
        (presumably one row per frame and 13 columns, given librosa's
        documented ``(n_mfcc, frames)`` layout — confirm).
    """
    # Load audio file; sr=None keeps the file's own sampling rate
    y, sr = librosa.load(file_path, sr=None)

    # Reduce noise
    y = nr.reduce_noise(y=y, sr=sr)

    # Calculate MFCCs
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Normalize MFCCs along axis 0 (zero mean, unit standard deviation)
    centered = mfcc - np.mean(mfcc, axis=0)
    std = np.std(mfcc, axis=0)
    # A zero std means the slice is constant: divide by 1 there so the result
    # is 0 instead of NaN/inf. Non-zero entries are divided exactly as before.
    safe_std = np.where(std == 0, 1.0, std)
    mfcc = centered / safe_std
    return mfcc.transpose()

## Step 6: Process Data

In [8]:
# Character-level tokenizer fitted on the titles
tokenizer = Tokenizer(char_level=True)

tokenizer.fit_on_texts(df['title'])

# One integer sequence per title
sequences = tokenizer.texts_to_sequences(df['title'])

# Fix every sequence to length 100, filling the tail with -1.
# truncating='post' matches padding='post': a sequence longer than 100 keeps
# its beginning (the default, 'pre', would drop the start instead).
labels_padded = pad_sequences(sequences, maxlen=100, padding='post', truncating='post', value=-1)

In [9]:
def custom_to_categorical(label, num_classes=100):
    """One-hot encode a 2-D array of integer labels, leaving padding all zero.

    Args:
        label: 2-D array (or nested list) of integer class ids. Entries equal
            to -1 are treated as padding and produce an all-zero vector.
        num_classes: Size of the one-hot (last) axis.

    Returns:
        Float array of shape ``(label.shape[0], label.shape[1], num_classes)``
        holding a 1 at each entry's class index.

    Raises:
        IndexError: If a class id falls outside the one-hot axis, or if
            ``label`` has fewer than two dimensions.
    """
    # Accept nested lists as well as ndarrays
    label = np.asarray(label)
    label_expanded = np.zeros((label.shape[0], label.shape[1], num_classes))
    # Coordinates of the real (non-padding) entries; a single fancy-indexed
    # assignment replaces the per-element Python double loop.
    rows, cols = np.nonzero(label != -1)
    label_expanded[rows, cols, label[rows, cols]] = 1
    return label_expanded

In [10]:
# One-hot encode the padded label sequences (100 classes, the function's default)
labels_onehot = custom_to_categorical(labels_padded, num_classes=100)

In [11]:
audio_paths = []
mfcc_features = []

# Synthesize one audio file per title (files are numbered from 1) and extract its MFCCs
for index, title in enumerate(df['title']):
    file_path = text_to_speech(title, f'audio_{index+1}.mp3')
    audio_paths.append(file_path)
    mfcc = extract_mfcc(file_path)
    mfcc_features.append(mfcc)

# Padding MFCC features to a fixed length of 100.
# truncating='post' matches padding='post': a clip longer than 100 frames keeps
# its beginning (the default, 'pre', would drop the start of the audio).
mfcc_features_padded = pad_sequences(mfcc_features, maxlen=100, padding='post', truncating='post', dtype='float32')

## Step 7: Save Labels and Processed Audio

In [12]:
# Persist the padded MFCC features
np.save(file='audio.npy', arr=mfcc_features_padded)

In [13]:
# Persist the one-hot encoded labels
np.save(file='labels.npy', arr=labels_onehot)