<a href="https://colab.research.google.com/github/DylanCTY/TextAnalyticsProject_Group5/blob/main/GR3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/ktcktc26/ta5_gender.git

Cloning into 'ta5_gender'...
remote: Enumerating objects: 109, done.[K
remote: Counting objects: 100% (109/109), done.[K
remote: Compressing objects: 100% (108/108), done.[K
remote: Total 109 (delta 0), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (109/109), 6.00 MiB | 14.40 MiB/s, done.


In [None]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
# Convert M4A TO WAV
import os
from pydub import AudioSegment

def convert_m4a_to_wav(m4a_file, wav_file):
    sound = AudioSegment.from_file(m4a_file)
    sound.export(wav_file, format="wav")

def batch_convert_m4a_to_wav(input_folder, output_folder):
    # Create output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Get a list of all files in the input folder
    files = os.listdir(input_folder)

    # Iterate through each file
    for file in files:
        # Check if the file is an m4a file
        if file.endswith(".m4a"):
            # Construct full paths for input and output files
            m4a_path = os.path.join(input_folder, file)
            wav_path = os.path.join(output_folder, file.replace(".m4a", ".wav"))

            # Convert m4a to wav
            print(f"Converting {m4a_path} to {wav_path}")
            convert_m4a_to_wav(m4a_path, wav_path)

# Convert female
batch_convert_m4a_to_wav("/content/ta5_gender/dataset_gender/Female", "/content/ta5_gender/dataset_wav/Female")
# Convert male
batch_convert_m4a_to_wav("/content/ta5_gender/dataset_gender/Male", "/content/ta5_gender/dataset_wav/Male")

Converting /content/ta5_gender/dataset_gender/Female/76.m4a to /content/ta5_gender/dataset_wav/Female/76.wav
Converting /content/ta5_gender/dataset_gender/Female/2268.m4a to /content/ta5_gender/dataset_wav/Female/2268.wav
Converting /content/ta5_gender/dataset_gender/Female/2296.m4a to /content/ta5_gender/dataset_wav/Female/2296.wav
Converting /content/ta5_gender/dataset_gender/Female/30.m4a to /content/ta5_gender/dataset_wav/Female/30.wav
Converting /content/ta5_gender/dataset_gender/Female/9.m4a to /content/ta5_gender/dataset_wav/Female/9.wav
Converting /content/ta5_gender/dataset_gender/Female/33.m4a to /content/ta5_gender/dataset_wav/Female/33.wav
Converting /content/ta5_gender/dataset_gender/Female/2294.m4a to /content/ta5_gender/dataset_wav/Female/2294.wav
Converting /content/ta5_gender/dataset_gender/Female/2271.m4a to /content/ta5_gender/dataset_wav/Female/2271.wav
Converting /content/ta5_gender/dataset_gender/Female/2282.m4a to /content/ta5_gender/dataset_wav/Female/2282.wav
C

In [None]:
#Build Training Data Set from Female and Male Voices
import librosa
import pandas as pd

def extract_pitch(wav_file):
    # Load audio file
    audio, sr = librosa.load(wav_file, sr=None)

    # Extract pitch information
    pitch, mag = librosa.piptrack(y=audio, sr=sr)
    pitch = pitch[:, 0]  # Extract the first channel if it's stereo

    # Calculate mean, max, min
    mean_pitch = pitch.mean()
    max_pitch = pitch.max()
    min_pitch = pitch.min()

    return mean_pitch, max_pitch, min_pitch

def build_pitch_data(test_folder):
    # Initialize lists to store pitch data
    mean_pitches = []
    max_pitches = []
    min_pitches = []

    # Get a list of all files in the input folder
    files = os.listdir(test_folder)

    # Iterate through each file
    for file in files:
        # Check if the file is a WAV file
        if file.endswith(".wav"):
            wav_file = os.path.join(test_folder, file)

            # Extract pitch from the WAV file
            mean_pitch, max_pitch, min_pitch = extract_pitch(wav_file)

            # Store pitch data
            mean_pitches.append(mean_pitch)
            max_pitches.append(max_pitch)
            min_pitches.append(min_pitch)

  # Create DataFrame
    data = {
        "mean_pitch": mean_pitches,
        "max_pitch": max_pitches,
        "min_pitch": min_pitches,
    }

    df = pd.DataFrame(data)
    return df

# Build Training Data Set
female_data = "/content/ta5_gender/dataset_wav/Female"
male_data = "/content/ta5_gender/dataset_wav/Male"

df_f = build_pitch_data(female_data)
df_f['gender'] = 1
df_m = build_pitch_data(male_data)
df_m['gender'] = 0

df = pd.concat([df_f, df_m])

df.tail()


Unnamed: 0,mean_pitch,max_pitch,min_pitch,gender
45,8.109263,1742.610596,0.0,0
46,12.164597,1352.468628,0.0,0
47,20.253422,1824.171997,0.0,0
48,2.383928,702.612915,0.0,0
49,83.146263,3888.390625,0.0,0


In [None]:
df.shape


(100, 4)

In [None]:
# Build Model
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.svm import SVC
import numpy as np


def parameter_tuning_svm(input_df):
    x = input_df[['mean_pitch' , 'max_pitch', 'min_pitch']].values
    y = input_df['gender'].values
   #svc = SVC(kernel='linear')

    #segmenting data set and cross validation
    training, testing, training_result, testing_result = train_test_split(x, y, test_size=0.4, random_state=1)
    # scores = cross_val_score(svc, training, training_result, cv=10, scoring='accuracy')
    # print scores.mean()

    #Tuning C value
    c_vals = list(range(1,5))
    accuracy_vals = []
    for val in c_vals:
        svc = SVC(kernel='linear', C=val)
        scores = cross_val_score(svc, training, training_result, cv=10, scoring='accuracy')
        accuracy_vals.append(scores.mean())

    # plt.plot(c_vals, accuracy_vals)
    # plt.xticks(np.arange(0,30,2))
    # plt.xlabel('C values')
    # plt.ylabel('Mean Accuracies')
    # plt.show()

    optimal_cval = c_vals[accuracy_vals.index(max(accuracy_vals))]
    print(optimal_cval)

    #gamma value tuning
    gamma_vals = [0.00001,0.0001,0.001,0.01,0.1]
    accuracy_vals = []
    for g in gamma_vals:
        svc = SVC(kernel='linear', C=optimal_cval, gamma=g)
        scores = cross_val_score(svc, training, training_result, cv=10, scoring='accuracy')
        accuracy_vals.append(scores.mean())

    optimal_gamma = gamma_vals[accuracy_vals.index(max(accuracy_vals))]
    print(optimal_gamma)

    svc = SVC(kernel='linear', C=optimal_cval, gamma=optimal_gamma)
    svc.fit(training, training_result)
    testing_predict = svc.predict(testing)
    print(metrics.accuracy_score(testing_predict, testing_result))

    svc = SVC(kernel='linear', C=optimal_cval, gamma=optimal_gamma)
    svc.fit(x,y)
    return svc

In [None]:
# Train Model
tuned_svm = parameter_tuning_svm(df)


1
1e-05
0.4


In [None]:
# Build Model (to cut time)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.svm import SVC
import numpy as np

x = df[['mean_pitch' , 'max_pitch', 'min_pitch']].values
y = df['gender'].values

#segmenting data set and cross validation
training, testing, training_result, testing_result = train_test_split(x, y, test_size=0.4, random_state=1)
svc = SVC(kernel='linear', C=1, gamma=0.01)
svc.fit(training, training_result)
testing_predict = svc.predict(testing)
print(metrics.accuracy_score(testing_predict, testing_result))

tuned_svm = svc


0.4


In [None]:
# Prediction
test_folder = "/content/test"
test_wav = "/content/test_wav"

# Convert m4a to wav
batch_convert_m4a_to_wav(test_folder, test_wav)

# Prediction
df_test = build_pitch_data(test_wav)
df_test.head()

prediction = []
for i in range(len(df_test)):
  values = np.array(df_test[['mean_pitch' , 'max_pitch', 'min_pitch']].iloc[i], dtype=np.float64)
  values = values.reshape(1, -1)
  result = tuned_svm.predict(values)
  if result == 0:
    prediction.append("female")
  else:
    prediction.append("male")

x = 0
files = os.listdir(test_wav)

for file in files:
    # Check if the file is a WAV file
    if file.endswith(".wav"):
    # Construct full paths for test files
        test_path = os.path.join(test_wav, file)
        file_name = os.path.basename(test_path)
        print(f"File {file_name} is {prediction[x]}")
        x = x+1





Converting /content/test/female_test.m4a to /content/test_wav/female_test.wav
Converting /content/test/male_test.m4a to /content/test_wav/male_test.wav
File female_test.wav is female
File male_test.wav is female
['female', 'female']
