In [133]:
import warnings
warnings.filterwarnings('ignore')
import os
from tqdm import tqdm
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyts.image import GramianAngularField
import cv2

from tensorflow.keras import layers, models
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV, StratifiedKFold
from sklearn.svm import SVC

from sklearn.metrics import classification_report, ConfusionMatrixDisplay

In [141]:
import speech_recognition as sr


#Speech to text
def speech_to_text():
    recognizer = sr.Recognizer()
#
    with sr.Microphone() as source:
        print("Say something...")
        audio = recognizer.listen(source, timeout = 1)

    try:
        print("Recognizing...")
        text = recognizer.recognize_google(audio)
        return text
    except sr.UnknownValueError:
        return "Sorry, I could not understand audio."
    except sr.RequestError as e:
        return "Could not request results from Google Web Speech API; {0}".format(e)

if __name__ == "__main__":
    recognized_text = speech_to_text()
    print("You said:", recognized_text)


Say something...
Recognizing...
You said: Sorry, I could not understand audio.


In [137]:
import sounddevice as sd
import librosa
import numpy as np
import threading

# Parameters
sample_rate = 44100
chunk_duration = 0.1  # Chunk duration in seconds
recording = True
audio_data = []

# Function to record audio in a separate thread
def record_audio():
    global audio_data, recording
    print("Recording... Press 'c' and Enter to stop.")
    while recording:
        chunk = sd.rec(int(sample_rate * chunk_duration), samplerate=sample_rate, channels=1)
        audio_data.extend(chunk)
        sd.wait()

# Start the audio recording thread
recording_thread = threading.Thread(target=record_audio)
recording_thread.start()

# Wait for 'c' to be pressed
input("Press 'c' and Enter to stop recording: ")

# Stop recording
recording = False
recording_thread.join()

# Convert audio data to NumPy array
audio_data = np.concatenate(audio_data, axis=0)

# Compute the Short-Time Fourier Transform (STFT)
D = librosa.stft(audio_data)

# Convert magnitude spectrogram to decibels (dB)
S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

# Convert decibels to text representation
text_representation = ""
for frame in S_db.T:
    frame_text = " ".join(map(str, frame))
    text_representation += frame_text + "\n"

# Save the frequency text representation to a file
with open("frequency_output.txt", "w") as file:
    file.write(text_representation)


# # Normalize the frequency values
# frequency_values = np.loadtxt("frequency_output.txt", dtype=float)
# normalized_frequency_values = (frequency_values - np.min(frequency_values)) / (np.max(frequency_values) - np.min(frequency_values))

# # Save the normalized frequency values to a new text file
# normalized_frequency_file_path = "normalized_frequency_output.txt"
# np.savetxt(normalized_frequency_file_path, normalized_frequency_values, fmt='%.6f')
# # ... (Previous code up to saving the normalized_frequency_output.txt file)

# # Directly save each normalized frequency on a new line
# normalized_frequency_file_path = "normalized_frequency_output_lines.txt"
# with open("normalized_frequency_output.txt", "r") as input_file, open(normalized_frequency_file_path, "w") as output_file:
#     for line in input_file:
#         values = line.strip().split()  # Split the line into individual values
#         for value in values:
#             float_value = float(value)
#             output_file.write(f"{float_value:.6f}\n")


Recording... Press 'c' and Enter to stop.
Press 'c' and Enter to stop recording: c


In [99]:
source_folder = "/home/nipun/001_Hackathon/VOICED_DATASET"
target_folder = "/home/nipun/001_Hackathon/imgs"
healthy_folder = os.path.join(target_folder, 'healthy')
pathology_folder = os.path.join(target_folder, 'pathological')

# Create the target folder if it doesn't exist
os.makedirs(target_folder, exist_ok=True)

# List all files in the source folder
file_list = os.listdir(source_folder)

In [93]:
# for filename in tqdm(file_list, desc="Processing files"):
#     if filename.endswith('-info.txt'):
#         file_path = os.path.join(source_folder, filename)
#         with open(file_path, 'r') as file:
#             lines = file.readlines()
        
#         # Find the line with the "Diagnosis" information
#         diagnosis_line_index = None
#         for i, line in enumerate(lines):
#             if 'Diagnosis:' in line:
#                 diagnosis_line_index = i
#                 break
        
#         if diagnosis_line_index is not None:
#             # Extract the diagnosis part and update the content
#             diagnosis = lines[diagnosis_line_index].split(':')[1].strip()
#             lines = [f"{diagnosis}"]
        
#             # Write the updated content back to the file
#             with open(file_path, 'w') as file:
#                 file.writelines(lines)
    

Processing files: 100%|██████████| 834/834 [00:00<00:00, 22561.37it/s]


In [100]:
file_list = os.listdir(source_folder)

for filename in tqdm(file_list, desc="Processing files"):
        if filename.endswith('.txt') and not filename.endswith('-info.txt'):
            # Read the data from the file
            file_path = os.path.join(source_folder, filename)
            data = pd.read_csv(file_path, delimiter='\t')

            array = data.values
            array = np.transpose(array)

            gasf = GramianAngularField(method='summation', image_size=250)
            img1 = gasf.transform(array)

            gadf = GramianAngularField(method='difference', image_size=250)
            img2 = gadf.transform(array)

            # Concatenate the transformed images
            img = np.concatenate((img1, img2, np.zeros((1, 250, 250))), axis=0)

            # Determine the label from the filename
            info_filename = filename.replace('.txt', '-info.txt')
            with open(os.path.join(source_folder, info_filename), 'r') as file:
                label = file.readline().strip().split()[-1]

            # Save the produced image in the appropriate folder as jpg
            if label == 'healthy':
                target_path = os.path.join(healthy_folder, filename.replace('.txt', '.jpg'))
            else:
                target_path = os.path.join(pathology_folder, filename.replace('.txt', '.jpg'))

            # Convert numpy array to PIL Image and save as jpg
            img = (img * 255).astype(np.uint8)
            img = np.transpose(img, (1, 2, 0))
            pil_img = Image.fromarray(img)
            pil_img.save(target_path)

Processing files: 100%|██████████| 834/834 [00:02<00:00, 280.98it/s]


In [108]:
SZ = 20
bin_n = 32 # Number of bins
affine_flags = cv2.WARP_INVERSE_MAP|cv2.INTER_LINEAR
def deskew(img):
   img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
   m = cv2.moments(img)
   if abs(m['mu02']) < 1e-2:
     return img.copy()
   skew = m['mu11']/m['mu02']
   M = np.float32([[1, skew, -0.5*SZ*skew], [0, 1, 0]])
   img = cv2.warpAffine(img,M,(SZ, SZ),flags=affine_flags)
   return img

In [103]:
def hog(img):
   gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
   gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
   mag, ang = cv2.cartToPolar(gx, gy)
   bins = np.int32(bin_n*ang/(2*np.pi))
   bin_cells = bins[:10,:10], bins[10:,:10], bins[:10,10:], bins[10:,10:]
   mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]
   hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
   hist = np.hstack(hists) # hist is a 64 bit vector
   return hist

In [109]:
# Initialize variables
img_folder_1 = "/home/nipun/001_Hackathon/imgs/healthy"
img_folder_2 = "/home/nipun/001_Hackathon/imgs/pathological"
imgs = []
labels = []

# Process images in folder 1
for img_name in os.listdir(img_folder_1):
    img_path = os.path.join(img_folder_1, img_name)
    img = cv2.imread(img_path)
    if img is not None:
        img = deskew(img)
        img = hog(img)
        imgs.append(img)
        labels.append(0)

# Process images in folder 2
for img_name in os.listdir(img_folder_2):
    img_path = os.path.join(img_folder_2, img_name)
    img = cv2.imread(img_path)
    if img is not None:
        img = deskew(img)
        img = hog(img)
        imgs.append(img)
        labels.append(1)

# Convert the lists to NumPy arrays
imgs = np.array(imgs)
labels = np.array(labels)

In [112]:
df = pd.DataFrame(imgs, labels)

In [132]:
#DNN 

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(imgs, labels, test_size=0.2, random_state=42)

# Define the DNN model
model = models.Sequential([
    layers.Input(shape=(128,)),  # Input shape (number of features)
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(2, activation='softmax')  # Output layer with softmax activation
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(imgs, labels, epochs=10, batch_size=32, validation_data=(X_test, y_test))

loss, accuracy = model.evaluate(X_test, y_test)
print("Test accuracy:", accuracy)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.9285714030265808


In [140]:
def txt_to_data():
    with open('frequency_output.txt','r') as file:
        data = pd.read_csv(file_path, delimiter='\t')

        array = data.values
        array = np.transpose(array)
        gasf = GramianAngularField(method='summation', image_size=250)
        img1 = gasf.transform(array)
        gadf = GramianAngularField(method='difference', image_size=250)
        img2 = gadf.transform(array)
        # Concatenate the transformed images
        img = np.concatenate((img1, img2, np.zeros((1, 250, 250))), axis=0)
        img = (img * 255).astype(np.uint8)
        img = np.transpose(img, (1, 2, 0))
        img = Image.fromarray(img)
        if img is not None:
            img = deskew(img)
            img = hog(img)
        return img
img = txt_to_data()   
model.predict(img)
        

error: OpenCV(4.8.0) :-1: error: (-5:Bad argument) in function 'cvtColor'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'
