## Sound Localization Test Assessment Model

Model used: Random Forest Classifier


In [29]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle

In [None]:
# Location of the raw localization-test results.
# NOTE(review): this is an absolute path (it looks like a hosted-notebook location);
# adjust DATA_PATH when running elsewhere.
DATA_PATH = '/content/sound_localization_data.csv'
data = pd.read_csv(DATA_PATH)

In [31]:
# Summarize column dtypes and non-null counts to check for missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   UserID                 400 non-null    int64  
 1   Age                    400 non-null    int64  
 2   Gender                 400 non-null    object 
 3   Accuracy (%)           400 non-null    float64
 4   ResponseTime (ms)      400 non-null    float64
 5   NumCorrectSounds       400 non-null    float64
 6   HearingClassification  400 non-null    object 
dtypes: float64(3), int64(2), object(2)
memory usage: 22.0+ KB


In [32]:
# Preview the first rows to sanity-check how the columns were parsed.
data.head()

Unnamed: 0,UserID,Age,Gender,Accuracy (%),ResponseTime (ms),NumCorrectSounds,HearingClassification
0,1,62,Male,95.03,2061.0,24.0,Good
1,2,60,Male,69.44,1302.0,17.0,Normal
2,3,36,Male,82.98,2107.0,21.0,Normal
3,4,37,Male,94.43,1478.0,24.0,Good
4,5,32,Male,82.18,1425.0,21.0,Normal


In [33]:
# Print the (rows, columns) shape of the loaded frame.
print(data.shape)

(400, 7)


In [34]:
# Count how many rows fall into each hearing class to check the class balance.
class_counts = data['HearingClassification'].value_counts()
print(class_counts)

HearingClassification
Normal    156
Bad       134
Good      110
Name: count, dtype: int64


In [35]:
# Predictors: the three performance measurements recorded for each user.
feature_columns = ['Accuracy (%)', 'ResponseTime (ms)', 'NumCorrectSounds']
features = data[feature_columns]

# Turn the string class labels into integer codes; the fitted encoder is kept
# so the codes can be mapped back to label names later.
target_encoder = LabelEncoder()
target_encoded = target_encoder.fit_transform(data['HearingClassification'])
target = pd.Series(target_encoded)

# Show the class distribution again, now keyed by integer code.
encoded_counts = target.value_counts()
print(encoded_counts)

2    156
0    134
1    110
Name: count, dtype: int64


In [36]:
# Split first, scale second. The scaler must learn its min/max from the training
# rows only; fitting it on the full dataset lets statistics from the validation
# and test rows leak into preprocessing and makes the held-out scores optimistic.
#
# The seeds and fractions are unchanged, so the same rows land in each split:
# 50% train, then the remaining 50% is divided 70/30 into test/validation.
X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(
    features, target, test_size=0.5, random_state=42
)

scaler = MinMaxScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train_raw),  # fit on training rows only
    columns=features.columns,
    index=X_train_raw.index,
)
X_temp = pd.DataFrame(
    scaler.transform(X_temp_raw),  # reuse the training min/max; never refit here
    columns=features.columns,
    index=X_temp_raw.index,
)

# Note the naming: with test_size=0.3 the *second* returned pair (validation)
# receives 30% of the hold-out rows and the first pair (test) receives 70%.
X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)

# Kept for backward compatibility with any cell that reads the fully scaled
# frame. Values outside [0, 1] are possible for rows that were not in training.
scaled_features = scaler.transform(features)
scaled_features_df = pd.DataFrame(scaled_features, columns=features.columns)

In [37]:
# Report the size of each split in train / test / validation order.
for split_frame in (X_train, X_test, X_val):
    print(split_frame.shape)

(200, 3)
(140, 3)
(60, 3)


In [38]:
# Baseline random forest: 100 trees, with a fixed seed for repeatable results.
rf_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**rf_params)

# Train on the training split only.
model.fit(X_train, y_train)

In [39]:
# 10-fold cross-validated accuracy, computed on the training split.
cv_scores = cross_val_score(
    estimator=model,
    X=X_train,
    y=y_train,
    scoring='accuracy',
    cv=10,
)

print(f"Cross-Validation Scores: {cv_scores}")
print(f"\nMean CV Score: {np.mean(cv_scores)}")

Cross-Validation Scores: [1.   1.   1.   1.   1.   1.   1.   0.95 1.   1.  ]

Mean CV Score: 0.9949999999999999


In [40]:
# Score the fitted model on the validation split.
y_pred_val = model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred_val)
val_confusion = confusion_matrix(y_val, y_pred_val)
val_report = classification_report(y_val, y_pred_val)

print(f"Validation Accuracy: {accuracy}")
print("\nValidation Confusion Matrix:\n", val_confusion)
print("\nValidation Classification Report:\n", val_report)

Validation Accuracy: 1.0

Validation Confusion Matrix:
 [[22  0  0]
 [ 0 16  0]
 [ 0  0 22]]

Validation Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        22
           1       1.00      1.00      1.00        16
           2       1.00      1.00      1.00        22

    accuracy                           1.00        60
   macro avg       1.00      1.00      1.00        60
weighted avg       1.00      1.00      1.00        60



In [41]:
# Final check on the test split.
# NOTE(review): the recorded scores are perfect; this may mean the label is a
# direct function of the input features. Confirm how HearingClassification was derived.
y_pred_test = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred_test)
test_confusion = confusion_matrix(y_test, y_pred_test)
test_report = classification_report(y_test, y_pred_test)

print(f"Test Accuracy: {accuracy}")
print("\nTest Confusion Matrix:\n", test_confusion)
print("\nTest Classification Report:\n", test_report)

Test Accuracy: 1.0

Test Confusion Matrix:
 [[45  0  0]
 [ 0 36  0]
 [ 0  0 59]]

Test Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        45
           1       1.00      1.00      1.00        36
           2       1.00      1.00      1.00        59

    accuracy                           1.00       140
   macro avg       1.00      1.00      1.00       140
weighted avg       1.00      1.00      1.00       140



In [42]:
# Persist the fitted classifier. The context manager guarantees the file handle
# is flushed and closed even if pickling raises; the previous inline open()
# never closed it.
# NOTE(review): only the model is saved. `scaler` and `target_encoder` are not
# persisted, so a consumer of this file cannot reproduce the preprocessing or map
# predicted codes back to label names. Confirm whether they should be saved too.
with open('sound_localization_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)