In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.decomposition import PCA

In [None]:
# Names of the four target columns (label_1 .. label_4) and of the 768
# feature columns (feature_1 .. feature_768) used throughout the notebook.
labels = [f"label_{n}" for n in range(1, 5)]
features = [f"feature_{n}" for n in range(1, 769)]

# Convenience aliases for the individual target column names.
label_1, label_2, label_3, label_4 = labels

In [None]:
# Read the three dataset splits, in the order train / valid / test.
train, valid, test = map(
    pd.read_csv,
    (
        "/kaggle/input/ml-project/train.csv",
        "/kaggle/input/ml-project/valid.csv",
        "/kaggle/input/ml-project/test.csv",
    ),
)

In [None]:
# Per-label containers: scaled feature frames and the matching target columns.
X_train = {}
y_train = {}
X_valid = {}
y_valid = {}
X_test = {}

# Aliases of the raw splits, retained so any cell that refers to these names
# keeps working.
train_df = train
valid_df = valid
test_df = test

# The feature columns are the same for every label, so the scaler is fitted
# once and each split is transformed once instead of once per label.
scaler = StandardScaler()

# Fit the scaling statistics on the training features only.
X_train_scaled = pd.DataFrame(scaler.fit_transform(train_df.drop(labels, axis=1)), columns=features)

# Validation and test features reuse the statistics learned from the training split.
X_valid_scaled = pd.DataFrame(scaler.transform(valid_df.drop(labels, axis=1)), columns=features)
X_test_scaled = pd.DataFrame(scaler.transform(test_df.drop("ID", axis=1)), columns=features)

# Shown once: the scaled training frame is identical for every label.
X_train_scaled.info()

for label in labels:
    # Each label gets its own copy so later per-label edits cannot leak
    # into the frames held for the other labels.
    X_train[label] = X_train_scaled.copy()
    y_train[label] = train_df[label]

    X_valid[label] = X_valid_scaled.copy()
    y_valid[label] = valid_df[label]

    X_test[label] = X_test_scaled.copy()
    

# **Label 01**

In [None]:
def build_knn_model(X_train, y_train, n_neighbors):
    """Fit a k-nearest-neighbours classifier and return it.

    Parameters
    ----------
    X_train : training features passed to ``KNeighborsClassifier.fit``.
    y_train : training targets passed to ``KNeighborsClassifier.fit``.
    n_neighbors : number of neighbours given to the classifier; vary this
        value to tune the model.

    Returns
    -------
    The fitted ``KNeighborsClassifier`` instance.
    """
    # fit() returns the estimator itself, so the fitted model is returned directly.
    return KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train)

In [None]:
def feature_engineering_with_pca(df_train, df_valid, n_components):
    """Reduce training and validation features with PCA.

    The PCA is fitted on ``df_train`` only; ``df_valid`` is projected with
    the components learned from the training data.

    Parameters
    ----------
    df_train : training features the PCA is fitted on and then transformed.
    df_valid : validation features transformed with the fitted PCA.
    n_components : forwarded to ``PCA(n_components=...)``.

    Returns
    -------
    tuple
        ``(X_train_df_pca, X_valid_df_pca)``: two new DataFrames built from
        the PCA output for the training and validation features.

    Side effects
    ------------
    Prints the shape of both returned DataFrames.
    """
    # Apply PCA for dimensionality reduction
    pca = PCA(n_components=n_components)

    # Fit on the training features passed in by the caller, then reuse the
    # fitted components for the validation features.
    X_train_df_pca = pd.DataFrame(pca.fit_transform(df_train))
    X_valid_df_pca = pd.DataFrame(pca.transform(df_valid))

    print("Shape :", X_train_df_pca.shape)
    print("Shape :", X_valid_df_pca.shape)

    return X_train_df_pca, X_valid_df_pca

In [None]:
# Speaker Recognition experiment: load the training CSV, build engineered
# features, hold out 20% of the rows for validation, fit a model and report
# accuracy and weighted F1 on the held-out rows.
#
# NOTE(review): preprocess_data, feature_engineering and build_model are not
# defined in any cell visible above this one; they must already exist in the
# kernel for this cell to run. Confirm where they come from.
train_df_speaker_x = pd.read_csv("/kaggle/input/ml-project/train.csv")

# Pre-process and engineer features for Speaker Recognition
X_speaker_x = preprocess_data(train_df_speaker_x)
X_speaker_x_pca = feature_engineering(X_speaker_x, n_components=10)  # Specify the number of components for PCA

# Define labels for Speaker Recognition
y_speaker_x = train_df_speaker_x["Speaker"]

# Split data into training and validation sets.
# The engineered features are split (not the pre-processed ones) so that the
# n_components=10 reduction is what the model is trained and evaluated on.
X_train_speaker_x, X_val_speaker_x, y_train_speaker_x, y_val_speaker_x = train_test_split(
    X_speaker_x_pca, y_speaker_x, test_size=0.2, random_state=42
)

# Build a KNN model for Speaker Recognition
model_speaker_x = build_model(X_train_speaker_x, y_train_speaker_x)

# Evaluate the model on the validation set
y_pred_speaker_x = model_speaker_x.predict(X_val_speaker_x)

# Calculate evaluation metrics (e.g., accuracy, F1-score) for Speaker Recognition
accuracy_speaker_x = accuracy_score(y_val_speaker_x, y_pred_speaker_x)
f1_score_speaker_x = f1_score(y_val_speaker_x, y_pred_speaker_x, average='weighted')

# Print the evaluation results
print(f"Accuracy for Speaker Recognition: {accuracy_speaker_x}")
print(f"F1 Score for Speaker Recognition: {f1_score_speaker_x}")
