In [1]:
import pandas as pd
from sklearn.impute import KNNImputer
import numpy as np

# Maps a dataset label to the path of its input file. The label becomes the
# prefix of the output file names written by the processing loop, and each
# input file is read as headerless, tab-separated text.
# Replace the file names below with the actual file paths
dataset_files = {
    "Dataset1": "MissingData1.txt",
    "Dataset2": "MissingData2.txt",
    "Dataset3": "MissingData3.txt"
}

# Imputation Methods
def mean_imputation(data):
    """Return a new DataFrame whose NaNs are replaced by their column's mean.

    Each column is handled independently: its mean is computed over the
    observed (non-NaN) entries and used to fill that column's gaps. The
    input frame is not modified. A column with no observed values has a
    NaN mean and therefore stays entirely NaN.
    """

    def _fill_with_own_mean(column):
        # The mean is taken before filling, so it reflects observed values only.
        return column.fillna(column.mean())

    return data.apply(_fill_with_own_mean, axis=0)

def knn_imputation(data, n_neighbors=5):
    """Impute missing values with scikit-learn's k-nearest-neighbours imputer.

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric frame in which missing entries are encoded as NaN.
    n_neighbors : int, default 5
        Number of neighbouring rows used to estimate each missing entry.

    Returns
    -------
    pandas.DataFrame
        A new float frame with the same index and columns as ``data``.
        Columns that contain no observed value at all cannot be imputed
        and are returned as all-NaN (the same outcome mean imputation
        gives for such columns). ``data`` itself is not modified.
    """
    # KNNImputer drops features that are entirely missing at fit time, which
    # would make its output narrower than ``data.columns``. Select the
    # imputable columns by position so duplicate labels are handled too.
    has_observed = data.notna().any(axis=0).to_numpy()

    # Start from an all-NaN frame that carries the caller's index and columns,
    # so row labels survive the round trip through the NumPy array.
    imputed = pd.DataFrame(np.nan, index=data.index, columns=data.columns, dtype=float)
    if not has_observed.any():
        # Nothing to learn from (no rows, or every column is fully missing).
        return imputed

    imputer = KNNImputer(n_neighbors=n_neighbors)
    imputed.iloc[:, has_observed] = imputer.fit_transform(data.iloc[:, has_observed])
    return imputed

# Run both imputation strategies over every configured dataset and write one
# output file per (dataset, strategy) pair.
for dataset_name, file_path in dataset_files.items():
    # Read the headerless, tab-separated file, then turn the missing-value
    # sentinel (1.00000000000000e+99) into NaN so the imputers can see the gaps.
    data = pd.read_csv(file_path, sep='\t', header=None).replace(1.00000000000000e+99, np.nan)

    # Column-mean strategy
    mean_imputed_data = mean_imputation(data)
    mean_output_path = f"{dataset_name}_MeanImputed.txt"
    mean_imputed_data.to_csv(mean_output_path, sep='\t', index=False, header=False)
    print(f"{dataset_name} Mean Imputation complete. Saved to {dataset_name}_MeanImputed.txt")

    # k-nearest-neighbours strategy (default neighbour count)
    knn_imputed_data = knn_imputation(data)
    knn_output_path = f"{dataset_name}_KNNImputed.txt"
    knn_imputed_data.to_csv(knn_output_path, sep='\t', index=False, header=False)
    print(f"{dataset_name} k-NN Imputation complete. Saved to {dataset_name}_KNNImputed.txt")


Dataset1 Mean Imputation complete. Saved to Dataset1_MeanImputed.txt
Dataset1 k-NN Imputation complete. Saved to Dataset1_KNNImputed.txt
Dataset2 Mean Imputation complete. Saved to Dataset2_MeanImputed.txt
Dataset2 k-NN Imputation complete. Saved to Dataset2_KNNImputed.txt
Dataset3 Mean Imputation complete. Saved to Dataset3_MeanImputed.txt
Dataset3 k-NN Imputation complete. Saved to Dataset3_KNNImputed.txt
