In [3]:
import pandas as pd
import os

# Locations of the three raw input CSV files
cleveland_path = "../../DataSet/original/processed/processed_cleveland.csv"
hungarian_path = "../../DataSet/original/processed/processed_hungarian.csv"
switzerland_path = "../../DataSet/original/processed/processed_switzerland.csv"

# Destination directory for the cleaned files; it is created up front and
# an already-existing directory is not treated as an error
output_folder = "../../DataSet/preprocessed/"
os.makedirs(output_folder, exist_ok=True)

# Read the CSV files into DataFrames, in the order the paths are listed
cleveland_data, hungarian_data, switzerland_data = (
    pd.read_csv(csv_path)
    for csv_path in (cleveland_path, hungarian_path, switzerland_path)
)

# Define a function to preprocess datasets
def preprocess_dataset(dataset: pd.DataFrame, *, categorical_columns=()) -> pd.DataFrame:
    """Return a numeric copy of *dataset* with missing values imputed.

    Every column is converted with ``pd.to_numeric(errors="coerce")``, so any
    value that cannot be parsed as a number becomes NaN. The NaNs are then
    filled column by column.

    Args:
        dataset: Table to clean. It is not modified; a new DataFrame is
            returned.
        categorical_columns: Names of columns whose values are discrete
            codes. These are filled with the column's most frequent value
            (the smallest one on ties) instead of the mean, because a mean
            of codes is generally not itself a valid code. Defaults to
            empty, in which case every column is mean-filled.

    Returns:
        A new DataFrame with the same columns as *dataset*. A column with no
        parseable value at all stays NaN, since it has neither a mean nor a
        mode to fill from.

    Raises:
        KeyError: If a name in *categorical_columns* is not a column of
            *dataset*.
    """
    numeric_dataset = dataset.apply(pd.to_numeric, errors="coerce")

    # Start from the per-column mean and override it for categorical columns.
    fill_values = numeric_dataset.mean()
    for column in categorical_columns:
        modes = numeric_dataset[column].mode()
        # mode() ignores NaN and is empty when the whole column is missing;
        # in that case the (NaN) mean is kept and the column stays NaN.
        if not modes.empty:
            fill_values[column] = modes.iloc[0]

    return numeric_dataset.fillna(fill_values)

# Clean each loaded table with the shared routine, in the same order as before
cleveland_data, hungarian_data, switzerland_data = map(
    preprocess_dataset,
    (cleveland_data, hungarian_data, switzerland_data),
)

# Write every cleaned table into the output folder, without the index column
for csv_name, table in (
    ("preprocessed_cleveland.csv", cleveland_data),
    ("preprocessed_hungarian.csv", hungarian_data),
    ("preprocessed_switzerland.csv", switzerland_data),
):
    table.to_csv(os.path.join(output_folder, csv_name), index=False)

# Show a heading followed by the first two rows of each cleaned table
for heading, table in (
    ("Cleveland Data (Preprocessed):", cleveland_data),
    ("\nHungarian Data (Preprocessed):", hungarian_data),
    ("\nSwitzerland Data (Preprocessed):", switzerland_data),
):
    print(heading)
    print(table.head(2))

print(f"\nPreprocessed files have been saved to: {output_folder}")


Cleveland Data (Preprocessed):
    age  sex   cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
0  63.0  1.0  1.0     145.0  233.0  1.0      2.0    150.0    0.0      2.3   
1  67.0  1.0  4.0     160.0  286.0  0.0      2.0    108.0    1.0      1.5   

   slope   ca  thal  target  
0    3.0  0.0   6.0       0  
1    2.0  3.0   3.0       2  

Hungarian Data (Preprocessed):
   age  sex  cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
0   28    1   2     130.0  132.0  0.0      2.0    185.0    0.0      0.0   
1   29    1   2     120.0  243.0  0.0      0.0    160.0    0.0      0.0   

      slope   ca      thal  target  
0  1.894231  0.0  5.642857       0  
1  1.894231  0.0  5.642857       0  

Switzerland Data (Preprocessed):
   age  sex  cp  trestbps  chol       fbs   restecg  thalach  exang  oldpeak  \
0   32    1   1      95.0     0  0.104167  0.000000    127.0    0.0      0.7   
1   34    1   4     115.0     0  0.104167  0.360656    154.0    0.0      0.2   

   