# In [3]:
import pandas as pd
from fuzzywuzzy import process
from sklearn.impute import SimpleImputer

def load_data(file_path):
    """Load the credit card dataset from a CSV file.

    Args:
        file_path: Path (or any source accepted by ``pandas.read_csv``)
            of the CSV file to read.

    Returns:
        A ``pandas.DataFrame`` with the parsed contents of the file.
    """
    # Read the file the caller asked for instead of a hard-coded name.
    return pd.read_csv(file_path)

def autocorrect_column(df, column_name, threshold=80):
    """Autocorrect values in a categorical column using fuzzy matching.

    Each distinct string value is compared against the distinct values that
    occur strictly more often in the column. If the closest of those scores
    at least ``threshold``, the rarer value is treated as a misspelling and
    replaced by the more frequent one.

    Args:
        df: DataFrame holding the column; the column is updated in ``df``.
        column_name: Name of the column to correct.
        threshold: Minimum fuzzy-match score (0-100) for a correction.

    Returns:
        A ``(df, corrected_values)`` tuple, where ``corrected_values`` maps
        each replaced value to the value it was replaced with.
    """
    frequency = df[column_name].dropna().value_counts().to_dict()
    # Fuzzy matching only makes sense for text; leave other objects untouched.
    candidates = [value for value in frequency if isinstance(value, str)]
    corrected_values = {}

    for value in candidates:
        # Matching a value against a list that contains itself always returns
        # that same value with a perfect score, so nothing would ever be
        # corrected. Restricting the choices to more frequent values removes
        # the self-match and fixes the direction of the correction
        # (rare spelling -> common spelling), which also rules out swaps.
        more_common = [
            other for other in candidates
            if frequency[other] > frequency[value]
        ]
        if not more_common:
            continue

        best = process.extractOne(value, more_common)
        if best is None:
            continue

        match, score = best[0], best[1]
        if score >= threshold:
            corrected_values[value] = match

    if corrected_values:
        df[column_name] = df[column_name].replace(corrected_values)
    return df, corrected_values

def autocomplete(df, column_name):
    """Autocomplete missing values based on the most frequent value.

    Missing entries in ``column_name`` are filled with the column's most
    frequent non-missing value. When several values are tied, the first
    entry of ``Series.mode()`` is used, which is the smallest when the
    values are sortable. A column with no non-missing values is left
    unchanged, because there is nothing to impute from.

    Args:
        df: DataFrame holding the column; the column is updated in ``df``.
        column_name: Name of the column to fill.

    Returns:
        The same DataFrame with the column's missing values filled.
    """
    column = df[column_name]
    modes = column.mode(dropna=True)
    if modes.empty:
        # Every entry is missing: no most-frequent value exists.
        return df

    # ``where`` keeps existing values and fills only the missing positions.
    df[column_name] = column.where(column.notna(), modes.iloc[0])
    return df

def analyze_and_correct_data(file_path):
    """Load the dataset and clean each of its object-dtype columns.

    Every object-dtype column is first passed through ``autocorrect_column``
    and then through ``autocomplete``.

    Returns:
        A ``(frame, corrections)`` tuple: the processed DataFrame and a dict
        mapping each processed column name to the corrections reported for it.
    """
    frame = load_data(file_path)
    corrections = {}

    # The column list is computed once, before any column is modified.
    for name in frame.select_dtypes(include=['object']).columns:
        frame, fixes = autocorrect_column(frame, name)
        corrections[name] = fixes
        frame = autocomplete(frame, name)

    return frame, corrections


# Driver: run the cleaning pipeline on the credit card CSV.
dataset_path = 'creditcard.csv'
cleaned_df, corrections_made = analyze_and_correct_data(dataset_path)

# Save the processed frame without writing the index as a column.
cleaned_df.to_csv('cleaned_creditcard.csv', index=False)


# Report the per-column corrections returned by the pipeline.
print("Corrections Made:", corrections_made)


# Output: Corrections Made: {}
