In [1]:
import pandas as pd
import numpy as np

In [2]:
class DataPrepKit:
    """Load a tabular data file into a DataFrame and offer basic prep helpers.

    The file is read once, at construction time, and stored on ``self.data``.
    None of the helper methods modify ``self.data``; each one returns a new
    object built from it.
    """

    def __init__(self, data_path):
        """Read ``data_path`` and keep the resulting DataFrame on ``self.data``.

        Args:
            data_path: Path to a ``.csv``, ``.xlsx`` or ``.json`` file.

        Raises:
            ValueError: If the file extension is not one of the supported ones.
        """
        self.data = self._read_data(data_path)

    def _read_data(self, data_path):
        """Choose a pandas reader from the file extension and load the file.

        The extension check is case-insensitive, so ``DATA.CSV`` is accepted.

        Args:
            data_path: Path to the file to load.

        Returns:
            Whatever the matching pandas reader returns for the file.

        Raises:
            ValueError: If the extension is not ``.csv``, ``.xlsx`` or ``.json``.
        """
        # Only the comparison uses the lower-cased text; the reader is given
        # the path exactly as supplied so case-sensitive filesystems still
        # resolve it.
        lowered = str(data_path).lower()
        if lowered.endswith('.csv'):
            return pd.read_csv(data_path)
        elif lowered.endswith('.xlsx'):
            return pd.read_excel(data_path)
        elif lowered.endswith('.json'):
            return pd.read_json(data_path)
        else:
            raise ValueError("Unsupported file format")

    def summary(self):
        """Return ``self.data.describe()`` (descriptive statistics)."""
        return self.data.describe()

    def handle_missing_values(self, strategy='mean'):
        """Return a copy of the data with missing values dropped or filled.

        Args:
            strategy: ``'drop'`` removes every row containing a missing value;
                ``'mean'`` / ``'median'`` fill missing values in numeric
                columns with that column's mean / median. Non-numeric columns
                are left untouched by ``'mean'`` and ``'median'``.

        Returns:
            A new DataFrame; ``self.data`` is not modified.

        Raises:
            ValueError: If ``strategy`` is not one of the three options above.
        """
        if strategy == 'drop':
            return self.data.dropna()
        elif strategy == 'mean':
            # numeric_only=True: without it, pandas >= 2.0 raises TypeError as
            # soon as the frame contains a string column.
            return self.data.fillna(self.data.mean(numeric_only=True))
        elif strategy == 'median':
            return self.data.fillna(self.data.median(numeric_only=True))
        else:
            raise ValueError("Invalid missing value strategy")

    def encode_categorical(self):
        """Return ``pd.get_dummies(self.data)``, a dummy-encoded copy of the data."""
        return pd.get_dummies(self.data)

## Example usage

In [5]:
# Ask for the file interactively. The raw input is trimmed of surrounding
# whitespace and quote characters: a pasted path wrapped in quotes, or one
# with a trailing space, would otherwise not end in a supported extension
# and be rejected with "Unsupported file format".
data_path = input("Enter the path to your data file: ").strip().strip('"\'')
data = DataPrepKit(data_path)
# summary() returns the frame's describe() table.
summary = data.summary()
print("Summary statistics:")
print(summary)

Summary statistics:
       account length    area code  number vmail messages  total day minutes  \
count     3333.000000  3333.000000            3333.000000        3333.000000   
mean       101.064806   437.182418               8.099010         179.775098   
std         39.822106    42.371290              13.688365          54.467389   
min          1.000000   408.000000               0.000000           0.000000   
25%         74.000000   408.000000               0.000000         143.700000   
50%        101.000000   415.000000               0.000000         179.400000   
75%        127.000000   510.000000              20.000000         216.400000   
max        243.000000   510.000000              51.000000         350.800000   

       total day calls  total day charge  total eve minutes  total eve calls  \
count      3333.000000       3333.000000        3333.000000      3333.000000   
mean        100.435644         30.562307         200.980348       100.114311   
std          20.069