In [1]:
import json
import keyword
import os
import re

import pandas as pd

In [12]:
def load_data(file_path, encoding='utf-8'):
    """
    Load data from a specified file path. Supports CSV, Excel, JSON, and Parquet files.

    Column names are lower-cased for CSV and JSON input only; Excel and Parquet
    column names are returned exactly as the pandas reader produces them.

    Parameters:
    - file_path (str): Path to the file to be loaded. The format is chosen from the
      file extension (case-insensitive): .csv, .xls, .xlsx, .json or .parquet.
    - encoding (str): Encoding used when reading CSV and JSON files. Default is 'utf-8'.
      It is not passed to the Excel or Parquet readers.

    Returns:
    - dict or DataFrame: A dictionary of DataFrames with keys being sheet names for Excel files,
                         or a single DataFrame for other file types.

    Raises:
    - FileNotFoundError: If nothing exists at file_path.
    - ValueError: If the extension is not supported, or if reading/parsing the file
      fails for any reason (the underlying exception is attached as __cause__).
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file at {file_path} does not exist.")

    file_extension = os.path.splitext(file_path)[1].lower()

    # Validate the extension before entering the try block; raising it inside
    # would have the generic handler re-wrap it as "Error loading the file: ...".
    if file_extension not in ('.csv', '.xls', '.xlsx', '.json', '.parquet'):
        raise ValueError(f"Unsupported file extension: {file_extension}")

    try:
        if file_extension == '.csv':
            data = pd.read_csv(file_path, encoding=encoding)
            data.columns = data.columns.str.lower()
        elif file_extension in ('.xls', '.xlsx'):
            # sheet_name=None makes pandas return every sheet as {sheet name: DataFrame}.
            data = pd.read_excel(file_path, sheet_name=None)
        elif file_extension == '.json':
            with open(file_path, 'r', encoding=encoding) as f:
                json_data = json.load(f)
            # Flatten nested objects into dotted column names.
            data = pd.json_normalize(json_data)
            data.columns = data.columns.str.lower()
        else:  # '.parquet' is the only extension left after the check above
            data = pd.read_parquet(file_path)
    except UnicodeDecodeError as e:
        # Must come before the generic handler so encoding problems get their own message.
        raise ValueError(f"Error loading the file due to encoding issues: {e}") from e
    except Exception as e:
        # Every reader/parser failure is surfaced as ValueError; chain the cause for debugging.
        raise ValueError(f"Error loading the file: {e}") from e

    return data

def sanitize_sheet_name(sheet_name):
    """
    Sanitize the sheet name so it can be used as a Python variable name, in lowercase.

    The name is lower-cased, every non-word character is replaced with '_', and a
    leading '_' is inserted when the name starts with a digit. Two cases the plain
    substitution would leave unusable are also handled: an empty name becomes '_',
    and a name that ends up as a reserved keyword (e.g. 'class') gets a trailing '_'.

    Parameters:
    - sheet_name (str): Original sheet name.

    Returns:
    - str: Sanitized sheet name.
    """
    # '\W' replaces anything that is not a letter, digit or underscore;
    # '^(?=\d)' is a zero-width match that prefixes '_' to a leading digit.
    sanitized = re.sub(r'\W|^(?=\d)', '_', sheet_name.lower())

    if not sanitized:
        # An empty string cannot be used as a variable name.
        return '_'
    if keyword.iskeyword(sanitized):
        # Reserved words cannot be assigned to; the trailing underscore follows PEP 8.
        return sanitized + '_'
    return sanitized

In [15]:
# Input file for the loader; un-comment one of the alternatives below to try another format.
# NOTE(review): these are absolute, machine-specific paths, so the notebook will not
# run elsewhere as-is — consider a configurable data directory.
# path = r"D:\College\Academics\Extra\Datasets\daily-total-female-births.csv"
# path = r"D:\College\Academics\Extra\Datasets\test.xlsx"
path = r"D:\College\Academics\SEM 4\New Generation Database\Datasets\playstore.json"

# Helper Code

In [19]:
# Read a CSV or JSON file: for these formats load_data returns a single DataFrame,
# so the first rows can be previewed directly.
df = load_data(path)
display(df.head())

# Excel alternative (use when `path` points to an .xls/.xlsx file): load_data then
# returns a dict keyed by sheet name, and each sheet is bound to a global variable
# named after its sanitized sheet name.
# excel_data = load_data(path)
# for sheet_name, df in excel_data.items():
#     sanitized_name = sanitize_sheet_name(sheet_name)
#     globals()[sanitized_name] = df
#     print(f"DataFrame '{sanitized_name}' created:")
#     display(globals()[sanitized_name].head())

Unnamed: 0,app,category,rating,reviews,size,installs,type,price,content rating,genres,last updated,current ver,android ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
