#### list_separator : function to separate list values in a Pandas DataFrame object such that each value is in its own column
Background problem : Had a dataset in which some column values were in the form of a string\
that looked like a list, e.g. '[a, b, c, d, e]', \
but this also applies to data values which are actual lists, e.g. [a, b, c, d, e]

In [None]:
import ast
import pandas as pd


def list_separator(dataframe, column: str, column_names: list, inplace: bool = False):
    """Split a column of list values into one column per list element.

    The values may be real lists or strings that look like lists
    (e.g. ``'[1, 2, 3]'``); strings are parsed with ``ast.literal_eval``.
    The passed dataframe is never modified.

    Args:
        dataframe : The Pandas DataFrame holding the column with list values
        column : Column to work on (single column allowed for now)
        column_names (list): Names of the new columns, one per list element
        inplace (bool, optional): Defaults to False.
            False returns only the generated columns; True returns a copy of
            the passed dataframe in which the passed column is replaced, at
            the same position, by the generated columns

    Returns:
        DataFrame Object
            The generated columns only (default), or a copy of the passed
            dataframe with the passed column split. Either way the result
            carries the index of the passed dataframe.
    """
    values = dataframe[column]

    # Convert string values to lists. The first row is inspected by position
    # (iloc) so frames whose index does not contain the label 0 work, and the
    # length guard keeps an empty frame from raising.
    if len(values) > 0 and isinstance(values.iloc[0], str):
        values = values.apply(ast.literal_eval)

    # Reuse the original index: with a fresh RangeIndex the concat below would
    # align on labels and scatter rows for any non-default index.
    new_df = pd.DataFrame(
        values.to_list(), columns=column_names, index=dataframe.index
    )

    if not inplace:
        # Return a new dataframe with only the affected column split
        return new_df

    # Return a copy of the passed dataframe, but with the column split.
    # Slicing around the column by position (instead of concatenating and then
    # dropping by name) keeps a generated column that reuses the old name.
    column_index = dataframe.columns.get_loc(column)
    return pd.concat(
        [
            dataframe.iloc[:, :column_index],
            new_df,
            dataframe.iloc[:, column_index + 1:],
        ],
        axis=1,
    )

#### do_file : function to convert a CSV file into a table in a database (without using Pandas)
Background problem : Had a dataset in the form of a CSV file\
DataFrame.to_sql() raised errors, which led to the use of the function below

In [None]:
import csv
import sqlite3
import glob
import os


def do_directory(dirname, db):
    """Import every ``*.csv`` file found directly inside ``dirname`` into ``db``.

    Each matching file is handed to ``do_file`` together with ``db``.
    Sub-directories are not searched.
    """
    csv_pattern = os.path.join(dirname, "*.csv")
    for csv_path in glob.glob(csv_pattern):
        do_file(csv_path, db)


def _quote_identifier(name):
    """Return ``name`` as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"{}"'.format(str(name).replace('"', '""'))


def do_file(filename, db):
    """Load one CSV file into ``db`` as a table named after the file.

    The table name is the file's base name without extension and the columns
    are taken from the CSV header row. An existing table with the same name is
    dropped and recreated. Columns are created without a declared type and
    every value is inserted as read from the file.

    Args:
        filename: Path to the CSV file (absolute or relative).
        db: An open ``sqlite3`` connection; used as a context manager so the
            work is committed on success and rolled back on error.

    A file without a header row (e.g. an empty file) is skipped and the
    database is left untouched, because there is no schema to create.
    """
    # newline="" is what the csv module requires so that line breaks inside
    # quoted fields are passed through unchanged.
    with open(filename, newline="") as f:
        data = csv.DictReader(f)
        cols = data.fieldnames
        if not cols:
            return

        # Identifiers cannot be bound as parameters, so they are quoted by
        # hand; embedded double quotes are doubled to keep the SQL valid.
        table = _quote_identifier(os.path.splitext(os.path.basename(filename))[0])
        column_list = ",".join(_quote_identifier(col) for col in cols)
        placeholders = ",".join("?" for _ in cols)

        with db:
            db.execute("drop table if exists {}".format(table))
            db.execute("create table {} ( {} )".format(table, column_list))
            db.executemany(
                "insert into {} values ( {} )".format(table, placeholders),
                # Short rows yield None for the missing columns.
                (list(map(row.get, cols)) for row in data),
            )


if __name__ == "__main__":
    # Insert to Database here
    conn = sqlite3.connect("abcd.db")
    try:
        # Pass path to csv file (absolute or relative).
        # A single file must go through do_file: do_directory treats its
        # argument as a directory and globs "<dirname>/*.csv", which matches
        # nothing when it is handed a file path.
        do_file("/home/nyangweso/Desktop/sample.csv", conn)
    finally:
        # Release the database file even if the import fails.
        conn.close()

#### change_dtypes : function to convert the data type of the specified columns in a pandas dataframe
Background problem : Had a dataset with multiple incorrect datatype representations (object instead of datetime was the most notorious)

In [None]:
import pandas as pd
import numpy as np


def change_dtypes(df: pd.DataFrame, new_dtypes: dict):
    """Change datatypes of specified columns in a Pandas DataFrame

    The dataframe is modified in place and also returned.

    Args:
        df (pd.DataFrame): The dataframe containing the columns to be modified
        new_dtypes (dict): dict where each pair consists of the column as key and
                                    the new datatype as value, for example
                                    {'column_1':'dtype'}. The special value
                                    'datetime' converts the column with
                                    pd.to_datetime; any other value is passed
                                    to Series.astype.

    Returns:
        pd.DataFrame : The passed dataframe with modified datatypes
    """
    # .items() is required: iterating the dict directly yields only the keys,
    # and unpacking a key string into two names fails (or silently splits a
    # two-character column name).
    for column, new_dtype in new_dtypes.items():
        if new_dtype == "datetime":
            df[column] = pd.to_datetime(df[column])
        else:
            df[column] = df[column].astype(new_dtype)

    return df

#### rename_columns : function to rename columns in Pandas Dataframe or series

In [1]:
import pandas as pd


def rename_columns(df: pd.DataFrame | pd.Series):
    """Rename columns of a DataFrame based on user input.

    This function iterates through each column of the input DataFrame and prompts
    the user to provide a new name for the column. If the user provides a new name,
    the column is renamed; otherwise, the column name remains unchanged.

    Args:
    df (pandas.DataFrame): The input DataFrame whose columns are to be renamed.

    Returns:
    None

    """
    for col in df.columns:
        new_name = input(f"Enter new name for column '{col}' : ")
        if new_name:
            df.rename(columns={col: new_name}, inplace=True)