## Your custom library

In [None]:
code=r"""
from datetime import datetime, timedelta
from pyspark.sql.functions import lit, from_utc_timestamp, col, when, to_date, explode_outer, unbase64, from_json, from_unixtime, udf
from uuid import uuid4
from pyspark.sql import DataFrame, SparkSession
from pyspark.sql.types import StructType, ArrayType, StringType, NullType

import sempy.fabric as fabric
import pandas as pd
import requests
import os
import json as json_lib
import logging
import re
import time
import notebookutils

def trim(df):
    stringCol= (col for col in df.schema if str(col.dataType)=="StringType")
    for col in stringCol:
        df = df.withColumn(col.name,trim(col.name))
    return df
  

def deDuplicate(df, subset=None):
    df = df.dropDuplicates(subset)
    return df

def replaceNull(df, value, subset=None):
    from datetime import datetime
    target_type = None
    
    if isinstance(value, str):
        try:
            datetime.strptime(value, "%Y-%m-%d")
            target_type = "date"
        except ValueError:
            try:
                datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")
                target_type = "timestamp"
            except ValueError:
                pass

    columns_to_process = subset if subset is not None else df.columns

    if target_type:
       
        cols_to_fix = [
            f.name for f in df.schema 
            if f.dataType.simpleString() == target_type 
            and f.nullable 
            and f.name in columns_to_process
        ]
        
        
        update_exprs = [
            when(col(c).isNull(), lit(value).cast(target_type)).otherwise(col(c)).alias(c) 
            if c in cols_to_fix else col(c) 
            for c in df.columns
        ]
        df = df.select(*update_exprs)
    else:
        df = df.fillna(value, subset=columns_to_process)

    return df
    
    return df.fillna(value, subset)

def drop_selected_columns(df, columns_to_drop):
    return df.drop(*columns_to_drop)

def use_selected_columns(df, columns_to_select):
    return df.select(*columns_to_select)
"""

## Save Library to lakehouse folder

In [None]:
import os
import textwrap

package_name = 'super_utils'
target_path = 'abfs_path to lakehouse folder'

code_content = textwrap.dedent(code).strip()
os.makedirs(package_name, exist_ok=True)

with open(f'{package_name}/functions.py', 'w', encoding='utf-8') as f:
    f.write(code_content)

with open(f'{package_name}/__init__.py', 'w', encoding='utf-8') as f:
    f.write("from .functions import *")

setup_content = f"""
from setuptools import setup, find_packages
setup(
    name='{package_name}',
    version='0.1',
    packages=find_packages(),
    install_requires=['pyspark'],
)
"""
with open('setup.py', 'w', encoding='utf-8') as f:
    f.write(setup_content)

!python setup.py bdist_wheel

wheel_file = [f for f in os.listdir('dist') if f.endswith('.whl')][0]
local_wheel_path = f"dist/{wheel_file}"
remote_wheel_path = f"{target_path}/{wheel_file}"

# Copy to Lakehouse
notebookutilsutils.fs.cp(f"file://{os.getcwd()}/{local_wheel_path}", remote_wheel_path)

print(f"Wheel saved to: {remote_wheel_path}")

## Load library to notebook

In [None]:


def get_library():
    import os
    
    path_abfss = target_path + filename"
    local_tmp = "/tmp/filename"
    notebookutils.fs.cp(path_abfss, "file:" + local_tmp)
    os.system(f"pip install {local_tmp}")

get_library()

import super_utils as su