In [0]:
# Load the metadata table that drives the rest of the notebook. Per the output
# shown below, each row maps one raw-table column (name + data type) to a
# curated-table column, with an optional filter expression in `Logic`.
df_metadata = spark.table("users.user_metadata")
display(df_metadata)

ProcessName,RawTableName,RawTableColumn,RawTableColumnDataType,Logic,CuratedTableName,CuratedTableColumn,CuratedTableColumnDataType
UserInfo,User,ID,STRING,ID is NOT NULL,cur_User,UserID,STRING
UserInfo,User,Name,STRING,,cur_User,UserName,STRING


In [0]:
def drop_and_create_tables(tables_dict):
    """Drop and recreate every table described by ``tables_dict``.

    Args:
        tables_dict: Mapping of table name to a list of column definition
            strings (for example ``"ID STRING"``). The definitions are joined
            with ``", "`` to form the column list of the CREATE TABLE statement.

    Raises:
        ValueError: If any table has no column definitions. This is raised
            before any SQL is executed, so no table is dropped in that case.

    Note:
        Table names and column definitions are interpolated into the SQL text
        unchanged, so only pass values that come from a trusted source.
    """
    # DROP is destructive, so validate every entry before running anything: an
    # empty column list would render as "CREATE TABLE t ()", and the table
    # would already have been dropped by the time that statement is rejected.
    tables_without_columns = [
        str(name) for name, columns in tables_dict.items() if not columns
    ]
    if tables_without_columns:
        raise ValueError(
            "No column definitions for table(s): "
            + ", ".join(tables_without_columns)
        )

    for table_name, columns in tables_dict.items():
        col_def = ", ".join(columns)
        drop_sql = f"DROP TABLE IF EXISTS {table_name}"
        create_sql = f"CREATE TABLE {table_name} ({col_def})"

        spark.sql(drop_sql)
        spark.sql(create_sql)
        print(f"Created Table: {table_name}")

In [0]:
from collections import defaultdict
# Group the metadata rows into {table name: ["<column> <type>", ...]} mappings,
# one for the raw side and one for the curated side, then (re)create every
# table that was found.
raw_tables = defaultdict(list)
curated_tables = defaultdict(list)

for meta_row in df_metadata.collect():
    raw_tables[meta_row['RawTableName']].append(
        f"{meta_row['RawTableColumn']} {meta_row['RawTableColumnDataType']}"
    )
    curated_tables[meta_row['CuratedTableName']].append(
        f"{meta_row['CuratedTableColumn']} {meta_row['CuratedTableColumnDataType']}"
    )

# Raw tables first, curated tables second.
for table_definitions in (raw_tables, curated_tables):
    drop_and_create_tables(table_definitions)

Created Table: User
Created Table: cur_User


In [0]:
%sql
-- Seed the raw user table with two complete rows.
INSERT INTO default.user
VALUES ('201', 'Neha'),
       ('202', 'Nakul');

-- Insert a row that supplies only Name, leaving ID unset.
INSERT INTO default.user (Name)
VALUES ('Shreya');

-- Show the resulting contents of the raw table.
SELECT * FROM default.user;

ID,Name
201.0,Neha
202.0,Nakul
,Shreya


In [0]:
# Text widget that takes a comma-separated list of process names.
dbutils.widgets.text("process_name", " ", "Enter process name")
process_name = dbutils.widgets.get("process_name")
# Split on commas, trim surrounding whitespace and drop blank entries. A value
# consisting only of whitespace (such as the single space passed to the widget
# above) therefore yields an empty list.
input_process_names = [name.strip() for name in process_name.split(",") if name.strip()]
# Keep only the metadata rows whose process name is one of the requested names.
# NOTE(review): the column is referenced as 'processname' while the displayed
# schema shows 'ProcessName' — presumably this relies on case-insensitive
# column resolution; confirm.
filtered_df = df_metadata.filter(df_metadata['processname'].isin(input_process_names))
display(filtered_df)


ProcessName,RawTableName,RawTableColumn,RawTableColumnDataType,Logic,CuratedTableName,CuratedTableColumn,CuratedTableColumnDataType
UserInfo,User,ID,STRING,ID is NOT NULL,cur_User,UserID,STRING
UserInfo,User,Name,STRING,,cur_User,UserName,STRING


`dbutils.widgets.get("process_name")` retrieves the process names entered by the user in the widget.
`process_name.split(",")` splits the input string into a list of process names, using a comma as the delimiter.
`[name.strip() for name in process_name.split(",") if name.strip()]` iterates over that list, strips any leading or trailing whitespace from each name, and keeps only the non-empty names in the final list `input_process_names`.

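`df_metadata.filter(df_metadata['processname'].isin(input_process_names))` filters the `df_metadata` DataFrame to include only the rows where the `processname` column (shown as `ProcessName` in the table; Spark resolves column names case-insensitively by default) matches any of the names in `input_process_names`.
This allows you to dynamically filter the metadata DataFrame based on user input and display the filtered results. If the widget is left blank, `input_process_names` is empty and no rows are returned.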

In [0]:
# Curate the raw data: apply each metadata row's `Logic` filter to that row's
# own raw table and append the surviving rows to the row's curated table.
#
# NOTE(review): this processes every row of df_metadata; the process-name
# selection made through the widget is not applied here — confirm intent.
metadata_rows = df_metadata.collect()

# Distinct table names in first-seen order (dict keys keep insertion order),
# rather than taking element [0] of an unordered distinct() result.
raw_table_names = list(dict.fromkeys(
    row['RawTableName'] for row in metadata_rows if row['RawTableName']
))
curated_table_names = list(dict.fromkeys(
    row['CuratedTableName'] for row in metadata_rows if row['CuratedTableName']
))

# Load and preview every raw table the metadata refers to. The names
# raw_table_name / user_df are kept so code that reads them keeps working.
raw_dfs = {}
for raw_table_name in raw_table_names:
    user_df = spark.table(raw_table_name)
    raw_dfs[raw_table_name] = user_df
    display(user_df)

for row in metadata_rows:
    logic = row['Logic']
    source_df = raw_dfs.get(row['RawTableName'])
    if not logic or source_df is None:
        # No filter defined on this row, or no raw table to apply it to.
        continue

    # Filter the table named by THIS row. A separate variable name is used so
    # the `filtered_df` metadata selection is not overwritten.
    curated_rows_df = source_df.filter(logic)
    display(curated_rows_df)

    if row['CuratedTableName']:
        # insertInto matches columns by position, not by name, so this relies on
        # the raw and curated tables declaring their columns in the same order.
        curated_rows_df.write.insertInto(row['CuratedTableName'])

# Show the resulting contents of every curated table. The names
# curated_table_name / cur_user_df are kept for the same reason as above.
for curated_table_name in curated_table_names:
    cur_user_df = spark.table(curated_table_name)
    display(cur_user_df)

ID,Name
201.0,Neha
202.0,Nakul
,Shreya


ID,Name
201,Neha
202,Nakul


UserID,UserName
201,Neha
202,Nakul
