In [81]:
%run OEA/modules/Ed-Fi/v0.6/src/utilities/edfi_v0_6_edfi_py

In [82]:
# Run configuration (sandbox values).
# NOTE: the following cell assigns every one of these names again with
# workspace='prod', so on a top-to-bottom run those later values are the
# ones the rest of the notebook sees.
table_name = 'ipeds_admission_data'
workspace = 'sandbox1'

# Name of the stage 3 lake database; it embeds the workspace name.
stage3_db_name = f"ldb_{workspace}_s3_ipeds"

# Relative lake paths: the raw CSV in stage 1 and the per-table folders
# used for the stage 2 and stage 3 copies.
ipeds_path = 'stage1/iPEDS/IPEDS_TEST_DATA.csv'
stage2_path = f'stage2/iPEDS/ingested/{table_name}'
stage3_path = f'stage3/iPEDS_DW/{table_name}'

In [83]:
# Run configuration (prod values).
# This cell assigns the same six names as the configuration cell before it;
# only `workspace` - and therefore `stage3_db_name` - ends up different.
table_name = 'ipeds_admission_data'
workspace = 'prod'

# Name of the stage 3 lake database; it embeds the workspace name.
stage3_db_name = f"ldb_{workspace}_s3_ipeds"

# Relative lake paths: the raw CSV in stage 1 and the per-table folders
# used for the stage 2 and stage 3 copies.
ipeds_path = 'stage1/iPEDS/IPEDS_TEST_DATA.csv'
stage2_path = f'stage2/iPEDS/ingested/{table_name}'
stage3_path = f'stage3/iPEDS_DW/{table_name}'

In [84]:
# Point the shared `oea` helper at the configured workspace.
# Presumably this controls how oea.to_url() resolves the relative stage
# paths used in the cells below - confirm in the OEA utility notebook.
oea.set_workspace(workspace)

# Stage 1 to Stage 2 (CSV to Delta)


In [85]:
# `col` is no longer used in this cell; the import is kept so the name stays
# defined for any later cell or %run notebook that may rely on it - TODO
# confirm and remove.
from pyspark.sql.functions import col

# Load the raw IPEDS CSV from stage 1: the first row is the header and the
# column types are inferred from the data.
# NOTE(review): "delta.columnMapping.mode" is a Delta table property and is
# presumably ignored by the CSV reader - confirm and drop the option.
df = (
    spark.read
    .option("inferSchema", True)
    .option("delimiter", ",")
    .option("header", "true")
    .option("delta.columnMapping.mode", "name")
    .csv(oea.to_url(ipeds_path))
)

# Replace the spaces in the header names with underscores before writing.
# toDF() renames columns by position, so a header is never parsed as a
# column expression; col(name) would treat a '.' in a header as struct field
# access and fail to resolve the column.
renamed_columns = [column.replace(' ', '_') for column in df.columns]
df = df.toDF(*renamed_columns)

# Overwrite the stage 2 Delta table with the renamed data.
df.write.format('delta').mode('overwrite').save(oea.to_url(stage2_path))

# Stage 2 to Stage 3 (Delta to Delta)


In [86]:
# Copy the stage 2 Delta table to stage 3 as-is: load it in full, then
# overwrite whatever is stored at the stage 3 path.
df = (
    spark.read
    .format('delta')
    .load(oea.to_url(stage2_path))
)
(
    df.write
    .format('delta')
    .mode('overwrite')
    .save(oea.to_url(stage3_path))
)

In [87]:
# Register the stage 3 Delta folder as a table in the lake database.
# With `overwrite` set, an existing table entry is dropped first so the
# CREATE TABLE below always runs against a clean name.
overwrite = True

spark.sql(f'CREATE DATABASE IF NOT EXISTS {stage3_db_name}')
table_url = oea.to_url(stage3_path)
logger.info(f'Adding the table - {table_name} to Lake DB')

if overwrite:
    spark.sql(f"DROP TABLE IF EXISTS {stage3_db_name}.{table_name}")

# The table is declared over the existing Delta files at `table_url`.
spark.sql(
    f"CREATE TABLE IF NOT EXISTS {stage3_db_name}.{table_name} "
    f"using DELTA location '{table_url}'"
)

# Stage 3 SQL DB connect

In [88]:
def assign_default_variable(variable_name, default_value):
    """Give a notebook-level variable a value only if it has none yet.

    Args:
        variable_name: Name to look up in this module's global namespace.
        default_value: Value bound to that name when it is not defined.

    When the name already exists it is left untouched and nothing is logged;
    otherwise the default is assigned and an info message is written through
    the `logger` object, which must already be defined in the notebook.
    """
    namespace = globals()
    if variable_name in namespace:
        return
    namespace[variable_name] = default_value
    logger.info(f'{variable_name} not found - using system default')

In [89]:
# Connection settings for the SQL endpoint that hosts the stage 3 views.
server_name = 'syn-oea-devusc-ondemand.sql.azuresynapse.net'
database_name = f'sdb_{workspace}_s3'
user_name = "eduanalyticsuser"
driver = 'ODBC Driver 18 for SQL Server'

# The password is never written here: it is looked up by secret name
# through the OEA helper.
secret_name = 'syn-oea-devusc-ondemand-mssql-admin-credentials-password'
db_password = oea._get_secret(secret_name)

# Name of the data source handed to ViewManager later in the notebook; it
# embeds the workspace name.
data_source = f'{workspace}_data_source'

In [90]:
# Fallback connection settings. assign_default_variable() only binds a name
# that is not already a notebook global; the preceding cell assigns all five
# of these names, so on a top-to-bottom run these calls change nothing.
assign_default_variable('secret_name', 'syn-oea-hisddev-ondemand-mssql-admin-credentials-password')
assign_default_variable('server_name', 'syn-oea-hisddev-ondemand.sql.azuresynapse.net')
assign_default_variable('database_name', 'sdb_sandbox2_s3')
assign_default_variable('user_name', 'eduanalyticsuser')
assign_default_variable('driver', 'ODBC Driver 18 for SQL Server')



In [91]:
%run OEA/modules/iPEDS/v0.1/utillities/SQL_Essentials

In [93]:
# Open the SQL helper with the connection settings gathered above.
# SQLDatabase and ViewManager are presumably provided by the SQL_Essentials
# notebook that is %run before this cell - confirm.
sql_db = SQLDatabase(server_name, database_name, user_name, db_password)

# Only takes effect when `data_source` has not been defined earlier.
assign_default_variable('data_source', 'sandbox2_data_source')

# Folder holding the stage 3 tables; prod uses a path without the
# 'stage3/' prefix.
# FIXME: 2024-03-06 For prod
root_path = 'iPEDS_DW' if workspace == 'prod' else 'stage3/iPEDS_DW'

# Create (overwriting any existing ones) the views for the listed tables.
view_manager = ViewManager(sql_db, data_source)
view_manager.create_and_populate_views(
    'ipeds',
    root_path,
    ['ipeds_admission_data'],
    overwrite=True,
)

# Semantic View


In [77]:
#query = """Drop VIEW [semantic].[vw_ipeds_admission_data]"""

In [94]:
# DDL for the semantic-layer view over [ipeds].[ipeds_admission_data].
# Each column is exposed under an alias that puts the spaces back in place of
# the underscores used in the stored column names.
# CREATE OR ALTER replaces an existing definition, so the notebook can be
# re-run without dropping the view by hand first (a plain CREATE VIEW fails
# once the view exists).
# NOTE(review): [Admissions_total] is aliased as [Admission total] (no "s"),
# unlike the other aliases - left unchanged in case consumers depend on it.
query = """CREATE OR ALTER VIEW [semantic].[vw_ipeds_admission_data] AS

SELECT [UnitID] as [UnitID]
,[IPEDS_Year] as [IPEDS Year]
,[Full-time_retention_rate___] as [Full-time retention rate   ]
,[Graduation_rate_-_Bachelor_degree_within_4_years__total_] as [Graduation rate - Bachelor degree within 4 years  total ]
,[Graduation_rate_-_Bachelor_degree_within_6_years__total_] as [Graduation rate - Bachelor degree within 6 years  total ]
,[Graduate_enrollment_] as [Graduate enrollment ]
,[Admissions_total] as [Admission total]
,[Enrolled_total] as [Enrolled total]
,[First-time_degree/certificate-seeking_undergraduate_enrollment_] as [First-time degree/certificate-seeking undergraduate enrollment ]
,[Full-time__first-time__degree/certificate_seeking_undergraduates_] as [Full-time  first-time  degree/certificate seeking undergraduates ]
,[Undergraduate_enrollment_] as [Undergraduate enrollment ]
,[Full-time_equivalent_fall_enrollment] as [Full-time equivalent fall enrollment]
,[Published_in-state_tuition_and_fees] as [Published in-state tuition and fees]
,[Average_amount_of_institutional_grant_aid_awarded_to_full-time_first-time_undergraduates_] as [Average amount of institutional grant aid awarded to full-time first-time undergraduates ]
,[Revenues_from_tuition_and_fees_per_FTE] as [Revenues from tuition and fees per FTE]
,[Instruction_expenses_per_FTE] as [Instruction expenses per FTE]
,[Applicants_total] as [Applicants total]
,[Institution_Name] as [Institution Name]
,[State_abbreviation] as [State abbreviation]
,[Control_of_institution] as [Control of institution]
,[Level_of_institution] as [Level of institution]
 FROM [ipeds].[ipeds_admission_data]
"""

In [95]:
# Run the view DDL held in `query` through the `sql_db` helper created
# earlier in the notebook.
sql_db.execute_query(query=query)