In [0]:
import os, re 
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.types  import DoubleType, IntegerType, StringType, StructType, StructField
from pyspark.sql.functions import col, regexp_replace,when,sum,to_date,avg
from pyspark.sql import functions as F
from pyspark.sql.types import decimal
from pyspark.sql import functions as F
from pyspark.sql import Row


# Bronze-layer input folders.
# CHANGE THESE URLs TO POINT TO YOUR OWN FOLDER OR FILE LOCATION.
# Each entry is read as header-bearing CSV by the processing loop further down.
bronze_Location = [
    # Country -> continent lookup
    "dbfs:/FileStore/tables/Bronze/Urbanisation_Prospects/countries_continents/",
    # Country -> WHO region lookup
    "dbfs:/FileStore/tables/Bronze/Urbanisation_Prospects/owid_who_regions/",
    # Urban / rural population figures
    "dbfs:/FileStore/tables/Bronze/Urbanisation_Prospects/urban_rural_population/",
    # World inequality (pre-tax income) indicators
    "dbfs:/FileStore/tables/Bronze/Urbanisation_Prospects/world_inequality_pretax_income/",
]
# Obtain the Spark session used for every read and write below
# (getOrCreate returns the already-active session when there is one).
spark = (
    SparkSession
    .builder
    .getOrCreate()
)
def sanitize_column_names(df):
    """Return ``df`` with every column renamed to a table-safe identifier.

    For each column name:
      * spaces, commas, semicolons, braces, parentheses, newlines, tabs,
        ``=``, ``.`` and ``%`` are replaced with ``_``;
      * runs of underscores are collapsed into a single ``_``;
      * leading and trailing underscores are stripped.

    Two edge cases that would otherwise yield an unusable frame are handled:
      * a name that sanitizes to the empty string becomes ``col_<position>``;
      * a name that collides with an earlier sanitized name gets a numeric
        suffix (``_2``, ``_3``, ...) so the result never has duplicates.

    Args:
        df: A Spark DataFrame (anything exposing ``columns`` and ``toDF``).

    Returns:
        A new DataFrame with the same data and column order, renamed.
    """
    new_names = []
    taken = set()
    for position, original in enumerate(df.columns):
        cleaned = re.sub(r"[ ,;{}()\n\t=.%]", "_", original)
        cleaned = re.sub(r"_+", "_", cleaned).strip("_")
        if not cleaned:
            # Header consisted solely of replaced characters.
            cleaned = f"col_{position}"

        # Compare case-insensitively: Spark resolves column names without
        # regard to case unless spark.sql.caseSensitive is enabled.
        candidate = cleaned
        suffix = 2
        while candidate.lower() in taken:
            candidate = f"{cleaned}_{suffix}"
            suffix += 1

        taken.add(candidate.lower())
        new_names.append(candidate)

    # Rename positionally in one projection instead of stacking one
    # withColumnRenamed per column; positional renaming also keeps colliding
    # names from being renamed together.
    return df.toDF(*new_names)
# Read every bronze source, apply its source-specific clean-up, then persist
# each result as a managed table.


def _prepare_countries_continents(df):
    """Rename the continent-lookup columns to Country / Continent."""
    # Column names were already sanitized (spaces -> underscores), so the
    # rename has to target "Countries_Continents"; withColumnRenamed is a
    # silent no-op when the named column does not exist.
    return (
        df.withColumnRenamed("Countries_Continents", "Continent")
          .withColumnRenamed("Entity", "Country")
    )


def _prepare_owid_who_regions(df):
    """Rename the WHO-region lookup columns to Country / Continent."""
    # Same reasoning as above: the sanitized header is "WHO_region".
    return (
        df.withColumnRenamed("Entity", "Country")
          .withColumnRenamed("WHO_region", "Continent")
    )


def _prepare_urban_rural_population(df):
    """Add each row's urban population as a percentage of the column total.

    Raises:
        ValueError: if the urban-population column sums to zero or null.
    """
    urban_col = "Urban_population_1950-2050_UN_World_Urbanization_Prospects_2018"
    total_sum = df.agg(F.sum(urban_col).alias("Total")).collect()[0]["Total"]
    # Check if total_sum is None/zero to avoid dividing by zero.
    if total_sum is None or total_sum == 0:
        raise ValueError("Total sum of 'Urban population 1950-2050' is zero or null.")
    df = df.withColumn(
        "Urban_population_percent_of_total",
        (F.col(urban_col) / total_sum) * 100,
    )
    return df.fillna({"Urban_population_percent_of_total": 0})


def _prepare_world_inequality_pretax_income(df):
    """Rename Entity to Country and replace nulls in every column with 0."""
    df = df.withColumnRenamed("Entity", "Country")
    # One fillna call with a full mapping instead of one call per column.
    return df.fillna({col_name: 0 for col_name in df.columns})


# (substring looked for in the bronze path, target table name, clean-up function)
# Checked in order; the first matching entry wins.
_BRONZE_SOURCES = (
    ("countries_continents", "countries", _prepare_countries_continents),
    ("owid_who_regions", "owid_who_regions", _prepare_owid_who_regions),
    ("urban_rural_population", "urban_rural_population", _prepare_urban_rural_population),
    ("world_inequality_pretax_income", "world_inequality_pretax_income",
     _prepare_world_inequality_pretax_income),
)

dfs = []
Table_name_list = []

try:
    for bronze_path in bronze_Location:
        source = next(
            (entry for entry in _BRONZE_SOURCES if entry[0] in bronze_path),
            None,
        )
        if source is None:
            # Refuse to guess: pairing by list position could write this
            # data into a table that belongs to a different source.
            raise ValueError(f"No table mapping for bronze location: {bronze_path}")
        _, table_name, prepare = source

        print(f"reading from file: {bronze_path}")
        df = spark.read.option("header", True).csv(bronze_path)
        df = prepare(sanitize_column_names(df))

        # Keep each frame and its table name together so they cannot drift apart.
        dfs.append(df)
        Table_name_list.append(table_name)

    for df, table_name in zip(dfs, Table_name_list):
        # Best effort per table: a failed write is reported and the
        # remaining tables are still attempted.
        try:
            print(f"Writing to table: {table_name}")
            df.write.mode("overwrite").option("overwriteSchema", "true").saveAsTable(table_name)
            print(f"Successfully wrote table: {table_name}")
        except Exception as e:
            print(f"Error writing table {table_name}: {e}")
except Exception as e:
    # Any read/transform failure aborts the run before tables are written.
    print("Error: ", e)


reading from file: dbfs:/FileStore/tables/Bronze/Urbanisation_Prospects/countries_continents/
reading from file: dbfs:/FileStore/tables/Bronze/Urbanisation_Prospects/owid_who_regions/
reading from file: dbfs:/FileStore/tables/Bronze/Urbanisation_Prospects/urban_rural_population/
reading from file: dbfs:/FileStore/tables/Bronze/Urbanisation_Prospects/world_inequality_pretax_income/
Writing to table: countries
Successfully wrote table: countries
Writing to table: owid_who_regions
Successfully wrote table: owid_who_regions
Writing to table: urban_rural_population
Successfully wrote table: urban_rural_population
Writing to table: world_inequality_pretax_income
Successfully wrote table: world_inequality_pretax_income


In [0]:
%sql
-- Show all rows and columns of the world_inequality_pretax_income table.
SELECT *
FROM world_inequality_pretax_income

Country,Year,Gini_coefficient,Palma_ratio_S90/S40_ratio,S90/S10_ratio,S80/S20_ratio,S90/S50_ratio,P90/P10_ratio,P90/P50_ratio,P50/P10_ratio,P0-P10_-_share_of_the_bottom_10,P0-P40_-_share_of_the_bottom_40,P0-P50_-_share_of_the_bottom_50,P10-P20_-_share_of_national_income,P20-P30_-_share_of_national_income,P30-P40_-_share_of_national_income,P40-P50_-_share_of_national_income,P50-P60_-_share_of_national_income,P50-P90_-_share_of_the_middle_40,P60-P70_-_share_of_national_income,P70-P80_-_share_of_national_income,P80-P90_-_share_of_national_income,P90-P100_-_share_of_the_top_10,P99_999-P100_-_share_of_the_top_0_001,P99_99-P100_-_share_of_the_top_0_01,P99_9-P100_-_share_of_the_top_0_1,P99-P100_-_share_of_the_top_1,Mean_income,P0-P10_-_mean_income_of_the_bottom_10,P10-P20_-_mean_income,P20-P30_-_mean_income,P30-P40_-_mean_income,P40-P50_-_mean_income,P50-P60_-_mean_income,P60-P70_-_mean_income,P70-P80_-_mean_income,P80-P90_-_mean_income,P90-P100_-_mean_income_of_the_top_10,P99_999-P100_-_mean_income_of_the_top_0_001,P99_99-P100_-_mean_income_of_the_top_0_01,P99_9-P100_-_mean_income_of_the_top_0_1,P99-P100_-_mean_income_of_the_top_1,P0_-_income_threshold_of_the_bottom_10,P10_-_income_threshold,P20_-_income_threshold,P30_-_income_threshold,P40_-_income_threshold,P50_-_income_threshold_median,P60_-_income_threshold,P70_-_income_threshold,P80_-_income_threshold,P90_-_income_threshold_of_the_top_10,P99_999_-_income_threshold_of_the_top_0_001,P99_99_-_income_threshold_of_the_top_0_01,P99_9_-_income_threshold_of_the_top_0_1,P99_-_income_threshold_of_the_top_1
Cape Verde,1990,0.700339953532129,11.817475,468.15384,63.5,7.019608,35.651764,4.9997916,7.13065,0.13,5.15,8.67,1.03,1.81,2.47,3.24,4.2700000000000005,30.46,5.64,7.76,12.8,60.86,0.7100000000000001,2.26,7.249999999999999,23.19,4050.563202585264,52.05420436415728,416.4314722957109,731.4642956499077,999.832157567886,1313.372774814427,1728.900272975356,2282.718512817647,3143.658721026013,5184.373842445143,24652.81928404375,2863644.977630571,916722.4148800723,293465.1596612888,93945.3367852628,0.0,208.2168174566291,614.2374488795085,856.359784243128,1135.023868303929,1484.721288324228,1954.227720466435,2629.716986138882,3795.471649790326,7423.297187311874,1437755.809553633,460260.5933421199,147340.6148923982,47167.31586628733
Cape Verde,1991,0.70034003711833,11.817475,468.15384,63.5,7.019608,35.651859,4.9997945,7.1306643,0.13,5.15,8.67,1.03,1.81,2.47,3.24,4.2700000000000005,30.46,5.64,7.76,12.8,60.86,0.7100000000000001,2.26,7.249999999999999,23.19,3966.095686421772,50.96640773785187,407.7485628431936,716.2113540886115,978.9845244116552,1285.983223577533,1692.84727584387,2235.119300600983,3078.101132696347,5076.262428636726,24138.7304911634,2803928.822582284,897605.8359638941,287345.4537296249,91986.26912672544,0.0,203.8742814215968,601.430427764239,838.5008885373029,1111.354019248437,1453.75909289915,1913.475355404608,2574.879492991315,3716.324172793257,7268.497023274218,1407773.885410407,450662.6371529778,144268.0760120336,46183.72064126477
Cape Verde,1992,0.700339997886367,11.817475,468.15384,63.5,7.019608,35.65181,4.9997888,7.1306634,0.13,5.15,8.67,1.03,1.81,2.47,3.24,4.2700000000000005,30.46,5.64,7.76,12.8,60.86,0.7100000000000001,2.26,7.249999999999999,23.19,4269.420098374741,54.86560717568219,438.9356704931941,770.9839687097586,1053.856506517658,1384.334744394863,1822.312375314592,2406.059079394339,3313.515028428158,5464.4890428802,25984.84247258632,3018371.206099519,966254.0599575609,309321.4196154682,99021.31158576572,0.0,219.4667539378234,647.4271403783338,902.628986668186,1196.349214093477,1564.943586242318,2059.817522214668,2771.803121615803,4000.546183950688,7824.387050726848,1515439.367480564,485129.0082946919,155301.5993552669,49715.82007567144
Cape Verde,1993,0.70033995725742,11.817475,468.15384,63.5,7.019608,35.651802,4.9997945,7.1306534,0.13,5.15,8.67,1.03,1.81,2.47,3.24,4.2700000000000005,30.46,5.64,7.76,12.8,60.86,0.7100000000000001,2.26,7.249999999999999,23.19,4480.655929927439,57.58185481512471,460.6483506683557,809.1325422446042,1105.995052966161,1452.827004736252,1912.474063480195,2525.102524904492,3477.456576838365,5734.855163411909,27270.47967735629,3167709.517325084,1014060.870483063,324625.56763505,103920.5421419783,0.0,230.3252566429515,679.4598314893391,947.2870390204856,1255.540056363813,1642.369619671714,2161.728711514898,2908.943350761987,4198.479754969779,8211.510730021186,1590418.009884002,509131.511181314,162985.3795009192,52175.58776185364
Cape Verde,1994,0.70033995725742,11.817475,468.15384,63.5,7.019608,35.651798,4.9997926,7.1306553,0.13,5.15,8.67,1.03,1.81,2.47,3.24,4.2700000000000005,30.46,5.64,7.76,12.8,60.86,0.7100000000000001,2.26,7.249999999999999,23.19,5178.690483530768,66.55239240143648,532.4126513588499,935.1871938431916,1278.297280814333,1679.162232004385,2210.415720357868,2918.484819380794,4019.205248149025,6628.282212416219,31518.91373505177,3661203.128045973,1172040.179633079,375198.5909158081,120110.1955424886,0.0,266.2074069881986,785.311309960799,1094.86406045007,1451.140163049337,1898.233226931048,2498.502329072372,3362.124183039424,4852.554781688595,9490.772538261626,1838187.372187201,588448.4941417415,188376.6829371714,60303.95882288539
Cape Verde,1995,0.700340030133411,11.817475,468.15384,63.5,7.019608,35.651936,4.9997902,7.1306863,0.13,5.15,8.67,1.03,1.81,2.47,3.24,4.2700000000000005,30.46,5.64,7.76,12.8,60.86,0.7100000000000001,2.26,7.249999999999999,23.19,5704.472549489514,73.30624700173823,586.4672769542846,1030.13475464103,1408.082447682059,1849.639210642614,2434.837031096617,3214.791537392688,4427.260902683939,7301.234727705652,34718.95838338923,4032916.719174937,1291034.762962368,413291.6197815682,132304.7006524464,0.0,293.2336384771423,865.0426936956454,1206.024765000217,1598.470646078526,2090.957052278628,2752.168716903623,3703.471736709494,5345.222847762672,10454.34625003103,2024814.000126549,648192.231214227,207502.0939834131,66426.46966950828
Cape Verde,1996,0.700340077875971,11.817475,468.15384,63.5,7.019608,35.651794,4.9997921,7.1306548,0.13,5.15,8.67,1.03,1.81,2.47,3.24,4.2700000000000005,30.46,5.64,7.76,12.8,60.86,0.7100000000000001,2.26,7.249999999999999,23.19,6135.043215309647,78.84038530534764,630.7338955305178,1107.885180702525,1514.364287045438,1989.253474262941,2618.614107650906,3457.443714055402,4761.432891331978,7852.330232055858,37339.52965992047,4337319.836110043,1388481.515964276,444486.7399295815,142291.0132128849,0.0,315.3680290740326,930.3342800669903,1297.053662802299,1719.123079043891,2248.780555647446,2959.901108029651,3983.009518259415,5748.678614774422,11243.43565284701,2177646.182196138,697117.5605109119,223164.2892893655,71440.31463049406
Cape Verde,1997,0.700339975883871,11.817475,468.15384,63.5,7.019608,35.651802,4.9997902,7.1306596,0.13,5.15,8.67,1.03,1.81,2.47,3.24,4.2700000000000005,30.46,5.64,7.76,12.8,60.86,0.7100000000000001,2.26,7.249999999999999,23.19,6634.212109730989,85.25903418581373,682.0528099285839,1198.025242692655,1637.577259186837,2151.105934122429,2831.673025795999,3738.752679376584,5148.839873525016,8491.22368359244,40377.60506705089,4690219.590990663,1501453.28079064,480651.7433490786,153868.3159253058,0.0,341.0274862730656,1006.030219458525,1402.587236494302,1858.996856769897,2431.750975856582,3200.730198099969,4307.079920343891,6216.41170052782,12158.24450154174,2354827.534745185,753837.6122334268,241321.7343475991,77252.95914708331
Cape Verde,1998,0.700339962379694,11.817475,468.15384,63.5,7.019608,35.651867,4.9997907,7.130672,0.13,5.15,8.67,1.03,1.81,2.47,3.24,4.2700000000000005,30.46,5.64,7.76,12.8,60.86,0.7100000000000001,2.26,7.249999999999999,23.19,7243.073291387843,93.08338447204228,744.6497748359596,1307.979206651394,1787.864040403222,2348.526964782803,3091.55261384072,4081.880067288104,5621.380458086122,9270.512103683934,44083.29564936394,5120668.293831597,1639250.480708439,524763.9791464609,167989.7154325493,0.0,372.3248874179798,1098.35885040655,1531.310558146291,2029.60791769606,2654.926618888071,3494.480702170922,4702.367480879388,6786.927510453022,13274.07730173069,2570943.788569286,823021.6506758543,263469.2520330016,84342.91262826778
Cape Verde,1999,0.700339965406492,11.817475,468.15384,63.5,7.019608,35.651882,4.9997921,7.1306725,0.13,5.15,8.67,1.03,1.81,2.47,3.24,4.2700000000000005,30.46,5.64,7.76,12.8,60.86,0.7100000000000001,2.26,7.249999999999999,23.19,7810.652266683775,100.3778934591757,803.0015214979327,1410.47646530695,1927.964892971698,2532.561392825223,3333.811194109821,4401.744178243029,6061.88186368358,9996.967777094233,47537.7441381163,5521933.169234206,1767705.127406818,565885.4381450902,181153.6938749456,0.0,401.5007607489663,1184.428866172611,1651.307717994815,2188.651137361571,2862.970426940914,3768.313498630995,5070.853722150862,7318.764905544283,14314.25741487905,2772407.702977334,887515.1446675338,284115.1778505933,90952.17894459036
