In [0]:
# Mount the Azure Blob Storage
dbutils.fs.mount(
    source = "wasbs://raw@aipoweredinsights.blob.core.windows.net",
    mount_point = "/mnt/raw2",
    extra_configs = {"fs.azure.account.key.aipoweredinsights.blob.core.windows.net":"enter-your-key"}
)

Out[2]: True

In [0]:
# Listing content
dbutils.fs.ls("/mnt/raw2/")

Out[4]: [FileInfo(path='dbfs:/mnt/raw2/dbo.ai_job_market_insights.txt', name='dbo.ai_job_market_insights.txt', size=54485, modificationTime=1726382432000)]

In [0]:
# Create a dataframe
df = spark.read.format("csv").options(header='True', inferSchema='True').load('dbfs:/mnt/raw2/dbo.ai_job_market_insights.txt')

In [0]:
# View content
display(df)

Job_Title,Industry,Company_Size,Location,AI_Adoption_Level,Automation_Risk,Required_Skills,Salary_USD,Remote_Friendly,Job_Growth_Projection
Cybersecurity Analyst,Entertainment,Small,Dubai,Medium,High,UX/UI Design,111392.1640625,True,Growth
Marketing Specialist,Technology,Large,Singapore,Medium,High,Marketing,93792.5625,False,Decline
AI Researcher,Technology,Large,Singapore,Medium,High,UX/UI Design,107170.265625,True,Growth
Sales Manager,Retail,Small,Berlin,Low,High,Project Management,93027.953125,False,Growth
Cybersecurity Analyst,Entertainment,Small,Tokyo,Low,Low,JavaScript,87752.921875,True,Decline
UX Designer,Education,Large,San Francisco,Medium,Medium,Cybersecurity,102825.0078125,False,Growth
HR Manager,Finance,Medium,Singapore,Low,High,Sales,102065.71875,True,Growth
Cybersecurity Analyst,Technology,Small,Dubai,Medium,Low,Machine Learning,86607.3203125,True,Decline
AI Researcher,Retail,Large,London,High,Low,JavaScript,75015.859375,False,Stable
Sales Manager,Entertainment,Medium,Singapore,High,Low,Cybersecurity,96834.578125,True,Decline


In [0]:
# Create table
df.createOrReplaceTempView("jobs")

In [0]:
%sql
SELECT *
FROM jobs
LIMIT 10;

Job_Title,Industry,Company_Size,Location,AI_Adoption_Level,Automation_Risk,Required_Skills,Salary_USD,Remote_Friendly,Job_Growth_Projection
Cybersecurity Analyst,Entertainment,Small,Dubai,Medium,High,UX/UI Design,111392.1640625,True,Growth
Marketing Specialist,Technology,Large,Singapore,Medium,High,Marketing,93792.5625,False,Decline
AI Researcher,Technology,Large,Singapore,Medium,High,UX/UI Design,107170.265625,True,Growth
Sales Manager,Retail,Small,Berlin,Low,High,Project Management,93027.953125,False,Growth
Cybersecurity Analyst,Entertainment,Small,Tokyo,Low,Low,JavaScript,87752.921875,True,Decline
UX Designer,Education,Large,San Francisco,Medium,Medium,Cybersecurity,102825.0078125,False,Growth
HR Manager,Finance,Medium,Singapore,Low,High,Sales,102065.71875,True,Growth
Cybersecurity Analyst,Technology,Small,Dubai,Medium,Low,Machine Learning,86607.3203125,True,Decline
AI Researcher,Retail,Large,London,High,Low,JavaScript,75015.859375,False,Stable
Sales Manager,Entertainment,Medium,Singapore,High,Low,Cybersecurity,96834.578125,True,Decline


# Transformations

## 1. Average Salary by Industry.

In [0]:
%sql
SELECT 
  Industry,
  AVG(Salary_USD) AS Average_Industry_Salary
FROM jobs
GROUP BY Industry
ORDER BY Average_Industry_Salary DESC;

Industry,Average_Industry_Salary
Finance,94355.46572818396
Entertainment,94291.23362699468
Education,93798.52186129386
Energy,92763.93949298467
Healthcare,91688.50409226192
Retail,91232.89733355978
Technology,91204.01157924108
Telecommunications,90418.30439268867
Manufacturing,86590.0035695043
Transportation,85058.29712540064


## 2. AI Adoption Rate per Industry.

In [0]:
%sql
SELECT Industry,
       AVG(CASE WHEN AI_Adoption_Level = 'High' THEN 1 ELSE 0 END) as High_AI_Adoption_Rate
FROM jobs
GROUP BY Industry
ORDER BY High_AI_Adoption_Rate DESC;

Industry,High_AI_Adoption_Rate
Healthcare,0.4047619047619047
Transportation,0.3076923076923077
Energy,0.3061224489795918
Retail,0.3043478260869565
Manufacturing,0.293103448275862
Finance,0.2830188679245283
Telecommunications,0.2830188679245283
Education,0.2807017543859649
Entertainment,0.2765957446808511
Technology,0.2321428571428571


## 3. Automation Risk Distribution.

In [0]:
%sql
SELECT Automation_Risk, COUNT(*) as Job_Count
FROM jobs
GROUP BY Automation_Risk
ORDER BY Job_Count DESC;

Automation_Risk,Job_Count
Medium,173
High,169
Low,158


## 4. Remote Work Prevalence.

In [0]:
%sql
SELECT Industry,
       SUM(CASE WHEN Remote_friendly = TRUE THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as Remote_Work_Rate
FROM jobs
GROUP BY Industry
ORDER BY Remote_Work_Rate DESC;

Industry,Remote_Work_Rate
Entertainment,0.5957446808510638
Healthcare,0.5714285714285714
Energy,0.5510204081632653
Transportation,0.5128205128205128
Finance,0.5094339622641509
Education,0.5087719298245614
Telecommunications,0.490566037735849
Technology,0.4464285714285714
Retail,0.4347826086956521
Manufacturing,0.4310344827586206


## 5. Job Growth Projection by Industry.

In [0]:
%sql
SELECT Industry, Job_Growth_Projection
FROM jobs
GROUP BY Industry, Job_Growth_Projection
ORDER BY Job_Growth_Projection DESC;

Industry,Job_Growth_Projection
Healthcare,Stable
Telecommunications,Stable
Manufacturing,Stable
Entertainment,Stable
Transportation,Stable
Finance,Stable
Retail,Stable
Education,Stable
Technology,Stable
Energy,Stable


## 6. Skills Demand.

In [0]:
%sql
SELECT Industry, Required_Skills, COUNT(*) AS Total_Count
FROM jobs
GROUP BY Required_Skills, Industry
ORDER BY Total_Count DESC
LIMIT 10;

Industry,Required_Skills,Total_Count
Education,Project Management,10
Technology,Marketing,10
Education,Cybersecurity,9
Retail,Cybersecurity,9
Education,Data Analysis,9
Finance,Python,9
Telecommunications,Project Management,9
Technology,Sales,9
Energy,UX/UI Design,9
Manufacturing,Marketing,8


## 7. Salary Range for Different Company Sizes. 

In [0]:
%sql
SELECT Company_Size,
       MIN(Salary_USD) as Min_Salary,
       AVG(Salary_USD) as Avg_Salary,
       MAX(Salary_USD) as Max_Salary
FROM jobs
GROUP BY Company_Size
ORDER BY Avg_Salary DESC;

Company_Size,Min_Salary,Avg_Salary,Max_Salary
Small,31969.525390625,92165.97428956506,140476.0
Medium,41396.15625,90862.77511503069,155209.828125
Large,33601.3828125,90603.50315323797,138448.109375


## 8. Location-based Salary Variations.

In [0]:
%sql
SELECT Location,
       AVG(Salary_USD) as Avg_Salary,
       COUNT(*) as Job_Count
FROM jobs
GROUP BY Location
HAVING COUNT(*) > 10
ORDER BY Avg_Salary DESC;

Location,Avg_Salary,Job_Count
New York,93780.4263791454,49
Singapore,93739.80700231482,54
Berlin,93240.36889648438,48
Tokyo,92896.55392156864,51
Paris,92116.47359035326,46
Sydney,91885.40993088942,52
San Francisco,88953.23658014114,62
Toronto,88839.92902057926,41
London,88810.67263926631,46
Dubai,87892.45572916667,51


## 9. Correlation between AI Adoption and Salary.

In [0]:
%sql
SELECT AI_Adoption_Level,
       AVG(Salary_USD) as Avg_Salary,
       COUNT(*) as Job_Count
FROM jobs
GROUP BY AI_Adoption_Level
ORDER BY Avg_Salary DESC;

AI_Adoption_Level,Avg_Salary,Job_Count
Low,93353.59548985273,174
Medium,92139.14426937848,179
High,87583.42198129251,147


## 10. High-Growth Jobs with Low Automation Risk.

In [0]:
%sql
SELECT Job_title,
       AVG(Job_Growth_Projection) as Avg_Growth,
       AVG(CASE WHEN Automation_Risk = 'Low' THEN 1 ELSE 0 END) as Low_Risk_Rate
FROM jobs
GROUP BY Job_title
HAVING COUNT(*) > 5
ORDER BY Avg_Growth DESC, Low_Risk_Rate DESC
LIMIT 10;

Job_title,Avg_Growth,Low_Risk_Rate
Operations Manager,,0.4090909090909091
Data Scientist,,0.4032258064516129
Cybersecurity Analyst,,0.3818181818181818
Sales Manager,,0.3673469387755102
Software Engineer,,0.3170731707317073
Product Manager,,0.3076923076923077
AI Researcher,,0.2941176470588235
HR Manager,,0.2807017543859649
Marketing Specialist,,0.2083333333333333
UX Designer,,0.1851851851851851
