In [1]:
from pyspark.sql import SparkSession

# Obtain the notebook's Spark session with default builder settings
# (getOrCreate returns an already-running session when there is one).
spark = (
    SparkSession
    .builder
    .getOrCreate()
)

In [2]:
# Load the CSV with a header row, comma delimiter and Spark-inferred column
# types, then register it so the SQL cells below can query it by name.
unemployment = (
    spark.read
    .format('csv')
    .options(header='true', delimiter=',', inferSchema='true')
    .load('Unemployment in America Per US State.csv')
)
unemployment.createOrReplaceTempView('unemploymentDetails')

In [5]:
# Top 10 states/areas by civilian non-institutional population for
# Year=2022, month=1.
#
# The SQL is assembled from adjacent string literals, each ending in a space,
# so clauses are always separated by whitespace. (A backslash continuation
# inside a single literal joins the lines with no whitespace at all, yielding
# "`State/Area`FROM".)
#
# NOTE: the alias Total_SUM is kept so the displayed column header does not
# change; the column is selected as-is (no aggregate), so it is not a sum.
spark.sql(
    'SELECT `Total Civilian Non-Institutional Population in State/Area` AS Total_SUM, `State/Area` '
    'FROM unemploymentDetails where Year=2022 and month =1 '
    'Order by Total_SUM desc LIMIT 10'
).show()

+---------+------------+
|Total_SUM|  State/Area|
+---------+------------+
| 31078917|  California|
| 22754672|       Texas|
| 17958627|     Florida|
| 16012195|    New York|
| 10495444|Pennsylvania|
| 10055315|    Illinois|
|  9333345|        Ohio|
|  8456830|     Georgia|
|  8071287|    Michigan|
|  7416181|  New Jersey|
+---------+------------+



In [10]:
# Top 10 states/areas by average civilian labor force over the rows with
# Year=2022.
#
# The SQL is assembled from adjacent string literals, each ending in a space,
# so clauses are always separated by whitespace. (A backslash continuation
# inside a single literal joins the lines with no whitespace at all, yielding
# "`State/Area`FROM" and "`State/Area`Order".)
#
# NOTE: the alias Total_SUM is kept so the displayed column header does not
# change; the value is an AVG, not a sum.
spark.sql(
    'SELECT Avg(`Total Civilian Labor Force in State/Area`) AS Total_SUM, `State/Area` '
    'FROM unemploymentDetails where Year=2022 group by `State/Area` '
    'Order by Total_SUM desc LIMIT 10'
).show()

+--------------------+------------+
|           Total_SUM|  State/Area|
+--------------------+------------+
|        1.92419715E7|  California|
|1.4658246083333334E7|       Texas|
|1.0759125666666666E7|     Florida|
|           9616510.0|    New York|
|   6476709.333333333|Pennsylvania|
|          6472601.75|    Illinois|
|           5742445.5|        Ohio|
|   5233049.916666667|     Georgia|
|   4836169.916666667|    Michigan|
|   4739691.333333333|  New Jersey|
+--------------------+------------+



In [19]:
# Top 10 states/areas by average total employment over the rows with
# Year=2022.
#
# The SQL is assembled from adjacent string literals, each ending in a space,
# so clauses are always separated by whitespace. (A backslash continuation
# inside a single literal joins the lines with no whitespace at all, yielding
# "`State/Area`FROM".)
#
# NOTE: despite the "Total_" prefix, the Total_Employment column is an AVG.
spark.sql(
    'SELECT Avg(`Total Employment in State/Area`) AS Total_Employment, `State/Area` '
    'FROM unemploymentDetails where Year=2022 group by `State/Area` '
    'Order by Total_Employment desc LIMIT 10'
).show()

+--------------------+------------+
|    Total_Employment|  State/Area|
+--------------------+------------+
|1.8437955833333332E7|  California|
|1.4089994583333334E7|       Texas|
|1.0446708916666666E7|     Florida|
|   9207902.833333334|    New York|
|          6195460.75|Pennsylvania|
|   6178094.083333333|    Illinois|
|          5511702.75|        Ohio|
|          5074128.75|     Georgia|
|   4633371.333333333|    Michigan|
|          4565442.75|  New Jersey|
+--------------------+------------+



In [31]:
# Top 10 states/areas by average total unemployment over the rows with
# Year=2022.
#
# The SQL is assembled from adjacent string literals, each ending in a space,
# so clauses are always separated by whitespace. (A backslash continuation
# inside a single literal joins the lines with no whitespace at all, yielding
# "`State/Area`FROM".)
#
# NOTE: despite the "Total_" prefix, the Total_Unemployment column is an AVG.
spark.sql(
    'SELECT Avg(`Total Unemployment in State/Area`) AS Total_Unemployment, `State/Area` '
    'FROM unemploymentDetails where Year=2022 group by `State/Area` '
    'Order by Total_Unemployment desc LIMIT 10'
).show()

+------------------+------------+
|Total_Unemployment|  State/Area|
+------------------+------------+
| 804015.6666666666|  California|
|          568251.5|       Texas|
| 408607.1666666667|    New York|
|         312416.75|     Florida|
| 294507.6666666667|    Illinois|
| 281248.5833333333|Pennsylvania|
|         230742.75|        Ohio|
|202798.58333333334|    Michigan|
|174248.58333333334|  New Jersey|
|166458.08333333334|  Washington|
+------------------+------------+



In [38]:
# The 12 highest unemployment counts among (Year, Month, State/Area) groups
# in 2022. MAX is taken within each such group.
# The adjacent literals below concatenate into a single-line SQL statement.
spark.sql(
    'SELECT MAX(`Total Unemployment in State/Area`) AS Total_Unemployment,Year,Month,`State/Area` '
    'FROM unemploymentDetails '
    'where year =2022 '
    'Group by Year,Month,`State/Area` '
    'Order by Total_Unemployment desc LIMIT 12'
).show()

+------------------+----+-----+----------+
|Total_Unemployment|Year|Month|State/Area|
+------------------+----+-----+----------+
|            994165|2022|    1|California|
|            909437|2022|    2|California|
|            837088|2022|    3|California|
|            798481|2022|   10|California|
|            797702|2022|   11|California|
|            791466|2022|    4|California|
|            790262|2022|   12|California|
|            769532|2022|    9|California|
|            760435|2022|    5|California|
|            743821|2022|    6|California|
|            732210|2022|    7|California|
|            723589|2022|    8|California|
+------------------+----+-----+----------+



In [46]:
# The 12 highest employment counts among (Year, Month, State/Area) groups
# in 2022. MAX is taken within each such group.
# The adjacent literals below concatenate into a single-line SQL statement.
spark.sql(
    'SELECT MAX(`Total Employment in State/Area`) AS Total_Employment,Year,Month,`State/Area` '
    'FROM unemploymentDetails '
    'where year=2022  '
    'group by Year,Month,`State/Area` '
    'order by Total_Employment desc LIMIT 12'
).show()

+----------------+----+-----+----------+
|Total_Employment|Year|Month|State/Area|
+----------------+----+-----+----------+
|        18512138|2022|    6|California|
|        18506108|2022|    7|California|
|        18489323|2022|    5|California|
|        18488550|2022|    8|California|
|        18484393|2022|   11|California|
|        18482096|2022|   12|California|
|        18475849|2022|   10|California|
|        18471241|2022|    9|California|
|        18455355|2022|    4|California|
|        18382817|2022|    3|California|
|        18301764|2022|    2|California|
|        18205836|2022|    1|California|
+----------------+----+-----+----------+



In [53]:
# 20 highest monthly unemployment rates (percent of labor force) among rows
# with Year=2022 and Month in (1, 2, 3), with the month number spelled out.
#
# The SQL is assembled from adjacent string literals, each ending in a space,
# so every CASE branch is separated by whitespace. (A backslash continuation
# inside a single literal joins the lines with no whitespace at all, yielding
# '"January"when'.)
#
# The ELSE label is a fallback only: the WHERE clause already restricts Month
# to 1, 2 or 3, so it is not expected to appear in the result.
spark.sql(
    'SELECT `State/Area`, `Percent (%) of Labor Force Unemployed in State/Area`, '
    'case when Month = 1 then "January" '
    'when Month = 2 then "February" '
    'when Month = 3 then "March" '
    'else "NOT IN FIRST QUARTER" end as MonthDetails '
    'from unemploymentDetails where Year=2022 and Month in(1,2,3) '
    'order by `Percent (%) of Labor Force Unemployed in State/Area` desc LIMIT 20'
).show()

+--------------------+---------------------------------------------------+------------+
|          State/Area|Percent (%) of Labor Force Unemployed in State/Area|MonthDetails|
+--------------------+---------------------------------------------------+------------+
|District of Columbia|                                                6.0|     January|
|District of Columbia|                                                5.6|    February|
|              Nevada|                                                5.4|     January|
|          New Mexico|                                                5.4|     January|
|              Nevada|                                                5.3|    February|
|              Nevada|                                                5.3|       March|
|          California|                                                5.2|     January|
|District of Columbia|                                                5.2|       March|
|              Alaska|          

In [54]:
# 20 highest monthly employment rates (percent of labor force) among rows
# with Year=2022 and Month in (1, 2, 3), with the month number spelled out.
#
# The SQL is assembled from adjacent string literals, each ending in a space,
# so every CASE branch is separated by whitespace. (A backslash continuation
# inside a single literal joins the lines with no whitespace at all, yielding
# '"January"when'.)
#
# The ELSE label is a fallback only: the WHERE clause already restricts Month
# to 1, 2 or 3, so it is not expected to appear in the result.
spark.sql(
    'SELECT `State/Area`, `Percent (%) of Labor Force Employed in State/Area`, '
    'case when Month = 1 then "January" '
    'when Month = 2 then "February" '
    'when Month = 3 then "March" '
    'else "NOT IN FIRST QUARTER" end as MonthDetails '
    'from unemploymentDetails where Year=2022 and Month in(1,2,3) '
    'order by `Percent (%) of Labor Force Employed in State/Area` desc LIMIT 20'
).show()

+--------------------+-------------------------------------------------+------------+
|          State/Area|Percent (%) of Labor Force Employed in State/Area|MonthDetails|
+--------------------+-------------------------------------------------+------------+
|            Nebraska|                                             68.4|       March|
|            Nebraska|                                             68.3|    February|
|            Nebraska|                                             68.1|     January|
|        North Dakota|                                             68.0|       March|
|        North Dakota|                                             67.8|    February|
|        North Dakota|                                             67.6|     January|
|        South Dakota|                                             67.3|       March|
|                Utah|                                             67.2|       March|
|        South Dakota|                                

In [64]:
# Top 10 states/areas by average employment rate (percent of labor force).
# There is no WHERE clause, so the average spans every row in the view.
# NOTE: despite the "Total" prefix, TotalPercentEmployment is an AVG.
# The run of spaces before FROM is part of the SQL text and is insignificant
# to the parser.
spark.sql(
    'SELECT AVG(`Percent (%) of Labor Force Employed in State/Area`) AS TotalPercentEmployment, `State/Area` '
    '          FROM unemploymentDetails '
    'group by `State/Area` '
    'Order by TotalPercentEmployment desc LIMIT 10'
).show()

+----------------------+-------------+
|TotalPercentEmployment|   State/Area|
+----------------------+-------------+
|     68.29290780141841|    Minnesota|
|     68.12127659574467|     Nebraska|
|      67.3890070921986| South Dakota|
|      67.1929078014184|New Hampshire|
|     67.00851063829784| North Dakota|
|     66.70602836879435|     Colorado|
|     66.50248226950352|         Iowa|
|     66.06347517730492|      Wyoming|
|      65.9620567375887|         Utah|
|     65.94645390070932|      Vermont|
+----------------------+-------------+



In [65]:
# Top 10 states/areas by average unemployment rate (percent of labor force).
# There is no WHERE clause, so the average spans every row in the view.
# NOTE: despite the "Total" prefix, TotalPercentUnemployment is an AVG.
# The run of spaces before FROM is part of the SQL text and is insignificant
# to the parser.
spark.sql(
    'SELECT AVG(`Percent (%) of Labor Force Unemployed in State/Area`) AS TotalPercentUnemployment, `State/Area` '
    '          FROM unemploymentDetails '
    'group by `State/Area` '
    'Order by TotalPercentUnemployment desc LIMIT 10'
).show()

+------------------------+--------------------+
|TotalPercentUnemployment|          State/Area|
+------------------------+--------------------+
|       7.970212765957453|       West Virginia|
|       7.744148936170211|            Michigan|
|       7.721453900709224|              Alaska|
|      7.5264184397163145|District of Columbia|
|       7.348936170212776|         Mississippi|
|       7.230496453900716|          California|
|      7.0104609929078014|           Louisiana|
|       6.850354609929077|            Illinois|
|       6.847340425531915|             Alabama|
|       6.847163120567378|              Oregon|
+------------------------+--------------------+

