***@Author: Ranjith G C***
<br>
***@Date: 2021-09-30***
<br>
***@Last Modified by: Ranjith G C***
<br>
***@Last Modified time: 2021-09-30***
<br>
***@Title : Program aims to practice Spark SQL operations on a COVID dataset.***

In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import *
from pyspark.sql.functions import col, max as max_, min as min_, unix_timestamp

# Build the single local session used by the whole notebook.
# Hive support is requested here, at creation time: getOrCreate() hands back the
# already-running session on later calls, and the catalog implementation is a
# static setting, so asking for Hive afterwards does not change where
# saveAsTable() registers its tables.
spark = (
    SparkSession.builder
    .master("local")
    .appName('covid_data')
    .enableHiveSupport()
    .getOrCreate()
)
# Handle on the underlying SparkContext of that session.
sc = spark.sparkContext

In [2]:
# Read every CSV matching the glob on HDFS; the first row of each file supplies the column names.
# No schema is given or inferred, so the columns are loaded as strings.
df = spark.read.csv("hdfs://localhost:9000/covid/*.csv", header=True)
# Make the frame queryable from spark.sql() under the view name "ranjith".
df.createOrReplaceTempView("ranjith")

# Display State and Active cases for states that have 1000 or more active cases

In [11]:
# Keep only the rows whose Active value is at least 1000 and show State next to it.
active_sql = "select State,Active from ranjith where active >= 1000"
where = spark.sql(active_sql)
where.show()

+-----------------+------+
|            State|Active|
+-----------------+------+
|   Andhra Pradesh| 14388|
|            Assam|  4984|
| Himachal Pradesh|  1616|
|Jammu and Kashmir|  1461|
|        Karnataka| 14386|
|           Kerala|167578|
|      Maharashtra| 45229|
|          Manipur|  2183|
|        Meghalaya|  1896|
|          Mizoram| 15140|
|           Odisha|  4947|
|       Tamil Nadu| 16984|
|        Telengana|  4991|
|      West Bengal|  7810|
+-----------------+------+



# Display State and Death ratio for states whose death ratio is between 0.0 and 1.0 (inclusive)

In [22]:
# Select State and DeathRatio for rows whose DeathRatio lies in the closed range [0.00, 1.00].
death_ratio_sql = "select State, DeathRatio from ranjith where DeathRatio >= 0.00 and DeathRatio <= 1.00"
Death_ratio = spark.sql(death_ratio_sql)
Death_ratio.show()

+--------------------+----------+
|               State|DeathRatio|
+--------------------+----------+
|      Andhra Pradesh|      0.69|
|   Arunachal Pradesh|       0.5|
|               Assam|      0.97|
|Dadra and Nagar H...|      0.04|
|              Kerala|      0.52|
|              Ladakh|       1.0|
|         Lakshadweep|      0.49|
|             Mizoram|      0.32|
|              Odisha|       0.8|
|           Rajasthan|      0.94|
|           Telengana|      0.59|
|             Tripura|      0.96|
+--------------------+----------+



# Display the sum of Active Cases

In [26]:
# Add up the Active column over every row of the view and print the single-row result.
active_sum_sql = "select sum(Active) from ranjith"
total = spark.sql(active_sum_sql)
total.show()

+---------------------------+
|sum(CAST(Active AS DOUBLE))|
+---------------------------+
|                   309575.0|
+---------------------------+



# Display the Average Discharge Ratio in all states

In [28]:
# Mean of the DischargeRatio column across all rows of the view.
discharge_avg_sql = "select avg(DischargeRatio) from ranjith"
average = spark.sql(discharge_avg_sql)
average.show()

+-----------------------------------+
|avg(CAST(DischargeRatio AS DOUBLE))|
+-----------------------------------+
|                  97.62027777777776|
+-----------------------------------+



# Display the Average Death Ratio in all states

In [29]:
# Mean of the DeathRatio column across all rows of the view.
death_avg_sql = "select avg(DeathRatio) from ranjith"
average = spark.sql(death_avg_sql)
average.show()

+-------------------------------+
|avg(CAST(DeathRatio AS DOUBLE))|
+-------------------------------+
|             1.2672222222222222|
+-------------------------------+



# Display the sum of Deaths

In [30]:
# Add up the Deaths column over every row of the view and print the single-row result.
deaths_sum_sql = "select sum(Deaths) from ranjith"
total = spark.sql(deaths_sum_sql)
total.show()

+---------------------------+
|sum(CAST(Deaths AS DOUBLE))|
+---------------------------+
|                   445385.0|
+---------------------------+



# Saving covid data into hive table

In [3]:
# Configure a builder with the same master and app name plus Hive support, then obtain a session from it.
# NOTE(review): getOrCreate() may return a session that is already running — confirm Hive support is actually active.
hive_builder = SparkSession.builder.master("local").appName('covid_data').enableHiveSupport()
spark = hive_builder.getOrCreate()

In [4]:
# Columns to carry into the saved table, in the order they should appear.
hive_columns = [
    "State",
    "TotalCases",
    "Active",
    "Discharged",
    "Deaths",
    "ActiveRatio",
    "DischargeRatio",
    "DeathRatio",
]
hive1 = df.select(*hive_columns)

In [6]:
# Save the selected columns as table default.covid_data, replacing any existing table of that name.
table_writer = hive1.write.mode("overwrite")
table_writer.saveAsTable("default.covid_data")

In [7]:
# Query the saved table back and print its rows to check what was written.
saved_table = spark.sql("select *from default.covid_data")
saved_table.show()

+--------------------+----------+------+----------+------+-----------+--------------+----------+
|               State|TotalCases|Active|Discharged|Deaths|ActiveRatio|DischargeRatio|DeathRatio|
+--------------------+----------+------+----------+------+-----------+--------------+----------+
| Andaman and Nicobar|      7600|    13|      7458|   129|       0.17|         98.13|       1.7|
|      Andhra Pradesh|   2039529| 14388|   2011063| 14078|       0.71|          98.6|      0.69|
|   Arunachal Pradesh|     54126|   410|     53444|   272|       0.76|         98.74|       0.5|
|               Assam|    598423|  4984|    587632|  5807|       0.83|          98.2|      0.97|
|               Bihar|    725901|    69|    716173|  9659|       0.01|         98.66|      1.33|
|          Chandigarh|     65188|    41|     64329|   818|       0.06|         98.68|      1.25|
|        Chhattisgarh|   1005094|   297|    991234| 13563|       0.03|         98.62|      1.35|
|Dadra and Nagar H...|     106