# Setup

In [45]:
from google.colab import drive
drive.mount('/content/drive')

!wget https://dlcdn.apache.org/spark/spark-3.3.0/spark-3.3.0-bin-hadoop3.tgz
!tar -xvzf spark-3.3.0-bin-hadoop3.tgz
!pip install findspark

import os
os.environ["SPARK_HOME"] = "/content/spark-3.3.0-bin-hadoop3"
import findspark
findspark.init()

from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("IPL Analysis").getOrCreate()

from IPython.display import clear_output 
clear_output()

print(spark.sparkContext.appName ,"is Running..")

IPL Analysis is Running..


# Data Exploration

In [46]:
df1=spark.read.option("inferSchema", "true").option("header", "true").csv("/content/drive/MyDrive/Colab Notebooks/SparkData/IPL Matches.csv")
df1.createOrReplaceTempView("ipl");

In [47]:
spark.sql("SHOW TABLES")

DataFrame[namespace: string, tableName: string, isTemporary: boolean]

In [48]:
spark.sql("desc ipl").show()

+---------------+---------+-------+
|       col_name|data_type|comment|
+---------------+---------+-------+
|             id|      int|   null|
|           city|   string|   null|
|           date|timestamp|   null|
|player_of_match|   string|   null|
|          venue|   string|   null|
|  neutral_venue|      int|   null|
|          team1|   string|   null|
|          team2|   string|   null|
|    toss_winner|   string|   null|
|  toss_decision|   string|   null|
|         winner|   string|   null|
|         result|   string|   null|
|  result_margin|   string|   null|
|     eliminator|   string|   null|
|         method|   string|   null|
|        umpire1|   string|   null|
|        umpire2|   string|   null|
+---------------+---------+-------+



In [49]:
spark.sql("select count(*) count from ipl").show()

+-----+
|count|
+-----+
|  816|
+-----+



In [50]:
spark.sql("select * from ipl limit 10").show()

+------+----------+-------------------+---------------+--------------------+-------------+--------------------+--------------------+--------------------+-------------+--------------------+-------+-------------+----------+------+---------+--------------+
|    id|      city|               date|player_of_match|               venue|neutral_venue|               team1|               team2|         toss_winner|toss_decision|              winner| result|result_margin|eliminator|method|  umpire1|       umpire2|
+------+----------+-------------------+---------------+--------------------+-------------+--------------------+--------------------+--------------------+-------------+--------------------+-------+-------------+----------+------+---------+--------------+
|335982| Bangalore|2008-04-18 00:00:00|    BB McCullum|M Chinnaswamy Sta...|            0|Royal Challengers...|Kolkata Knight Ri...|Royal Challengers...|        field|Kolkata Knight Ri...|   runs|          140|         N|    NA|Asad Rauf|

# Counting the number of matches played in each city.

In [51]:
spark.sql("select city,count(*) count from ipl group by city order by count(*) desc").show()

+-------------+-----+
|         city|count|
+-------------+-----+
|       Mumbai|  101|
|      Kolkata|   77|
|        Delhi|   74|
|    Bangalore|   65|
|    Hyderabad|   64|
|      Chennai|   57|
|   Chandigarh|   56|
|       Jaipur|   47|
|         Pune|   38|
|    Abu Dhabi|   29|
|        Dubai|   26|
|       Durban|   15|
|    Bengaluru|   15|
|Visakhapatnam|   13|
|           NA|   13|
|    Ahmedabad|   12|
|      Sharjah|   12|
|    Centurion|   12|
|       Rajkot|   10|
|   Dharamsala|    9|
+-------------+-----+
only showing top 20 rows



# Counting the number of matches won by each team in 2014.

In [52]:
spark.sql("select winner,count(*) count from ipl where date like '2014-%' group by winner order by count(*) desc").show()

+--------------------+-----+
|              winner|count|
+--------------------+-----+
|     Kings XI Punjab|   12|
|Kolkata Knight Ri...|   11|
| Chennai Super Kings|   10|
|      Mumbai Indians|    7|
|    Rajasthan Royals|    7|
| Sunrisers Hyderabad|    6|
|Royal Challengers...|    5|
|    Delhi Daredevils|    2|
+--------------------+-----+



# Finding the Player who won the most MOM award.

In [53]:
spark.sql("select player_of_match,count(*) count from ipl group by player_of_match order by count(*) desc").show()

+---------------+-----+
|player_of_match|count|
+---------------+-----+
| AB de Villiers|   23|
|       CH Gayle|   22|
|      RG Sharma|   18|
|      DA Warner|   17|
|       MS Dhoni|   17|
|      SR Watson|   16|
|      YK Pathan|   16|
|       SK Raina|   14|
|      G Gambhir|   13|
|        V Kohli|   13|
|     MEK Hussey|   12|
|      AM Rahane|   12|
|     KA Pollard|   11|
|       DR Smith|   11|
|     AD Russell|   11|
|       A Mishra|   11|
|       V Sehwag|   11|
|      JH Kallis|   10|
|      AT Rayudu|   10|
|       SE Marsh|    9|
+---------------+-----+
only showing top 20 rows



# Find the top 10 umpires who was present in most of the matches.

In [54]:
spark.sql("select umpire,count(*) from ((select umpire1 umpire from ipl) union all (select umpire2 umpire from ipl)) group by umpire order by count(*) desc limit 10").show()

+---------------+--------+
|         umpire|count(1)|
+---------------+--------+
|         S Ravi|     121|
|HDPK Dharmasena|      94|
|   AK Chaudhary|      87|
|  C Shamshuddin|      82|
|      M Erasmus|      65|
|      CK Nandan|      57|
|    Nitin Menon|      57|
|     SJA Taufel|      55|
|      Asad Rauf|      51|
|    VA Kulkarni|      50|
+---------------+--------+



# Count of teams who won the Match after winning the Toss.

In [55]:
spark.sql("select winner,count(*) count from ipl where toss_winner = winner group by winner order by count(*) desc").show()

+--------------------+-----+
|              winner|count|
+--------------------+-----+
|      Mumbai Indians|   61|
| Chennai Super Kings|   61|
|Kolkata Knight Ri...|   55|
|    Rajasthan Royals|   44|
|Royal Challengers...|   43|
|     Kings XI Punjab|   36|
|    Delhi Daredevils|   35|
| Sunrisers Hyderabad|   29|
|     Deccan Chargers|   19|
|      Delhi Capitals|   10|
|       Gujarat Lions|   10|
|Rising Pune Super...|    5|
|Kochi Tuskers Kerala|    4|
|Rising Pune Super...|    3|
|       Pune Warriors|    3|
+--------------------+-----+

