In [0]:
dbutils.fs.ls("/FileStore/shared_uploads/joshigaurav4301@gmail.com")

Out[145]: [FileInfo(path='dbfs:/FileStore/shared_uploads/joshigaurav4301@gmail.com/1800.csv', name='1800.csv', size=62728, modificationTime=1724346814000),
 FileInfo(path='dbfs:/FileStore/shared_uploads/joshigaurav4301@gmail.com/1800__1_.csv', name='1800__1_.csv', size=62728, modificationTime=1725390040000),
 FileInfo(path='dbfs:/FileStore/shared_uploads/joshigaurav4301@gmail.com/2015_summary.csv', name='2015_summary.csv', size=7080, modificationTime=1710588524000),
 FileInfo(path='dbfs:/FileStore/shared_uploads/joshigaurav4301@gmail.com/Book', name='Book', size=264875, modificationTime=1724588097000),
 FileInfo(path='dbfs:/FileStore/shared_uploads/joshigaurav4301@gmail.com/Book1.xlsx', name='Book1.xlsx', size=10224, modificationTime=1706427047000),
 FileInfo(path='dbfs:/FileStore/shared_uploads/joshigaurav4301@gmail.com/Marvel_Graph', name='Marvel_Graph', size=1673543, modificationTime=1726425937000),
 FileInfo(path='dbfs:/FileStore/shared_uploads/joshigaurav4301@gmail.com/Marvel_Grap

In [0]:
!pip install pyspark

You should consider upgrading via the '/local_disk0/.ephemeral_nfs/envs/pythonEnv-fbb402a9-ada3-4cf2-8ed2-3253b4f477b9/bin/python -m pip install --upgrade pip' command.[0m


In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.window import Window
import pyspark.sql.functions as f

In [0]:
spark=SparkSession.builder.appName("IPLScoreCard").getOrCreate()

In [0]:
df=spark.read.option("header",True).csv("/FileStore/shared_uploads/joshigaurav4301@gmail.com/deliveries.csv")

In [0]:
df.columns

Out[150]: ['match_id',
 'inning',
 'batting_team',
 'bowling_team',
 'over',
 'ball',
 'batter',
 'bowler',
 'non_striker',
 'batsman_runs',
 'extra_runs',
 'total_runs',
 'extras_type',
 'is_wicket',
 'player_dismissed',
 'dismissal_kind',
 'fielder']

In [0]:
Int_fields=['match_id','inning','over','ball','batsman_runs','extra_runs','total_runs','is_wicket']

In [0]:
Fields=[StructField(col,IntegerType(),True) if col in Int_fields else StructField(col,StringType(),True) for col in df.columns]
Fields

Out[152]: [StructField('match_id', IntegerType(), True),
 StructField('inning', IntegerType(), True),
 StructField('batting_team', StringType(), True),
 StructField('bowling_team', StringType(), True),
 StructField('over', IntegerType(), True),
 StructField('ball', IntegerType(), True),
 StructField('batter', StringType(), True),
 StructField('bowler', StringType(), True),
 StructField('non_striker', StringType(), True),
 StructField('batsman_runs', IntegerType(), True),
 StructField('extra_runs', IntegerType(), True),
 StructField('total_runs', IntegerType(), True),
 StructField('extras_type', StringType(), True),
 StructField('is_wicket', IntegerType(), True),
 StructField('player_dismissed', StringType(), True),
 StructField('dismissal_kind', StringType(), True),
 StructField('fielder', StringType(), True)]

In [0]:
df=spark.read.option("header","true").schema(StructType(Fields)).csv("/FileStore/shared_uploads/joshigaurav4301@gmail.com/deliveries.csv")

In [0]:
df.printSchema()

root
 |-- match_id: integer (nullable = true)
 |-- inning: integer (nullable = true)
 |-- batting_team: string (nullable = true)
 |-- bowling_team: string (nullable = true)
 |-- over: integer (nullable = true)
 |-- ball: integer (nullable = true)
 |-- batter: string (nullable = true)
 |-- bowler: string (nullable = true)
 |-- non_striker: string (nullable = true)
 |-- batsman_runs: integer (nullable = true)
 |-- extra_runs: integer (nullable = true)
 |-- total_runs: integer (nullable = true)
 |-- extras_type: string (nullable = true)
 |-- is_wicket: integer (nullable = true)
 |-- player_dismissed: string (nullable = true)
 |-- dismissal_kind: string (nullable = true)
 |-- fielder: string (nullable = true)



In [0]:
newdf=df.select('match_id').distinct().orderBy(f.desc(f.col("match_id"))).cache()
new=newdf.first()

In [0]:
IPL_2024=df.filter(f.col("match_id")==new["match_id"])

In [0]:
FirstInnings=IPL_2024.filter("inning==1")

In [0]:
Manipulated_DF=FirstInnings.withColumn("over-ball",f.concat(f.lit(f.col("over")),f.lit("."),f.lit(f.col("ball"))).cast(FloatType
                                                                                                        ()))

In [0]:
Batting_Order=Manipulated_DF.groupBy("batter").agg(f.min(f.col("over-ball")).alias("order")).orderBy(f.col("order"))
Batting_Order=Batting_Order.withColumn("batting-order",f.row_number().over(Window.orderBy("order"))).select("batter","batting-order")
Batting_Order.display()

batter,batting-order
Abhishek Sharma,1
RA Tripathi,2
TM Head,3
AK Markram,4
Nithish Kumar Reddy,5
H Klaasen,6
Shahbaz Ahmed,7
Abdul Samad,8
PJ Cummins,9
JD Unadkat,10


In [0]:
Scorecard=FirstInnings.filter( (f.col("extras_type").isNull()) | (f.col("extras_type")!="wides")).groupBy("batter").agg(f.sum("batsman_runs").alias("runs"),
                                   f.count("ball").alias("balls"),
                                   f.count(f.when(f.col("batsman_runs")==4,1)).alias("fours"),
                                   f.count(f.when(f.col("batsman_runs")==6,1)).alias("sixes"),
                                   f.round(f.sum("batsman_runs")/f.count("ball")*100,2).alias("strikeRate")

                                   )

In [0]:
Total_ScoreCard=Scorecard.join(Batting_Order,on="batter",how="inner").orderBy("batting-order").select("batter","runs","balls","fours","sixes","strikeRate")
Total_ScoreCard.display()

batter,runs,balls,fours,sixes,strikeRate
Abhishek Sharma,2,5,0,0,40.0
RA Tripathi,9,13,1,0,69.23
TM Head,0,1,0,0,0.0
AK Markram,20,23,3,0,86.96
Nithish Kumar Reddy,13,10,1,1,130.0
H Klaasen,16,17,1,0,94.12
Shahbaz Ahmed,8,7,0,1,114.29
Abdul Samad,4,4,0,0,100.0
PJ Cummins,24,19,2,1,126.32
JD Unadkat,4,11,0,0,36.36


In [0]:
FirstInnings=FirstInnings.withColumn("Runwides",f.when(f.col("extras_type")=="wides",1).otherwise(0))
FirstInnings=FirstInnings.withColumn("RunsInOver",f.sum(f.col("batsman_runs")+f.col("Runwides")).over(Window.partitionBy("bowler","over")))
FirstInnings.display()

match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,extra_runs,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,fielder,Runwides,RunsInOver
1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,10,1,H Klaasen,AD Russell,AK Markram,1,0,1,,0,,,,0,9
1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,10,2,AK Markram,AD Russell,H Klaasen,0,0,0,,1,AK Markram,caught,MA Starc,0,9
1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,10,3,Shahbaz Ahmed,AD Russell,H Klaasen,2,0,2,,0,,,,0,9
1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,10,4,Shahbaz Ahmed,AD Russell,H Klaasen,0,0,0,,0,,,,0,9
1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,10,5,Shahbaz Ahmed,AD Russell,H Klaasen,0,0,0,,0,,,,0,9
1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,10,6,Shahbaz Ahmed,AD Russell,H Klaasen,6,0,6,,0,,,,0,9
1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,12,1,Abdul Samad,AD Russell,H Klaasen,2,0,2,,0,,,,0,10
1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,12,2,Abdul Samad,AD Russell,H Klaasen,1,0,1,,0,,,,0,10
1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,12,3,H Klaasen,AD Russell,Abdul Samad,1,0,1,,0,,,,0,10
1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,12,4,Abdul Samad,AD Russell,H Klaasen,0,1,1,wides,0,,,,1,10


In [0]:
Bowling_1stInnings=FirstInnings.groupBy("bowler").agg(
    f.sum(f.col("batsman_runs")+f.col("Runwides")).alias("Runs"),
    f.sum("is_wicket").alias("Wicket"),
    f.count(f.when(f.col("Runwides")==0,1)).alias("Balls"),
    f.round(f.sum(f.col("batsman_runs")+f.col("Runwides"))*6/ f.count(f.when(f.col("Runwides")==0,1)),2).alias("Economy"),
    f.count(f.when(f.col("RunsInOver")==0,1)).cast(IntegerType()).alias("Maidens")
)
Bowling_1stInnings=Bowling_1stInnings.withColumn("Maidens",f.col("Maidens")/6)
Bowling_1stInnings=Bowling_1stInnings.withColumn("Maidens",f.col("Maidens").cast(IntegerType()))
Bowling_1stInnings=Bowling_1stInnings.withColumn("Overs",f.col("Balls")/6).withColumn("Overs",f.col("Overs").cast(IntegerType()))
Bowling_1stInnings=Bowling_1stInnings.withColumn("ExtraBalls",f.col("Balls")%6).withColumn("NewOvers",f.concat(f.lit(f.col("Overs")),f.lit("."),f.lit(f.col("ExtraBalls"))))
Bowling_1stInnings=Bowling_1stInnings.withColumn("Overs",f.col("NewOvers").cast(DoubleType())).select("bowler","Overs","Maidens","Runs","Wicket","Economy")
                                                                                                               
Bowling_1stInnings.display()

bowler,Overs,Maidens,Runs,Wicket,Economy
Harshit Rana,4.0,1,24,2,6.0
SP Narine,4.0,0,16,1,4.0
MA Starc,3.0,0,14,2,4.67
AD Russell,2.3,0,19,3,7.6
VG Arora,3.0,0,24,1,8.0
CV Varun,2.0,0,9,1,4.5
