# **Task 03 :**
* **CRICKET FIELDING ANALYSIS DATA COLLECTION**

In [5]:
import pandas as pd

# Read CSV files
deliveries = pd.read_csv("deliveries.csv")
matches = pd.read_csv("matches.csv")

# Display the first few rows of each file to understand the structure
print("Deliveries Data Preview:")
print(deliveries.head())

print("\nMatches Data Preview:")
print(matches.head())


Deliveries Data Preview:
       id  inning  over  ball      batsman  non_striker     bowler  \
0  335982       1     6     5   RT Ponting  BB McCullum  AA Noffke   
1  335982       1     6     6  BB McCullum   RT Ponting  AA Noffke   
2  335982       1     7     1  BB McCullum   RT Ponting     Z Khan   
3  335982       1     7     2  BB McCullum   RT Ponting     Z Khan   
4  335982       1     7     3   RT Ponting  BB McCullum     Z Khan   

   batsman_runs  extra_runs  total_runs  non_boundary  is_wicket  \
0             1           0           1             0          0   
1             1           0           1             0          0   
2             0           0           0             0          0   
3             1           0           1             0          0   
4             1           0           1             0          0   

  dismissal_kind player_dismissed fielder extras_type           batting_team  \
0            NaN              NaN     NaN         NaN  Kolkata Kn

### **Step 2: Filter Data for Fielding Actions**

In [6]:
# Filter fielding data: rows where fielding actions were recorded (fielder column is not null)
fielding_data = deliveries.dropna(subset=['fielder'])

# Check if essential columns are present
fielding_columns = ['fielder', 'dismissal_kind', 'batsman_runs']
if not all(col in deliveries.columns for col in fielding_columns):
    print("Essential fielding columns not found in the dataset. Please check the file structure.")
else:
    print("Fielding data filtered successfully.")


Fielding data filtered successfully.


### **Step 3: Calculate Total Fielding Actions per Player**

In [7]:
# Count total fielding actions per player
total_actions = fielding_data['fielder'].value_counts().reset_index()
total_actions.columns = ['Player', 'Total_Fielding_Actions']
print("\nTotal Fielding Actions Per Player:")
print(total_actions)



Total Fielding Actions Per Player:
                             Player  Total_Fielding_Actions
0                          MS Dhoni                     164
1                        KD Karthik                     156
2                        RV Uthappa                     123
3                    AB de Villiers                     117
4                          SK Raina                     105
..                              ...                     ...
874              RA Jadeja,BJ Hodge                       1
875        DT Christian,Anand Rajan                       1
876               Y Nagar,IK Pathan                       1
877  AB de Villiers,KB Arun Karthik                       1
878                         P Dubey                       1

[879 rows x 2 columns]


### **Step 4: Categorize Specific Fielding Actions**

In [9]:
# Count each type of fielding action
action_counts = fielding_data.groupby(['fielder', 'dismissal_kind']).size().unstack(fill_value=0)
print("\nSpecific Fielding Actions (Catches, Stops, etc.) Per Player:")
print(action_counts)



Specific Fielding Actions (Catches, Stops, etc.) Per Player:
dismissal_kind            caught  run out  stumped
fielder                                           
A Ashish Reddy                 8        1        0
A Ashish Reddy,J Theron        0        1        0
A Chandila                     2        0        0
A Chopra                       2        0        0
A Chopra,BB McCullum           0        1        0
...                          ...      ...      ...
Yuvraj Singh,PP Chawla         0        1        0
Yuvraj Singh,RV Uthappa        0        1        0
Z Khan                        20        2        0
Z Khan,KK Nair,Q de Kock       0        1        0
Z Khan,Q de Kock               0        1        0

[879 rows x 3 columns]


### **Step 5: Calculate Runs Saved by Each Player**

In [11]:
# Calculate runs saved (assuming batsman_runs == 0 is a saved run)
fielding_data['runs_saved'] = fielding_data['batsman_runs'].apply(lambda x: 1 if x == 0 else 0)
runs_saved = fielding_data.groupby('fielder')['runs_saved'].sum().reset_index()
runs_saved.columns = ['Player', 'Total_Runs_Saved']
print("\nTotal Runs Saved Per Player:")
print(runs_saved)



Total Runs Saved Per Player:
                       Player  Total_Runs_Saved
0              A Ashish Reddy                 9
1     A Ashish Reddy,J Theron                 1
2                  A Chandila                 2
3                    A Chopra                 2
4        A Chopra,BB McCullum                 1
..                        ...               ...
874    Yuvraj Singh,PP Chawla                 1
875   Yuvraj Singh,RV Uthappa                 1
876                    Z Khan                22
877  Z Khan,KK Nair,Q de Kock                 0
878          Z Khan,Q de Kock                 1

[879 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fielding_data['runs_saved'] = fielding_data['batsman_runs'].apply(lambda x: 1 if x == 0 else 0)


### **Step 6: Identify Players with Most Missed Fields**

In [12]:
# Count missed fields if 'missed_field' category exists in dismissal_kind
missed_fields = fielding_data[fielding_data['dismissal_kind'] == 'missed_field']['fielder'].value_counts().reset_index()
missed_fields.columns = ['Player', 'Missed_Fields']
print("\nPlayers with Most Missed Fields:")
print(missed_fields.head())



Players with Most Missed Fields:
Empty DataFrame
Columns: [Player, Missed_Fields]
Index: []


### **Step 7: Compile Comprehensive Fielding Performance Summary**

In [14]:
# Combine all summaries into one DataFrame
summary = total_actions.merge(action_counts, left_on='Player', right_index=True, how='left')
summary = summary.merge(runs_saved, on='Player', how='left')
summary = summary.merge(missed_fields, on='Player', how='left')
summary.fillna(0, inplace=True)  # Fill NaNs with 0 for players with no missed fields or runs saved

print("\nComprehensive Fielding Performance Summary:")
print(summary)



Comprehensive Fielding Performance Summary:
                             Player  Total_Fielding_Actions  caught  run out  \
0                          MS Dhoni                     164     113       12   
1                        KD Karthik                     156     118        8   
2                        RV Uthappa                     123      87        4   
3                    AB de Villiers                     117     103        6   
4                          SK Raina                     105      99        6   
..                              ...                     ...     ...      ...   
874              RA Jadeja,BJ Hodge                       1       0        1   
875        DT Christian,Anand Rajan                       1       0        1   
876               Y Nagar,IK Pathan                       1       0        1   
877  AB de Villiers,KB Arun Karthik                       1       0        1   
878                         P Dubey                       1       1        

### **Step 8: Save the Summary to an Excel File**

In [16]:
# Save summary to an Excel file

summary.to_excel("Fielding_Performance_Summary.xlsx", index=False)
print(f"\nComprehensive fielding performance summary saved to {output_path}")


Comprehensive fielding performance summary saved to /mnt/data/Fielding_Performance_Summary.xlsx


### **Summary :**
**This breakdown structures the code into logical steps that tackle each task in sequence:**

* Load and preview data.
* Filter for fielding actions.
* Calculate total fielding actions.
* Categorize fielding action types.
* Calculate runs saved.
* Identify missed fields.
* Compile a comprehensive summary.
* Save the summary as an Excel file.