### Import Libraries

In [36]:
import shutil
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder

### Read the CSV dataset File

In [37]:
# Load the T20I match results CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer='t20i_Matches_Data.csv')

In [38]:
# Preview the first five rows as loaded from the CSV
display(df.head(n=5))

Unnamed: 0,Team1 Name,Team1 Runs Scored,Team1 Wickets Fell,Team2 Name,Team2 Runs Scored,Team2 Wickets Fell,Match Venue (Stadium),Match Venue (City),Match Venue (Country),Toss Winner,Toss Winner Choice,Match Winner
0,India,74.0,10.0,Australia,75.0,1.0,Melbourne Cricket Ground,Melbourne,Australia,India,bat,Australia
1,England,193.0,8.0,New Zealand,143.0,8.0,Jade Stadium,Christchurch,New Zealand,England,bat,England
2,Scotland,107.0,8.0,Netherlands,110.0,5.0,Civil Service Cricket Club,Belfast,Ireland,Netherlands,bowl,Netherlands
3,Kenya,106.0,9.0,Scotland,107.0,1.0,Civil Service Cricket Club,Belfast,Ireland,Kenya,bat,Scotland
4,Zimbabwe,106.0,8.0,Sri Lanka,107.0,5.0,Maple Leaf North-West Ground,King City,Canada,Sri Lanka,bowl,Sri Lanka


In [39]:
# Text-valued columns that get converted to integer codes by label encoding
categorical_cols = [
    'Team1 Name',
    'Team2 Name',
    'Match Venue (Stadium)',
    'Match Venue (City)',
    'Match Venue (Country)',
    'Toss Winner',
    'Match Winner',
]

### Label Encode - Team names and Match Venues

In [40]:
# Fitted LabelEncoder for every column, kept so codes can be decoded later
encoders = {}
# Per-column lookup table: original label -> integer code
mappings = {}

# Every column is fitted with its own independent encoder, so the same label
# (e.g. a team name) can receive different codes in different columns.
for col in categorical_cols:
    le = LabelEncoder()
    # Values are cast to str before fitting.
    # NOTE(review): missing values, if any, would presumably be encoded as the
    # literal label 'nan' after this cast — confirm that is intended.
    text_values = df[col].astype(str)
    df[col] = le.fit_transform(text_values)
    encoders[col] = le
    known_labels = le.classes_
    mappings[col] = dict(zip(known_labels, le.transform(known_labels)))

In [41]:
# Dump every column's label -> code table, each preceded by a blank line and a heading
for col, mapping in mappings.items():
    print(f"\n{col} Mapping:", mapping, sep="\n")


Team1 Name Mapping:
{'Afghanistan': np.int64(0), 'Argentina': np.int64(1), 'Australia': np.int64(2), 'Austria': np.int64(3), 'Bahamas': np.int64(4), 'Bahrain': np.int64(5), 'Bangladesh': np.int64(6), 'Belgium': np.int64(7), 'Belize': np.int64(8), 'Bermuda': np.int64(9), 'Bhutan': np.int64(10), 'Botswana': np.int64(11), 'Brazil': np.int64(12), 'Bulgaria': np.int64(13), 'Cambodia': np.int64(14), 'Cameroon': np.int64(15), 'Canada': np.int64(16), 'Cayman Is': np.int64(17), 'Chile': np.int64(18), 'China': np.int64(19), 'Cook Islands': np.int64(20), 'Costa Rica': np.int64(21), 'Croatia': np.int64(22), 'Cyprus': np.int64(23), 'Czech Rep.': np.int64(24), 'Denmark': np.int64(25), 'England': np.int64(26), 'Estonia': np.int64(27), 'Eswatini': np.int64(28), 'Fiji': np.int64(29), 'Finland': np.int64(30), 'France': np.int64(31), 'Gambia': np.int64(32), 'Germany': np.int64(33), 'Ghana': np.int64(34), 'Gibraltar': np.int64(35), 'Greece': np.int64(36), 'Guernsey': np.int64(37), 'Hong Kong': np.int64(3

In [42]:
# Preview the first five rows after label encoding
display(df.head(n=5))

Unnamed: 0,Team1 Name,Team1 Runs Scored,Team1 Wickets Fell,Team2 Name,Team2 Runs Scored,Team2 Wickets Fell,Match Venue (Stadium),Match Venue (City),Match Venue (Country),Toss Winner,Toss Winner Choice,Match Winner
0,40,74.0,10.0,2,75.0,1.0,118,111,1,41,bat,2
1,26,193.0,8.0,65,143.0,8.0,83,40,41,26,bat,26
2,80,107.0,8.0,64,110.0,5.0,32,15,28,64,bowl,61
3,49,106.0,9.0,80,107.0,1.0,32,15,28,49,bat,77
4,102,106.0,8.0,89,107.0,5.0,113,92,8,89,bowl,84


### One-Hot Encode - Toss winner choice

In [43]:
# Replace 'Toss Winner Choice' with one indicator column per distinct value
# (drop_first=False keeps an indicator for every value).
df = pd.get_dummies(data=df, columns=['Toss Winner Choice'], drop_first=False)

# Record the names of the generated indicator columns as this column's "mapping"
toss_dummy_columns = df.filter(like='Toss Winner Choice_').columns
mappings['Toss Winner Choice'] = toss_dummy_columns.tolist()

In [44]:
# --- Summary of the encoded dataset ---

# First five rows of the fully encoded frame (plain-text rendering)
print(df.head(n=5))

# Every stored mapping, each under its own heading
for col, mapping in mappings.items():
    print(f"\n{col} Mapping:", mapping, sep="\n")

# Reverse lookup example: turn the first four Match Winner codes back into names
print("\nDecode numbers back to names (Match Winner):")
match_winner_encoder = encoders['Match Winner']
print(match_winner_encoder.inverse_transform([0,1,2,3]))

   Team1 Name  Team1 Runs Scored  Team1 Wickets Fell  Team2 Name  \
0          40               74.0                10.0           2   
1          26              193.0                 8.0          65   
2          80              107.0                 8.0          64   
3          49              106.0                 9.0          80   
4         102              106.0                 8.0          89   

   Team2 Runs Scored  Team2 Wickets Fell  Match Venue (Stadium)  \
0               75.0                 1.0                    118   
1              143.0                 8.0                     83   
2              110.0                 5.0                     32   
3              107.0                 1.0                     32   
4              107.0                 5.0                    113   

   Match Venue (City)  Match Venue (Country)  Toss Winner  Match Winner  \
0                 111                      1           41             2   
1                  40                 

### Write cleaned data into CSV file

In [45]:
# Persist the encoded dataset.
#
# The destination is the same file the notebook loads at the top, so this
# write replaces the raw data on disk. To keep the raw data recoverable, a
# one-time backup copy is taken first — but only while the file on disk still
# contains the un-encoded 'Toss Winner Choice' column, i.e. it has not already
# been overwritten by an earlier run of this cell.
encoded_csv_path = Path("t20i_Matches_Data.csv")
raw_backup_path = Path("t20i_Matches_Data_raw_backup.csv")

if encoded_csv_path.exists() and not raw_backup_path.exists():
    # nrows=0 reads only the header row, which is all that is needed here
    columns_on_disk = pd.read_csv(encoded_csv_path, nrows=0).columns
    if 'Toss Winner Choice' in columns_on_disk:
        shutil.copy2(encoded_csv_path, raw_backup_path)
        print(f"Raw dataset backed up to {raw_backup_path}")

# Write processed dataset back to the same file
df.to_csv(encoded_csv_path, index=False)
print("Encoded dataset saved successfully ✅")

Encoded dataset saved successfully ✅


### Write the encoding mappings to a JSON file

In [46]:
import json


def _json_ready(mapping):
    """Return one column's mapping rebuilt from built-in Python types only.

    A dict mapping (label -> code) gets ``str`` keys and plain ``int`` values,
    converting the ``numpy.int64`` codes; any other mapping (the list of
    one-hot column names for Toss Winner Choice) becomes a list of strings.
    """
    if not isinstance(mapping, dict):
        return [str(item) for item in mapping]
    return {str(label): int(code) for label, code in mapping.items()}


# Convert every stored mapping, preserving column order
clean_mappings = {col: _json_ready(mapping) for col, mapping in mappings.items()}

# Save as JSON
with open("categorical_mappings.json", "w") as f:
    json.dump(clean_mappings, fp=f, indent=4)

print("Mappings saved to categorical_mappings.json ✅")



Mappings saved to categorical_mappings.json ✅
