### IMPORTS

In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

In [2]:
# Load the per-player statistics and replace every missing value with 0.
df = pd.read_csv("players_stats.csv").fillna(0)

In [3]:
# Columns that span the feature space of the neighbour search.
feature_columns = [
    'Batting_avg', 'Batting_strikerate', 'Batting_total_runs',
    'Batting_boundary', 'Batting_dot',
    'Bowling_balls', 'Bowling_over', 'Bowling_runs', 'Bowling_economy',
    'Bowling_wickets', 'Bowling_total_runs', 'Bowling_avg',
    'Bowling_strikerate',
]

# Six neighbours are requested per query. The columns are passed as-is:
# no scaling is applied in this cell.
nbrs = NearestNeighbors(n_neighbors=6)
nbrs.fit(df[feature_columns])

In [4]:
# Display the column labels of the loaded frame.
df.columns

Index(['Player_name', 'Batting_avg', 'Batting_strikerate',
       'Batting_total_runs', 'Batting_boundary', 'Batting_dot',
       'Bowling_balls', 'Bowling_over', 'Bowling_runs', 'Bowling_economy',
       'Bowling_wickets', 'Bowling_total_runs', 'Bowling_avg',
       'Bowling_strikerate'],
      dtype='object')

In [5]:
# Boolean row mask selecting the player to look up.
is_query_player = df['Player_name'] == 'V Kohli'

# Feature values of the matching row(s), kept as a DataFrame so they can be
# passed straight to the fitted neighbour model.
query_player = df.loc[
    is_query_player,
    [
        'Batting_avg', 'Batting_strikerate', 'Batting_total_runs',
        'Batting_boundary', 'Batting_dot',
        'Bowling_balls', 'Bowling_over', 'Bowling_runs', 'Bowling_economy',
        'Bowling_wickets', 'Bowling_total_runs', 'Bowling_avg',
        'Bowling_strikerate',
    ],
]

In [6]:
# Nearest rows to the query: `distances` and `indices` each hold one row per
# query row, with neighbours ordered nearest first.
distances, indices = nbrs.kneighbors(X=query_player, return_distance=True)

In [7]:
# Row positions of the neighbours of the first query row, nearest first.
neighbor_positions = indices[0]

# Exclude the query player by row identity instead of assuming it is always
# the first hit: rows with identical feature values tie at distance 0 and
# are not guaranteed to come back with the query row first.
query_positions = np.flatnonzero(df.index.isin(query_player.index))
is_other_player = ~np.isin(neighbor_positions, query_positions)

# Keep one fewer than the number of neighbours requested (itself excluded).
similar_players_indices = neighbor_positions[is_other_player][:len(neighbor_positions) - 1]

# Get the names of the similar players
most_similar_players = df.iloc[similar_players_indices]['Player_name'].tolist()
print("5 most similar players:", most_similar_players)

5 most similar players: ['S Dhawan', 'RG Sharma', 'DA Warner', 'SK Raina', 'MS Dhoni']


class PlayerSimilarity:
    """Find the players whose statistics are closest to a given player.

    Distances are computed by ``self.model`` on the feature columns of
    ``self.df`` exactly as stored (no scaling is applied by this class).

    Every finder call sets these attributes:
        query_player:    feature rows of the requested player (DataFrame).
        distances:       distances returned by ``self.model.kneighbors``.
        indices:         row positions returned by ``self.model.kneighbors``.
        similar_players: names of all neighbours returned for the first
                         query row, the requested player included.
    """

    # Feature columns used by each finder.
    batter_features = ['Batting_avg', 'Batting_strikerate',
                       'Batting_total_runs', 'Batting_boundary', 'Batting_dot']
    bowler_features = ['Bowling_balls', 'Bowling_over', 'Bowling_runs',
                       'Bowling_economy', 'Bowling_wickets',
                       'Bowling_total_runs', 'Bowling_avg', 'Bowling_strikerate']
    all_features = batter_features + bowler_features

    def __init__(self, df):
        """Store the player table and create the neighbour model.

        Args:
            df: table with a 'Player_name' column and the feature columns
                listed on the class.
        """
        self.df = pd.DataFrame(df)
        self.model = NearestNeighbors(n_neighbors=6)

    def _find_similar(self, Player_name, features):
        """Shared implementation of the three public finders.

        Args:
            Player_name: value to look up in the 'Player_name' column.
            features: list of column names that define the distance.

        Returns:
            numpy array of player names, nearest first, with the requested
            player removed; at most one fewer than the number of neighbours
            the model returns.

        Raises:
            ValueError: if no row has the requested name.
        """
        is_query = (self.df['Player_name'] == Player_name).to_numpy()
        if not is_query.any():
            # Fail with a clear message instead of letting the model choke
            # on an empty query.
            raise ValueError(
                f"Player {Player_name!r} not found in 'Player_name' column"
            )

        feature_frame = self.df[features]
        self.model.fit(feature_frame)
        self.query_player = feature_frame.loc[is_query]
        self.distances, self.indices = self.model.kneighbors(self.query_player)

        neighbor_positions = self.indices[0]
        self.similar_players = self.df.iloc[neighbor_positions]['Player_name'].values

        # Remove the requested player by row identity. Relying on it being
        # the first neighbour breaks when several rows share the same
        # feature values (distance 0 ties).
        is_other_player = ~is_query[neighbor_positions]
        return self.similar_players[is_other_player][:len(neighbor_positions) - 1]

    def Player_finder(self, Player_name):
        """Return the most similar players using batting and bowling features."""
        return self._find_similar(Player_name, self.all_features)

    def Bowler_finder(self, Player_name):
        """Return the most similar players using bowling features only."""
        return self._find_similar(Player_name, self.bowler_features)

    def Batsmen_finder(self, Player_name):
        """Return the most similar players using batting features only."""
        return self._find_similar(Player_name, self.batter_features)

In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

class PlayerSimilarity:
    """Find the players whose standardised statistics are closest to a player.

    For each lookup the selected feature columns are standardised with
    ``self.scaler`` into a temporary frame, and ``self.model`` is fitted and
    queried on that frame. ``self.df`` itself is never modified, so repeated
    calls and the caller's table always see the original values.

    Every finder call sets these attributes:
        query_player:    standardised feature rows of the requested player.
        distances:       distances returned by ``self.model.kneighbors``.
        indices:         row positions returned by ``self.model.kneighbors``.
        similar_players: names of all neighbours returned for the first
                         query row, the requested player included.
    """

    # Feature columns used by each finder.
    batter_features = ['Batting_avg', 'Batting_strikerate',
                       'Batting_total_runs', 'Batting_boundary', 'Batting_dot']
    bowler_features = ['Bowling_balls', 'Bowling_over', 'Bowling_runs',
                       'Bowling_economy', 'Bowling_wickets',
                       'Bowling_total_runs', 'Bowling_avg', 'Bowling_strikerate']
    all_features = batter_features + bowler_features

    def __init__(self, df):
        """Store the player table and create the scaler and neighbour model.

        Args:
            df: table with a 'Player_name' column and the feature columns
                listed on the class.
        """
        self.df = pd.DataFrame(df)
        self.scaler = StandardScaler()
        self.model = NearestNeighbors(n_neighbors=6)

    def _find_similar(self, Player_name, features):
        """Shared implementation of the three public finders.

        Args:
            Player_name: value to look up in the 'Player_name' column.
            features: list of column names that define the distance.

        Returns:
            numpy array of player names, nearest first, with the requested
            player removed; at most one fewer than the number of neighbours
            the model returns.

        Raises:
            ValueError: if no row has the requested name.
        """
        is_query = (self.df['Player_name'] == Player_name).to_numpy()
        if not is_query.any():
            # Fail with a clear message instead of letting the model choke
            # on an empty query.
            raise ValueError(
                f"Player {Player_name!r} not found in 'Player_name' column"
            )

        # Standardise into a separate frame; writing the result back into
        # self.df would overwrite the raw statistics.
        raw_features = self.df[features]
        scaled_features = pd.DataFrame(
            self.scaler.fit_transform(raw_features),
            index=raw_features.index,
            columns=raw_features.columns,
        )

        # Fit and query on the standardised values.
        self.model.fit(scaled_features)
        self.query_player = scaled_features.loc[is_query]
        self.distances, self.indices = self.model.kneighbors(self.query_player)

        neighbor_positions = self.indices[0]
        self.similar_players = self.df.iloc[neighbor_positions]['Player_name'].values

        # Remove the requested player by row identity. Relying on it being
        # the first neighbour breaks when several rows share the same
        # feature values (distance 0 ties).
        is_other_player = ~is_query[neighbor_positions]
        return self.similar_players[is_other_player][:len(neighbor_positions) - 1]

    def Player_finder(self, Player_name):
        """Return the most similar players using batting and bowling features."""
        return self._find_similar(Player_name, self.all_features)

    def Bowler_finder(self, Player_name):
        """Return the most similar players using bowling features only."""
        return self._find_similar(Player_name, self.bowler_features)

    def Batsmen_finder(self, Player_name):
        """Return the most similar players using batting features only."""
        return self._find_similar(Player_name, self.batter_features)


In [9]:
a = PlayerSimilarity(df)
a.Batsmen_finder("V Kohli")

array(['S Dhawan', 'DA Warner', 'RG Sharma', 'SK Raina', 'MS Dhoni'],
      dtype=object)

In [10]:
a.Player_finder("V Kohli")

array(['RG Sharma', 'SK Raina', 'CH Gayle', 'S Dhawan', 'DA Warner'],
      dtype=object)

In [11]:
a.Bowler_finder("V Kohli")

array(['TM Dilshan', 'Parvez Rasool', 'AC Voges', 'M Siddharth',
       'D Salunkhe'], dtype=object)

In [12]:
df[df["Player_name"]=="V Kohli"].iloc[:,7:]

Unnamed: 0,Bowling_over,Bowling_runs,Bowling_economy,Bowling_wickets,Bowling_total_runs,Bowling_avg,Bowling_strikerate
7,41.833333,367.0,8.772908,5.0,73.4,50.2,14.342629


In [13]:
df[df["Player_name"]=="TM Dilshan"].iloc[:,7:]

Unnamed: 0,Bowling_over,Bowling_runs,Bowling_economy,Bowling_wickets,Bowling_total_runs,Bowling_avg,Bowling_strikerate
135,45.166667,365.0,8.081181,5.0,73.0,54.2,15.498155


In [14]:
b = PlayerSimilarity(df)
b.Batsmen_finder("JJ Bumrah")

array(['HE van der Dussen', 'KW Richardson', 'M Rawat', 'JDS Neesham',
       'Navdeep Saini'], dtype=object)

In [15]:
b.Bowler_finder("JJ Bumrah")

array(['SL Malinga', 'A Mishra', 'Rashid Khan', 'Sandeep Sharma',
       'DJ Bravo'], dtype=object)