In [3]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Toy training set: one entry per player with season statistics and the
# rank the player currently holds.
data = dict(
    Player=[f'Player {letter}' for letter in 'ABCDE'],
    MatchesPlayed=[10, 12, 8, 15, 11],
    Runs=[500, 650, 400, 700, 550],
    BattingAvg=[50.0, 54.17, 50.0, 46.67, 50.0],
    Wickets=[5, 10, 3, 8, 6],
    BowlingAvg=[30.0, 25.0, 40.0, 35.0, 32.0],
    StrikeRate=[100.0, 110.0, 90.0, 105.0, 95.0],
    CurrentRank=[5, 3, 8, 2, 6],
)
df = pd.DataFrame(data)

# All numeric statistics act as predictors; the current rank is the target.
target = 'CurrentRank'
features = ['MatchesPlayed', 'Runs', 'BattingAvg', 'Wickets', 'BowlingAvg', 'StrikeRate']

y = df[target]
X = df[features]

# Ordinary least squares on the raw (unscaled) statistics.
# NOTE(review): five rows against six predictors leaves the fit
# underdetermined, so predictions can land outside the valid rank range.
model = LinearRegression().fit(X, y)

# Hypothetical stat lines for two players whose rank is unknown.
future_performance_data = dict(
    Player=['Player F', 'Player G'],
    MatchesPlayed=[18, 14],
    Runs=[800, 700],
    BattingAvg=[55.0, 50.0],
    Wickets=[12, 9],
    BowlingAvg=[22.0, 28.0],
    StrikeRate=[120.0, 100.0],
)
future_df = pd.DataFrame(future_performance_data)
future_X = future_df[features]

# Attach the model's estimate next to each player's stats.
predicted_ranks = model.predict(future_X)
future_df = future_df.assign(PredictedRank=predicted_ranks)

print(future_df[['Player', 'PredictedRank']])


     Player  PredictedRank
0  Player F      -0.244947
1  Player G       4.041645


In [5]:
from sklearn.tree import DecisionTreeRegressor

# Refit on the same X / y with a decision tree; the fixed seed keeps the
# fitted tree reproducible between runs.
# NOTE: this rebinds `model` and replaces the 'PredictedRank' column that
# the linear-regression cell wrote, so only the tree's estimates remain.
model = DecisionTreeRegressor(random_state=42).fit(X, y)

predicted_ranks = model.predict(future_X)
future_df = future_df.assign(PredictedRank=predicted_ranks)

print(future_df[['Player', 'PredictedRank']])


     Player  PredictedRank
0  Player F            2.0
1  Player G            2.0


In [8]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Larger toy training set (15 players). Several ranks repeat (for example,
# three players hold rank 1), which deliberately introduces imbalance in
# CurrentRank.
data = dict(
    Player=[f'Player {letter}' for letter in 'ABCDEFGHIJKLMNO'],
    MatchesPlayed=[10, 12, 8, 15, 11, 18, 14, 20, 5, 10, 16, 13, 9, 7, 22],
    Runs=[500, 650, 400, 700, 550, 800, 700, 900, 200, 500, 750, 600, 450, 350, 1000],
    BattingAvg=[50.0, 54.17, 50.0, 46.67, 50.0, 55.0, 50.0, 60.0, 40.0, 50.0, 48.0, 46.0, 50.0, 50.0, 45.0],
    Wickets=[5, 10, 3, 8, 6, 12, 9, 15, 1, 5, 10, 7, 4, 2, 18],
    BowlingAvg=[30.0, 25.0, 40.0, 35.0, 32.0, 22.0, 28.0, 20.0, 50.0, 30.0, 26.0, 33.0, 38.0, 45.0, 18.0],
    StrikeRate=[100.0, 110.0, 90.0, 105.0, 95.0, 120.0, 100.0, 130.0, 80.0, 100.0, 115.0, 102.0, 92.0, 85.0, 140.0],
    CurrentRank=[5, 3, 8, 2, 6, 1, 4, 1, 10, 5, 3, 6, 9, 11, 1],
)
df = pd.DataFrame(data)

# All numeric statistics act as predictors; the current rank is the target.
target = 'CurrentRank'
features = ['MatchesPlayed', 'Runs', 'BattingAvg', 'Wickets', 'BowlingAvg', 'StrikeRate']

y = df[target]
X = df[features]

# Preprocessing: standardise each feature. The fitted scaler is kept so
# the same statistics can be applied to unseen players.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# NOTE: no hold-out split is made; both models below are fitted on all rows.

# --- Linear regression -------------------------------------------------
model = LinearRegression().fit(X_scaled, y)

# Hypothetical stat lines for two players whose rank is unknown.
future_performance_data = dict(
    Player=['Player P', 'Player Q'],
    MatchesPlayed=[19, 15],
    Runs=[850, 720],
    BattingAvg=[56.0, 51.0],
    Wickets=[13, 10],
    BowlingAvg=[21.0, 27.0],
    StrikeRate=[125.0, 105.0],
)
future_df = pd.DataFrame(future_performance_data)
future_X = future_df[features]

# Scale the new players with the scaler fitted on the training data.
future_X_scaled = scaler.transform(future_X)

predicted_ranks_lr = model.predict(future_X_scaled)
future_df = future_df.assign(PredictedRank_LR=predicted_ranks_lr)

print("Linear Regression Predictions:")
print(future_df[['Player', 'PredictedRank_LR']])


# --- Decision tree regression ------------------------------------------
# The fixed seed keeps the fitted tree reproducible between runs.
model_dt = DecisionTreeRegressor(random_state=42).fit(X_scaled, y)

predicted_ranks_dt = model_dt.predict(future_X_scaled)
future_df = future_df.assign(PredictedRank_DT=predicted_ranks_dt)

print("\nDecision Tree Regression Predictions:")
print(future_df[['Player', 'PredictedRank_DT']])


Linear Regression Predictions:
     Player  PredictedRank_LR
0  Player P          1.186561
1  Player Q          3.432928

Decision Tree Regression Predictions:
     Player  PredictedRank_DT
0  Player P               1.0
1  Player Q               2.0


In [9]:
# Score every player in `df` with both fitted models and print one line per
# player.
#
# The whole feature frame is transformed and predicted in a single call
# instead of row by row:
#   * `iterrows()` on a frame that also holds the string 'Player' column
#     yields object-dtype rows, so each row had to be re-coerced to float;
#   * passing a bare ndarray to a scaler that was fitted on a DataFrame
#     triggers scikit-learn's "X does not have valid feature names" warning
#     on every iteration;
#   * one batch call replaces a transform/predict round-trip per player.
#
# NOTE(review): `df` appears to be the same frame the models were fitted on
# in the preceding cell, so these are in-sample predictions rather than an
# estimate of accuracy on unseen players.
players_scaled = scaler.transform(df[features])
lr_ranks = model.predict(players_scaled)
dt_ranks = model_dt.predict(players_scaled)

for player, predicted_rank_lr, predicted_rank_dt in zip(df['Player'], lr_ranks, dt_ranks):
    print(f"Player: {player}, Predicted Rank (LR): {predicted_rank_lr:.2f}, Predicted Rank (DT): {predicted_rank_dt:.2f}")

Player: Player A, Predicted Rank (LR): 5.36, Predicted Rank (DT): 5.00
Player: Player B, Predicted Rank (LR): 4.38, Predicted Rank (DT): 3.00
Player: Player C, Predicted Rank (LR): 8.05, Predicted Rank (DT): 8.00
Player: Player D, Predicted Rank (LR): 4.49, Predicted Rank (DT): 2.00
Player: Player E, Predicted Rank (LR): 5.50, Predicted Rank (DT): 6.00
Player: Player F, Predicted Rank (LR): 1.68, Predicted Rank (DT): 1.00
Player: Player G, Predicted Rank (LR): 3.76, Predicted Rank (DT): 4.00
Player: Player H, Predicted Rank (LR): 1.07, Predicted Rank (DT): 1.00
Player: Player I, Predicted Rank (LR): 11.26, Predicted Rank (DT): 10.00
Player: Player J, Predicted Rank (LR): 5.36, Predicted Rank (DT): 5.00
Player: Player K, Predicted Rank (LR): 2.62, Predicted Rank (DT): 3.00
Player: Player L, Predicted Rank (LR): 4.94, Predicted Rank (DT): 6.00
Player: Player M, Predicted Rank (LR): 7.34, Predicted Rank (DT): 9.00
Player: Player N, Predicted Rank (LR): 9.40, Predicted Rank (DT): 11.00
Pla

