In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load the dataset
df = pd.read_csv('/content/drive/MyDrive/mw_pw_profiles.csv')
df = df.head(3000)
df = df[['catches_taken', 'run_out_direct', 'run_out_throw', 'stumpings_done', 'balls_bowled',
              'runs_conceded', 'wickets_taken', 'bowled_done', 'lbw_done', 'maidens', 'dot_balls_as_bowler',
              'runs_scored', 'balls_faced', 'fours_scored', 'sixes_scored', 'dot_balls_as_batsman','fantasy_score_batting','fantasy_score_bowling','fantasy_score_total']]

# Function to create a simple neural network model
def create_model(input_dim):
    model = Sequential([
        Dense(128, activation='relu', input_dim=input_dim),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(8, activation='relu'),
        Dense(1, activation='linear')  # Linear activation for regression
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# 1. Batting Score Model
X_batting = df[['runs_scored', 'balls_faced', 'fours_scored', 'sixes_scored', 'dot_balls_as_batsman']]
y_batting = df['fantasy_score_batting']
X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(X_batting, y_batting, test_size=0.2, random_state=42)

# Scale the features
scaler_batting = StandardScaler()
X_train_b_scaled = scaler_batting.fit_transform(X_train_b)
X_test_b_scaled = scaler_batting.transform(X_test_b)

# Create and train the model
model_batting = create_model(input_dim=X_train_b_scaled.shape[1])
model_batting.fit(X_train_b_scaled, y_train_b, epochs=50, batch_size=32, verbose=0)

# Predict and evaluate
y_pred_b = model_batting.predict(X_test_b_scaled).flatten()
mse_b = mean_squared_error(y_test_b, y_pred_b)
r2_b = r2_score(y_test_b, y_pred_b)
print(f"Mean Squared Error for batting score: {mse_b:.4f}")
print(f"R² Score for batting score: {r2_b:.4f}")

# 2. Bowling Score Model
X_bowling = df[['catches_taken', 'run_out_direct', 'run_out_throw', 'stumpings_done', 'balls_bowled',
                'runs_conceded', 'wickets_taken', 'bowled_done', 'lbw_done', 'maidens', 'dot_balls_as_bowler']]
y_bowling = df['fantasy_score_bowling']
X_train_bw, X_test_bw, y_train_bw, y_test_bw = train_test_split(X_bowling, y_bowling, test_size=0.2, random_state=42)

# Scale the features
scaler_bowling = StandardScaler()
X_train_bw_scaled = scaler_bowling.fit_transform(X_train_bw)
X_test_bw_scaled = scaler_bowling.transform(X_test_bw)

# Create and train the model
model_bowling = create_model(input_dim=X_train_bw_scaled.shape[1])
model_bowling.fit(X_train_bw_scaled, y_train_bw, epochs=50, batch_size=32, verbose=0)

# Predict and evaluate
y_pred_bw = model_bowling.predict(X_test_bw_scaled).flatten()
mse_bw = mean_squared_error(y_test_bw, y_pred_bw)
r2_bw = r2_score(y_test_bw, y_pred_bw)
print(f"Mean Squared Error for bowling score: {mse_bw:.4f}")
print(f"R² Score for bowling score: {r2_bw:.4f}")

# 3. Total Score Model
X_total = df[['catches_taken', 'run_out_direct', 'run_out_throw', 'stumpings_done', 'balls_bowled',
              'runs_conceded', 'wickets_taken', 'bowled_done', 'lbw_done', 'maidens', 'dot_balls_as_bowler',
              'runs_scored', 'balls_faced', 'fours_scored', 'sixes_scored', 'dot_balls_as_batsman']]
y_total = df['fantasy_score_total']
X_train_t, X_test_t, y_train_t, y_test_t = train_test_split(X_total, y_total, test_size=0.2, random_state=42)

# Scale the features
scaler_total = StandardScaler()
X_train_t_scaled = scaler_total.fit_transform(X_train_t)
X_test_t_scaled = scaler_total.transform(X_test_t)

# Create and train the model
model_total = create_model(input_dim=X_train_t_scaled.shape[1])
model_total.fit(X_train_t_scaled, y_train_t, epochs=50, batch_size=32, verbose=0)

# Predict and evaluate
y_pred_t = model_total.predict(X_test_t_scaled).flatten()
mse_t = mean_squared_error(y_test_t, y_pred_t)
r2_t = r2_score(y_test_t, y_pred_t)
print(f"Mean Squared Error for total score: {mse_t:.4f}")
print(f"R² Score for total score: {r2_t:.4f}")

  df = pd.read_csv('/content/drive/MyDrive/mw_pw_profiles.csv')
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
Mean Squared Error for batting score: 4.6741
R² Score for batting score: 0.9956


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Mean Squared Error for bowling score: 12.9962
R² Score for bowling score: 0.9899


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Mean Squared Error for total score: 19.0605
R² Score for total score: 0.9912


In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
# Load the dataset
df = pd.read_csv('/content/drive/MyDrive/mw_pw_profiles.csv')

df = df[['catches_taken', 'run_out_direct', 'run_out_throw', 'stumpings_done', 'balls_bowled',
              'runs_conceded', 'wickets_taken', 'bowled_done', 'lbw_done', 'maidens', 'dot_balls_as_bowler',
              'runs_scored', 'balls_faced', 'fours_scored', 'sixes_scored', 'dot_balls_as_batsman','fantasy_score_batting','fantasy_score_bowling','fantasy_score_total']]


# Custom Linear Regression class
class LinearRegressionCustom:
    def __init__(self, learning_rate=0.001, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None
        self.loss_history = []

    def fit(self, X, y):
        # Initialize weights and bias
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Gradient descent
        for _ in range(self.n_iterations):
            # Forward pass
            y_pred = np.dot(X, self.weights) + self.bias

            # Check for NaNs in predictions
            '''if np.any(np.isnan(y_pred)):
                print("Warning: NaN detected in predictions during training")
                break'''

            # Compute gradients
            dw = (1 / n_samples) * np.dot(X.T, (y_pred - y))
            db = (1 / n_samples) * np.sum(y_pred - y)

            # Check for NaNs in gradients
            '''if np.any(np.isnan(dw)) or np.isnan(db):
                print("Warning: NaN detected in gradients")
                break'''

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            # Track loss (MSE)
            loss = np.mean((y_pred - y) ** 2)
            self.loss_history.append(loss)
            '''if np.isnan(loss):
                print("Warning: NaN detected in loss")
                break'''

    def predict(self, X):
        y_pred = np.dot(X, self.weights) + self.bias
        '''if np.any(np.isnan(y_pred)):
            print("Warning: NaN detected in final predictions")'''
        return y_pred




# Define feature sets and targets
X_batting = df[['runs_scored', 'balls_faced', 'fours_scored', 'sixes_scored', 'dot_balls_as_batsman']]
y_batting = df['fantasy_score_batting']
X_bowling = df[['catches_taken', 'run_out_direct', 'run_out_throw', 'stumpings_done', 'balls_bowled',
                'runs_conceded', 'wickets_taken', 'bowled_done', 'lbw_done', 'maidens', 'dot_balls_as_bowler']]
y_bowling = df['fantasy_score_bowling']
X_total = df[['catches_taken', 'run_out_direct', 'run_out_throw', 'stumpings_done', 'balls_bowled',
              'runs_conceded', 'wickets_taken', 'bowled_done', 'lbw_done', 'maidens', 'dot_balls_as_bowler',
              'runs_scored', 'balls_faced', 'fours_scored', 'sixes_scored', 'dot_balls_as_batsman']]
y_total = df['fantasy_score_total']



# 1. Batting Score Model
X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(X_batting, y_batting, test_size=0.2, random_state=42)

# Scale features
scaler_b = StandardScaler()
X_train_b_scaled = scaler_b.fit_transform(X_train_b)
X_test_b_scaled = scaler_b.transform(X_test_b)

# Train custom linear regression model
model_b = LinearRegressionCustom(learning_rate=0.001, n_iterations=10000)
model_b.fit(X_train_b_scaled, y_train_b)

# Predict and evaluate
y_pred_b = model_b.predict(X_test_b_scaled)
mse_b = mean_squared_error(y_test_b, y_pred_b)
r2_b = r2_score(y_test_b, y_pred_b)
print(f"\nMean Squared Error for batting score: {mse_b:.4f}")
print(f"R² Score for batting score: {r2_b:.4f}")


# 2. Bowling Score Model
X_train_bw, X_test_bw, y_train_bw, y_test_bw = train_test_split(X_bowling, y_bowling, test_size=0.2, random_state=42)

# Scale features
scaler_bw = StandardScaler()
X_train_bw_scaled = scaler_bw.fit_transform(X_train_bw)
X_test_bw_scaled = scaler_bw.transform(X_test_bw)

# Train custom linear regression model
model_bw = LinearRegressionCustom(learning_rate=0.001, n_iterations=10000)
model_bw.fit(X_train_bw_scaled, y_train_bw)

# Predict and evaluate
y_pred_bw = model_bw.predict(X_test_bw_scaled)
mse_bw = mean_squared_error(y_test_bw, y_pred_bw)
r2_bw = r2_score(y_test_bw, y_pred_bw)
print(f"Mean Squared Error for bowling score: {mse_bw:.4f}")
print(f"R² Score for bowling score: {r2_bw:.4f}")

# 3. Total Score Model
X_train_t, X_test_t, y_train_t, y_test_t = train_test_split(X_total, y_total, test_size=0.2, random_state=42)

# Scale features
scaler_t = StandardScaler()
X_train_t_scaled = scaler_t.fit_transform(X_train_t)
X_test_t_scaled = scaler_t.transform(X_test_t)

# Train custom linear regression model
model_t = LinearRegressionCustom(learning_rate=0.001, n_iterations=10000)
model_t.fit(X_train_t_scaled, y_train_t)

# Predict and evaluate
y_pred_t = model_t.predict(X_test_t_scaled)
mse_t = mean_squared_error(y_test_t, y_pred_t)
r2_t = r2_score(y_test_t, y_pred_t)
print(f"Mean Squared Error for total score: {mse_t:.4f}")
print(f"R² Score for total score: {r2_t:.4f}")

# Plot loss history for batting model
loss_data = model_b.loss_history
iterations = list(range(1, len(loss_data) + 1))


  df = pd.read_csv('/content/drive/MyDrive/mw_pw_profiles.csv')



Mean Squared Error for batting score: 13.0818
R² Score for batting score: 0.9888
Mean Squared Error for bowling score: 34.8601
R² Score for bowling score: 0.9624
Mean Squared Error for total score: 48.0207
R² Score for total score: 0.9741
