In [None]:
import math
import os

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline

def round_to_nearest_int(value):
    """Round *value* to the nearest integer, sending exact .5 ties downward.

    Unlike the built-in ``round`` (which rounds ties to the nearest even
    number), a fractional part of exactly 0.5 always resolves to the lower
    integer, e.g. ``2.5 -> 2`` and ``-1.5 -> -2``.

    Args:
        value: A real number (Python or NumPy scalar).

    Returns:
        The nearest integer as a Python ``int``.
    """
    lower = math.floor(value)
    # Measure the fraction from the floor rather than from int(): int()
    # truncates toward zero, which yields a negative "fraction" for negative
    # inputs and made every negative value round toward zero.
    if value - lower > 0.5:
        return lower + 1
    return lower

def extract_player_name(file_name):
    """Return the player name encoded in a data file's name.

    Only the final extension is removed, so names that themselves contain
    periods (e.g. initials such as ``"M.S. Dhoni.csv"``) are kept intact.
    Customize this if your files follow a different naming convention.

    Args:
        file_name: Base name of the file, e.g. ``"Adam Zampa.csv"``.

    Returns:
        The file name without its trailing extension.
    """
    return os.path.splitext(file_name)[0]

def predict_wickets(file_path, opposition='India'):
    """Predict a bowler's wickets against one opposition from a CSV record.

    The CSV must contain the columns 'Opposition', 'Econ', 'Runs' and 'Wkts'.
    A random-forest regressor is fitted on the rows for the chosen opposition
    ('Econ' and 'Runs' as features, 'Wkts' as target) and then queried at the
    mean 'Econ' and mean 'Runs' of those same rows.

    Args:
        file_path: Path to the player's CSV file; the base name (minus
            extension) is reported as the player name.
        opposition: Value of the 'Opposition' column to keep. Defaults to
            'India'.

    Returns:
        A dict with the keys:
            'Player Name': name derived from the file name.
            'Predicted Wkts': model prediction rounded to the nearest integer.
            'Total Wkts': the same raw prediction truncated to an integer.

    Raises:
        ValueError: If the file has no rows for the requested opposition.
    """
    data = pd.read_csv(file_path)

    # Keep only the matches against the requested opposition. Copy so that the
    # column assignment below does not trigger SettingWithCopyWarning.
    matches = data[data['Opposition'] == opposition].copy()
    if matches.empty:
        # Fail with an actionable message instead of an opaque estimator error.
        raise ValueError(
            f"No rows with Opposition == {opposition!r} in {file_path}"
        )

    # Non-numeric economy entries become NaN and are mean-imputed by the pipeline.
    matches['Econ'] = pd.to_numeric(matches['Econ'], errors='coerce')

    X = matches[['Econ', 'Runs']]
    y_wkts = matches['Wkts']

    # Imputation lives inside the pipeline so it is applied consistently at
    # both fit and predict time; random_state fixes the forest for repeatability.
    pipeline_wkts = make_pipeline(
        SimpleImputer(strategy='mean'),
        RandomForestRegressor(n_estimators=100, random_state=42),
    )
    pipeline_wkts.fit(X, y_wkts)

    # Query the model at the bowler's average economy and runs conceded.
    input_data_df = pd.DataFrame({
        'Econ': [X['Econ'].mean()],
        'Runs': [X['Runs'].mean()],
    })

    # No manual imputer.transform() here: the pipeline already imputes on
    # predict, and a separate transform breaks when a column is entirely NaN
    # (the imputer drops it, so the column count no longer matches).
    predicted_wkts = pipeline_wkts.predict(input_data_df)

    rounded_predicted_wkts = round_to_nearest_int(predicted_wkts[0])

    return {
        'Player Name': extract_player_name(os.path.basename(file_path)),
        'Predicted Wkts': rounded_predicted_wkts,
        'Total Wkts': int(predicted_wkts[0])
    }

# Running sum of the rounded per-player predictions
total_wkts = 0

# Example usage: every CSV in the folder is treated as one player's record.
folder_path = "ausbowl"
# os.listdir returns entries in arbitrary order; sort them so the per-player
# output appears in the same order on every run and platform.
file_paths = [
    os.path.join(folder_path, file)
    for file in sorted(os.listdir(folder_path))
    if file.endswith(".csv")
]

for file_path in file_paths:
    predictions = predict_wickets(file_path)
    total_wkts += predictions['Predicted Wkts']

    print(f'\nPredictions for {predictions["Player Name"]}:')
    print(f'Predicted Wkts: {predictions["Predicted Wkts"]}')

# Display total wickets across all files
print('\nTotal Wkts:', total_wkts)





Predictions for Adam Zampa:
Predicted Wkts: 2

Predictions for Glenn Maxwell:
Predicted Wkts: 0

Predictions for Cameron Green:
Predicted Wkts: 0

Predictions for Josh Hazlewood:
Predicted Wkts: 2

Predictions for Mitchell Starc:
Predicted Wkts: 1

Predictions for Pat Cummins:
Predicted Wkts: 0

Total Wkts: 5
