In [None]:
import os
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline

def round_to_nearest_int(value):
    """Round a number to the nearest integer.

    An exact .5 tie resolves toward zero (2.5 -> 2, -2.5 -> -2); anything
    strictly past the halfway point moves away from zero.

    Args:
        value: A real number (int, float, or NumPy scalar).

    Returns:
        int: The integer nearest to ``value``.
    """
    # Work on the magnitude: int() truncates toward zero, so taking the
    # fractional part of a negative number directly yields a negative
    # fraction that can never exceed 0.5 (which made -1.7 round to -1).
    magnitude = abs(value)
    whole = int(magnitude)
    if magnitude - whole > 0.5:
        whole += 1
    return -whole if value < 0 else whole

def extract_player_name(file_name):
    """Return the player name encoded in a data file's name.

    The name is the file name with its final extension removed (customize
    as per your file naming convention).

    Args:
        file_name: Base name of the file, e.g. ``"Virat Kohli.csv"``.

    Returns:
        str: ``file_name`` without its last extension.
    """
    # Strip only the trailing extension; splitting on the first '.' would
    # truncate names that contain periods ("K.L. Rahul.csv" -> "K").
    stem, _extension = os.path.splitext(file_name)
    return stem

def predict_runs_sixes(file_path):
    """Predict a player's runs and sixes from the strike rates in a CSV file.

    The file is expected to contain 'SR', 'Runs' and '6s' columns. For each
    of 'Runs' and '6s', a pipeline of a mean imputer followed by a random
    forest regressor is fitted with 'SR' as the only feature, and is then
    queried at the mean 'SR' of the file.

    Args:
        file_path: Path to the player's CSV file.

    Returns:
        dict: 'Player Name' (derived from the file's base name),
        'Predicted Runs' / 'Predicted 6s' (predictions passed through
        round_to_nearest_int) and 'Total Runs' / 'Total 6s' (the same
        predictions truncated with int()).
    """
    frame = pd.read_csv(file_path)

    # Strike-rate entries that cannot be parsed as numbers become NaN
    frame['SR'] = pd.to_numeric(frame['SR'], errors='coerce')

    features = frame[['SR']]
    targets = {column: frame[column] for column in ('Runs', '6s')}

    # Single-row query: the mean strike rate over the file
    query = pd.DataFrame({'SR': [features['SR'].mean()]})

    raw = {}
    for column, target in targets.items():
        # The imputer inside the pipeline fills NaN strike rates with the
        # column mean both while fitting and while predicting.
        regressor = make_pipeline(
            SimpleImputer(strategy='mean'),
            RandomForestRegressor(n_estimators=100, random_state=42),
        )
        regressor.fit(features, target)
        raw[column] = regressor.predict(query)[0]

    player = extract_player_name(os.path.basename(file_path))
    return {
        'Player Name': player,
        'Predicted Runs': round_to_nearest_int(raw['Runs']),
        'Predicted 6s': round_to_nearest_int(raw['6s']),
        'Total Runs': int(raw['Runs']),
        'Total 6s': int(raw['6s'])
    }

# Running totals accumulated over every processed player file
total_runs = 0
total_sixes = 0

# Truncated run prediction ('Total Runs') of each player, in processing order
predicted_scores = []

# Example usage: each CSV in the folder is handled as one player's data
folder_path = "indiabat"
file_paths = [
    os.path.join(folder_path, file)
    for file in os.listdir(folder_path)
    if file.endswith(".csv")
]

for file_path in file_paths:
    predictions = predict_runs_sixes(file_path)

    # Keep the per-player score and update both running totals
    predicted_scores.append(predictions['Total Runs'])
    total_runs = total_runs + predictions['Total Runs']
    total_sixes = total_sixes + predictions['Total 6s']

    print(
        f'\nPredictions for {predictions["Player Name"]}:',
        f'Predicted Runs: {predictions["Predicted Runs"]}',
        f'Predicted 6s: {predictions["Predicted 6s"]}',
        sep='\n',
    )


# Only the seven highest individual run predictions count toward the total
Match_Total_runs = sorted(predicted_scores, reverse=True)[:7]
sum_runs = sum(Match_Total_runs)

# Display the top-seven run total and the sixes summed across all files
print('\nTotal Runs :', sum_runs)
print('Total 6s :', total_sixes)




Predictions for Kuldeep Yadav:
Predicted Runs: 9
Predicted 6s: 0

Predictions for Rohith Sharma:
Predicted Runs: 107
Predicted 6s: 5

Predictions for Shubman Gil:
Predicted Runs: 50
Predicted 6s: 2

Predictions for Jadeja:
Predicted Runs: 35
Predicted 6s: 1

Predictions for klrahul:
Predicted Runs: 41
Predicted 6s: 0

Predictions for Bumrah:
Predicted Runs: 2
Predicted 6s: 0

Predictions for Virat Kohli:
Predicted Runs: 67
Predicted 6s: 0

Predictions for Shami:
Predicted Runs: 12
Predicted 6s: 0

Predictions for SuryaKumar Yadav:
Predicted Runs: 12
Predicted 6s: 0

Predictions for Shreyas Iyer:
Predicted Runs: 46
Predicted 6s: 1

Predictions for Siraj:
Predicted Runs: 6
Predicted 6s: 0

Total Runs : 354
Total 6s : 6
