In [None]:
import pandas as pd
import random

# Move a random sample of rows from the source workbook to the target workbook.
# pd.read_excel loads the first sheet by default and consumes the header row as
# column names, so every DataFrame position (including 0) is a data row.
source_file_path = 'final_data.xlsx'  # Replace with the source file path
target_file_path = 'final_data.xlsx'  # Replace with the target file path
# NOTE(review): both placeholders point at the same file; in that case the
# second write below replaces the first. Set two distinct paths before running.

# Number of rows to transfer from source to target
num_rows_to_move = 1000

source_df = pd.read_excel(source_file_path)
target_df = pd.read_excel(target_file_path)

# Positional indices of the rows to move (stays empty if the source is too small)
rows_to_move = []

# Check if there are enough rows to move
if len(source_df) < num_rows_to_move:
    print(f"There are not enough rows to move {num_rows_to_move} rows.")
else:
    # Draw unique positions in a single call. Sampling from the full range
    # makes row 0 eligible and cannot loop forever when the source holds
    # exactly `num_rows_to_move` rows (the previous rejection loop over
    # 1..len-1 had only len-1 distinct values to choose from).
    rows_to_move = random.sample(range(len(source_df)), num_rows_to_move)

    # Copy the selected rows to the target DataFrame
    selected_rows = source_df.iloc[rows_to_move]
    target_df = pd.concat([target_df, selected_rows])

    # Delete the selected rows from the source DataFrame
    source_df = source_df.drop(source_df.index[rows_to_move])

    # Save the updated DataFrames back to their respective Excel files.
    # Write mode (the ExcelWriter default) replaces the workbook contents.
    # Append mode on an existing workbook either raises because the default
    # sheet already exists or adds an extra sheet, so the data that
    # read_excel loads would never be updated.
    # NOTE(review): this rewrites each workbook with a single sheet; any other
    # sheets in the original files are not preserved.
    with pd.ExcelWriter(source_file_path, engine='openpyxl') as source_writer:
        source_df.to_excel(source_writer, index=False)

    with pd.ExcelWriter(target_file_path, engine='openpyxl') as target_writer:
        target_df.to_excel(target_writer, index=False)

    print(f"Moved {num_rows_to_move} rows from {source_file_path} to {target_file_path}.")

# Re-seed the global generator; with no argument the seed comes from OS
# randomness (or the current time), so this does not make runs repeatable.
random.seed()


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Train a random-forest classifier on final_data.xlsx and print accuracy plus
# a classification report for held-out validation and test splits.

# Read the dataset
final_df = pd.read_excel("final_data.xlsx")

# Separate the 'result' column (target) from the remaining columns (features)
y = final_df['result']
X = final_df.drop(columns=['result'])


def _clean_run_rate(rate):
    """Return 0 for a non-positive or infinite run rate, else the value unchanged."""
    if rate <= 0 or np.isinf(rate):
        return 0
    return rate


# Zero out non-positive and infinite values in 'cur_run_rate'
X['cur_run_rate'] = X['cur_run_rate'].apply(_clean_run_rate)

# Integer-encode the categorical columns, keeping one fitted encoder per column
categorical_columns = ['Batting_team', 'Bowling_team', 'city']
label_encoders = {name: LabelEncoder() for name in categorical_columns}

for col, encoder in label_encoders.items():
    X[col] = encoder.fit_transform(X[col])

# Hold out 30% of the rows, then halve that hold-out into validation and test
# sets (70% / 15% / 15% overall)
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=45
)
X_validate, X_test, y_validate, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=45
)

# Build the classifier from the chosen hyperparameters and fit it on the
# training split
forest_params = {
    'n_estimators': 200,
    'max_depth': 30,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'random_state': 45,
}
clf = RandomForestClassifier(**forest_params)
clf.fit(X_train, y_train)

# Validation split: predict, then report accuracy and per-class metrics
validate_pred = clf.predict(X_validate)
validate_accuracy = accuracy_score(y_validate, validate_pred)
print("Validation Accuracy:", validate_accuracy)
print("Validation Classification Report:")
print(classification_report(y_validate, validate_pred))

# Test split: predict, then report accuracy and per-class metrics
test_pred = clf.predict(X_test)
test_accuracy = accuracy_score(y_test, test_pred)
print("Testing Accuracy:", test_accuracy)
print("Testing Classification Report:")
print(classification_report(y_test, test_pred))
