<a href="https://colab.research.google.com/github/Shadow-Walker360/Shadow-Walker360/blob/main/scratch_card_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import logging
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor
import time

# Route every log record (DEBUG and above) to the model_training.log file
logging.basicConfig(
    filename='model_training.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Function to generate synthetic scratch card data
def generate_scratch_card_data(num_samples=10000, card_length=10, seed=None):
    """Build a synthetic scratch-card dataset.

    Each card is a string of ``card_length`` random decimal digits (0-9) and
    its target is the sum of those digits.

    Args:
        num_samples: Number of cards (rows) to generate; must be >= 0.
        card_length: Number of digits per card; must be >= 0.
        seed: Optional seed for a private ``numpy.random.RandomState`` so the
            dataset can be reproduced. When ``None`` (the default) NumPy's
            global random state is used.

    Returns:
        pandas.DataFrame with a ``card`` column (digit strings) and a
        ``target`` column (integer digit sums).

    Raises:
        ValueError: If ``num_samples`` or ``card_length`` is negative.
    """
    try:
        if num_samples < 0 or card_length < 0:
            raise ValueError(
                "num_samples and card_length must be non-negative, got "
                f"num_samples={num_samples}, card_length={card_length}"
            )

        # Both the module and a RandomState instance expose the same randint API
        rng = np.random if seed is None else np.random.RandomState(seed)

        # One vectorised draw: a (num_samples, card_length) grid of digits 0-9
        digits = rng.randint(0, 10, size=(num_samples, card_length))

        # Plain Python ints keep the column contents independent of the
        # platform's default NumPy integer width
        rows = digits.tolist()
        data = pd.DataFrame({
            'card': [''.join(map(str, row)) for row in rows],
            # The target can be changed to something more complex than a sum
            'target': [sum(row) for row in rows],
        })

        logging.info(f"Dataset generated successfully with {num_samples} samples.")
        return data
    except Exception as e:
        # logging.exception records the traceback alongside the message
        logging.exception(f"Error in generating scratch card data: {e}")
        raise

# Function to preprocess and train models
def _log_regression_metrics(label, y_true, y_pred):
    """Log the MAE and MSE of ``y_pred`` against ``y_true`` under ``label``."""
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    logging.info(f"{label} - MAE: {mae}, MSE: {mse}")


def train_models(data):
    """Train, tune and evaluate a random forest and an XGBoost regressor.

    The digits of each card string become one feature column each. Baseline
    models are fitted and scored on a 20% hold-out split, then both model
    families are tuned with a 3-fold grid search and scored again. All
    metrics and the chosen hyperparameters are written to the log.

    Args:
        data: DataFrame with a ``card`` column (strings of digits, all of the
            same length) and a numeric ``target`` column. It is not modified.

    Returns:
        tuple: ``(best_rf_model, best_xgb_model)``, the refitted best
        estimators from the two grid searches.

    Raises:
        Exception: Any error raised during preprocessing, fitting or
            evaluation is logged with its traceback and re-raised.
    """
    try:
        seed = 42  # one seed for the split and every estimator, for reproducibility

        # Build the feature matrix from the digit strings without overwriting
        # the caller's 'card' column
        X = np.array([[int(digit) for digit in card] for card in data['card']])
        y = np.array(data['target'])

        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=seed
        )

        logging.info("Data preprocessing completed. Splitting data into training and test sets.")

        # Model 1: RandomForestRegressor
        rf_model = RandomForestRegressor(n_estimators=100, random_state=seed)
        rf_model.fit(X_train, y_train)
        rf_predictions = rf_model.predict(X_test)

        # Model 2: XGBRegressor
        xgb_model = XGBRegressor(objective='reg:squarederror', random_state=seed)
        xgb_model.fit(X_train, y_train)
        xgb_predictions = xgb_model.predict(X_test)

        logging.info("Models trained successfully.")

        # Evaluate the baseline models
        _log_regression_metrics("Random Forest", y_test, rf_predictions)
        _log_regression_metrics("XGBoost", y_test, xgb_predictions)

        # Hyperparameter optimization for RandomForestRegressor using GridSearchCV.
        # No `scoring` is passed, so candidates are ranked by the estimator's
        # own score method (R^2 for regressors).
        param_grid = {
            'n_estimators': [100, 200],
            'max_depth': [10, 20, None],
            'min_samples_split': [2, 5, 10]
        }
        grid_search_rf = GridSearchCV(
            RandomForestRegressor(random_state=seed), param_grid, cv=3
        )
        grid_search_rf.fit(X_train, y_train)
        best_rf_model = grid_search_rf.best_estimator_
        logging.info(f"Best RandomForest model: {grid_search_rf.best_params_}")

        # Hyperparameter optimization for XGBRegressor
        param_grid_xgb = {
            'n_estimators': [100, 200],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7]
        }
        grid_search_xgb = GridSearchCV(
            XGBRegressor(objective='reg:squarederror', random_state=seed),
            param_grid_xgb,
            cv=3,
        )
        grid_search_xgb.fit(X_train, y_train)
        best_xgb_model = grid_search_xgb.best_estimator_
        logging.info(f"Best XGBoost model: {grid_search_xgb.best_params_}")

        # Final evaluation after hyperparameter optimization
        _log_regression_metrics(
            "Optimized Random Forest", y_test, best_rf_model.predict(X_test)
        )
        _log_regression_metrics(
            "Optimized XGBoost", y_test, best_xgb_model.predict(X_test)
        )

        return best_rf_model, best_xgb_model
    except Exception as e:
        # logging.exception records the traceback alongside the message
        logging.exception(f"Error during model training and evaluation: {e}")
        raise

# Main function to run the entire process
def main():
    """Generate the synthetic dataset, then train, tune and evaluate the models.

    Returns:
        tuple: ``(best_rf_model, best_xgb_model)`` as returned by
        ``train_models``.

    Raises:
        Exception: Any error from data generation or training is logged with
            its traceback and re-raised, so a failed run is visible to the
            caller instead of ending silently.
    """
    logging.info("Starting the model training process.")

    try:
        # Step 1: Generate the dataset
        data = generate_scratch_card_data(num_samples=10000, card_length=10)

        # Step 2: Train models and evaluate
        best_rf_model, best_xgb_model = train_models(data)
    except Exception as e:
        logging.exception(f"Error in main function: {e}")
        raise

    logging.info("Model training and evaluation completed successfully.")
    return best_rf_model, best_xgb_model

# Run the full pipeline only when this file is executed directly,
# not when it is imported as a module
if __name__ == "__main__":
    main()
