In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load and clean data
data = pd.read_csv('/Users/azizraihan/Desktop/cse299/dataset/combined_seasons_data.csv')
columns_to_drop = ['AY', 'HY', 'AR', 'HR', 'AF', 'HF', 'Referee', 'HTR']
data = data.drop(columns=columns_to_drop)
data = data[data['FTR'] != 'D']
data['Date'] = pd.to_datetime(data['Date'], format='%d/%m/%y')
data = data.sort_values(by='Date')

# Encode teams
team_encoder = LabelEncoder()
all_teams = pd.concat([data['HomeTeam'], data['AwayTeam']]).unique()
team_encoder.fit(all_teams)
data['HomeTeam'] = data['HomeTeam'].map(lambda x: team_encoder.transform([x])[0])
data['AwayTeam'] = data['AwayTeam'].map(lambda x: team_encoder.transform([x])[0])

# Encode FTR
data['FTR'] = data['FTR'].map({'H': 0, 'A': 1})

# Split data
train_data = data[data['Date'] < '2017-08-11']
test_data = data[data['Date'] >= '2017-08-11']

X_train = train_data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
X_test = test_data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
y_train = train_data['FTR']
y_test = test_data['FTR']

# Train and test a model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Evaluate
print("Training Accuracy:", accuracy_score(y_train, y_train_pred))
print("Test Accuracy:", accuracy_score(y_test, y_test_pred))
print("\nConfusion Matrix (Test Data):")
print(confusion_matrix(y_test, y_test_pred))
print("\nClassification Report (Test Data):")
print(classification_report(y_test, y_test_pred))


Training Accuracy: 1.0
Test Accuracy: 0.7817371937639198

Confusion Matrix (Test Data):
[[229  37]
 [ 61 122]]

Classification Report (Test Data):
              precision    recall  f1-score   support

           0       0.79      0.86      0.82       266
           1       0.77      0.67      0.71       183

    accuracy                           0.78       449
   macro avg       0.78      0.76      0.77       449
weighted avg       0.78      0.78      0.78       449



In [4]:
# Home metrics: Calculate average HS, HST, HC for each team at home
home_metrics = data.groupby('HomeTeam')[['HS', 'HST', 'HC']].mean()

# Away metrics: Calculate average AS, AST, AC for each team away
away_metrics = data.groupby('AwayTeam')[['AS', 'AST', 'AC']].mean()

# Display home and away metrics
print("Home Metrics (Indexed by Team IDs):")
print(home_metrics.head())  # Show the first few rows of the home metrics dataframe

print("\nAway Metrics (Indexed by Team IDs):")
print(away_metrics.head())  # Show the first few rows of the away metrics dataframe


Home Metrics (Indexed by Team IDs):
                 HS       HST        HC
HomeTeam                               
0         17.178082  8.178082  7.178082
1         12.010870  5.228261  5.869565
2         10.142857  5.333333  5.666667
3         13.186047  7.325581  5.209302
4         13.214286  7.357143  5.357143

Away Metrics (Indexed by Team IDs):
                 AS       AST        AC
AwayTeam                               
0         13.388489  6.302158  5.338129
1          9.515464  4.298969  4.288660
2          8.931034  5.310345  4.206897
3          8.418605  4.767442  4.093023
4          9.333333  4.866667  4.800000


In [12]:
from imblearn.ensemble import BalancedRandomForestClassifier

# Initialize the Balanced Random Forest model
brf_model = BalancedRandomForestClassifier(random_state=42)

# Train the model on the training data
brf_model.fit(X_train, y_train)

# Now the model is ready for evaluation


  warn(
  warn(
  warn(


In [14]:
# Evaluate the model on training data
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, brf_model, team_encoder, home_metrics, away_metrics)

# Evaluate the model on test data
print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, brf_model, team_encoder, home_metrics, away_metrics)



Evaluating on Training Data:


ValueError: Data must be 1-dimensional, got ndarray of shape (36, 3) instead

In [16]:
from sklearn.metrics import confusion_matrix, classification_report

def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X)
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(lambda team_id: home_metrics.loc[team_id].values)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(lambda team_id: away_metrics.loc[team_id].values)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']

    # Target variable
    y = data['FTR']

    # Predicting using the trained model
    y_pred = model.predict(X)

    # Print the predicted results (first few entries)
    print("True labels (y) - first few entries:")
    print(y.head())
    print("Predicted labels (y_pred) - first few entries:")
    print(pd.Series(y_pred).head())

    # Calculate the accuracy of the model on this dataset
    accuracy = (y_pred == y).mean()
    print(f"Accuracy on this dataset: {accuracy:.4f}")

    # Confusion Matrix and Classification Report for more details
    print("\nConfusion Matrix:")
    print(confusion_matrix(y, y_pred))
    print("\nClassification Report:")
    print(classification_report(y, y_pred))

# Evaluate the model on training data
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, brf_model, team_encoder, home_metrics, away_metrics)

# Evaluate the model on test data
print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, brf_model, team_encoder, home_metrics, away_metrics)



Evaluating on Training Data:


ValueError: Columns must be same length as key

In [18]:
from sklearn.metrics import confusion_matrix, classification_report

# Define the evaluate_predict_function
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X)
    X = pd.DataFrame()

    # Map home metrics
    home_values = data['HomeTeam'].map(lambda team_id: home_metrics.loc[team_id].values)
    X[['HS', 'HST', 'HC']] = pd.DataFrame(home_values.tolist(), index=data.index)

    # Map away metrics
    away_values = data['AwayTeam'].map(lambda team_id: away_metrics.loc[team_id].values)
    X[['AS', 'AST', 'AC']] = pd.DataFrame(away_values.tolist(), index=data.index)

    # Add the original encoded team IDs
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']

    # Target variable
    y = data['FTR']

    # Predicting using the trained model
    y_pred = model.predict(X)

    # Print the predicted results (first few entries)
    print("True labels (y) - first few entries:")
    print(y.head())
    print("Predicted labels (y_pred) - first few entries:")
    print(pd.Series(y_pred).head())

    # Calculate the accuracy of the model on this dataset
    accuracy = (y_pred == y).mean()
    print(f"Accuracy on this dataset: {accuracy:.4f}")

    # Confusion Matrix and Classification Report for more details
    print("\nConfusion Matrix:")
    print(confusion_matrix(y, y_pred))
    print("\nClassification Report:")
    print(classification_report(y, y_pred))

# Evaluate the model on training data
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, brf_model, team_encoder, home_metrics, away_metrics)

# Evaluate the model on test data
print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, brf_model, team_encoder, home_metrics, away_metrics)



Evaluating on Training Data:


ValueError: The feature names should match those that were passed during fit.
Feature names must be in the same order as they were in fit.


In [20]:
# Check the order of feature names used during model fitting
print("Feature names used during model fitting:")
print(brf_model.feature_names_in_)


Feature names used during model fitting:
['HomeTeam' 'AwayTeam' 'HS' 'AS' 'HST' 'AST' 'HC' 'AC']


In [22]:
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X)
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].map(home_metrics).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].map(away_metrics).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    
    # Align feature columns to the training order
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    
    # Get the target variable
    y = data['FTR']
    
    # Predicting using the trained model
    y_pred = model.predict(X)
    
    # Print the predicted results (first few entries)
    print("True labels (y) - first few entries:")
    print(y.head())
    print("Predicted labels (y_pred) - first few entries:")
    print(pd.Series(y_pred).head())
    
    # Calculate the accuracy of the model on this dataset
    accuracy = (y_pred == y).mean()
    print(f"Accuracy on this dataset: {accuracy:.4f}")
    
    # Confusion Matrix and Classification Report for more details
    print("\nConfusion Matrix:")
    print(confusion_matrix(y, y_pred))
    print("\nClassification Report:")
    print(classification_report(y, y_pred))


In [24]:
from sklearn.metrics import confusion_matrix, classification_report

# Define the evaluate_predict_function
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X)
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].map(home_metrics).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].map(away_metrics).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    
    # Align feature columns to the training order
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    
    # Get the target variable
    y = data['FTR']
    
    # Predicting using the trained model
    y_pred = model.predict(X)
    
    # Print the predicted results (first few entries)
    print("True labels (y) - first few entries:")
    print(y.head())
    print("Predicted labels (y_pred) - first few entries:")
    print(pd.Series(y_pred).head())
    
    # Calculate the accuracy of the model on this dataset
    accuracy = (y_pred == y).mean()
    print(f"Accuracy on this dataset: {accuracy:.4f}")
    
    # Confusion Matrix and Classification Report for more details
    print("\nConfusion Matrix:")
    print(confusion_matrix(y, y_pred))
    print("\nClassification Report:")
    print(classification_report(y, y_pred))

# Evaluate the model on training data
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, brf_model, team_encoder, home_metrics, away_metrics)

# Evaluate the model on test data
print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, brf_model, team_encoder, home_metrics, away_metrics)



Evaluating on Training Data:


ValueError: Data must be 1-dimensional, got ndarray of shape (36, 3) instead

In [26]:
from sklearn.metrics import confusion_matrix, classification_report

# Define the evaluate_predict_function
def evaluate_predict_function(data, model):
    # Get the features (X) and target (y) from the data
    # Exclude FTHG, FTAG, HTHG, and HTAG
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y = data['FTR']

    # Predicting using the trained model
    y_pred = model.predict(X)

    # Print the predicted results (first few entries)
    print("True labels (y) - first few entries:")
    print(y.head())
    print("Predicted labels (y_pred) - first few entries:")
    print(pd.Series(y_pred).head())

    # Calculate the accuracy of the model on this dataset
    accuracy = (y_pred == y).mean()
    print(f"Accuracy on this dataset: {accuracy:.4f}")

    # Confusion Matrix and Classification Report for more details
    print("\nConfusion Matrix:")
    print(confusion_matrix(y, y_pred))
    print("\nClassification Report:")
    print(classification_report(y, y_pred))

# Evaluate the model on training data
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, brf_model)

# Evaluate the model on test data
print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, brf_model)



Evaluating on Training Data:
True labels (y) - first few entries:
0    1
1    1
2    1
3    0
4    1
Name: FTR, dtype: int64
Predicted labels (y_pred) - first few entries:
0    1
1    1
2    1
3    0
4    1
dtype: int64
Accuracy on this dataset: 0.9837

Confusion Matrix:
[[1367   37]
 [   0  866]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.97      0.99      1404
           1       0.96      1.00      0.98       866

    accuracy                           0.98      2270
   macro avg       0.98      0.99      0.98      2270
weighted avg       0.98      0.98      0.98      2270


Evaluating on Test Data:
True labels (y) - first few entries:
3040    0
3047    0
3044    0
3043    1
3041    1
Name: FTR, dtype: int64
Predicted labels (y_pred) - first few entries:
0    0
1    0
2    0
3    1
4    1
dtype: int64
Accuracy on this dataset: 0.7817

Confusion Matrix:
[[201  65]
 [ 33 150]]

Classification Report:
              prec

In [30]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Function to evaluate a model on training and test data
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X)
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    y = data['FTR']
    
    # Predicting using the trained model
    y_pred = model.predict(X)
    
    # Print accuracy and evaluation metrics
    accuracy = (y_pred == y).mean()
    print(f"Accuracy on this dataset: {accuracy:.4f}")
    print("\nConfusion Matrix:")
    print(confusion_matrix(y, y_pred))
    print("\nClassification Report:")
    print(classification_report(y, y_pred))

# Train and test Logistic Regression
print("\nTraining and Testing Logistic Regression with Regularization...")
logistic_model = LogisticRegression(C=0.1, random_state=42)
logistic_model.fit(X_train, y_train)
print("\nEvaluating Logistic Regression on Training Data:")
evaluate_predict_function(train_data, logistic_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Logistic Regression on Test Data:")
evaluate_predict_function(test_data, logistic_model, team_encoder, home_metrics, away_metrics)

# Train and test Support Vector Machine
print("\nTraining and Testing SVM with Regularization...")
svm_model = SVC(C=0.1, kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)
print("\nEvaluating SVM on Training Data:")
evaluate_predict_function(train_data, svm_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating SVM on Test Data:")
evaluate_predict_function(test_data, svm_model, team_encoder, home_metrics, away_metrics)

# Train and test Gradient Boosting
print("\nTraining and Testing Gradient Boosting with Regularization...")
gb_model = GradientBoostingClassifier(learning_rate=0.1, max_depth=3, n_estimators=100, random_state=42)
gb_model.fit(X_train, y_train)
print("\nEvaluating Gradient Boosting on Training Data:")
evaluate_predict_function(train_data, gb_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Gradient Boosting on Test Data:")
evaluate_predict_function(test_data, gb_model, team_encoder, home_metrics, away_metrics)



Training and Testing Logistic Regression with Regularization...

Evaluating Logistic Regression on Training Data:


ValueError: The feature names should match those that were passed during fit.
Feature names must be in the same order as they were in fit.


In [32]:
# Retrieve and print feature names used during model fitting
if hasattr(model, 'feature_names_in_'):
    print("Feature names used during model fitting:")
    print(model.feature_names_in_)
else:
    print("The model does not have the attribute 'feature_names_in_' to display feature names.")


Feature names used during model fitting:
['HomeTeam' 'AwayTeam' 'HS' 'AS' 'HST' 'AST' 'HC' 'AC']


In [34]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Function to evaluate a model on training and test data
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X) in the correct order
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure correct order
    
    y = data['FTR']
    
    # Predicting using the trained model
    y_pred = model.predict(X)
    
    # Print accuracy and evaluation metrics
    accuracy = (y_pred == y).mean()
    print(f"Accuracy on this dataset: {accuracy:.4f}")
    print("\nConfusion Matrix:")
    print(confusion_matrix(y, y_pred))
    print("\nClassification Report:")
    print(classification_report(y, y_pred))

# Train and test Logistic Regression
print("\nTraining and Testing Logistic Regression with Regularization...")
logistic_model = LogisticRegression(C=0.1, random_state=42)
logistic_model.fit(X_train, y_train)
print("\nEvaluating Logistic Regression on Training Data:")
evaluate_predict_function(train_data, logistic_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Logistic Regression on Test Data:")
evaluate_predict_function(test_data, logistic_model, team_encoder, home_metrics, away_metrics)

# Train and test Support Vector Machine
print("\nTraining and Testing SVM with Regularization...")
svm_model = SVC(C=0.1, kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)
print("\nEvaluating SVM on Training Data:")
evaluate_predict_function(train_data, svm_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating SVM on Test Data:")
evaluate_predict_function(test_data, svm_model, team_encoder, home_metrics, away_metrics)

# Train and test Gradient Boosting
print("\nTraining and Testing Gradient Boosting with Regularization...")
gb_model = GradientBoostingClassifier(learning_rate=0.1, max_depth=3, n_estimators=100, random_state=42)
gb_model.fit(X_train, y_train)
print("\nEvaluating Gradient Boosting on Training Data:")
evaluate_predict_function(train_data, gb_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Gradient Boosting on Test Data:")
evaluate_predict_function(test_data, gb_model, team_encoder, home_metrics, away_metrics)



Training and Testing Logistic Regression with Regularization...

Evaluating Logistic Regression on Training Data:
Accuracy on this dataset: 0.6674

Confusion Matrix:
[[1295  109]
 [ 646  220]]

Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.92      0.77      1404
           1       0.67      0.25      0.37       866

    accuracy                           0.67      2270
   macro avg       0.67      0.59      0.57      2270
weighted avg       0.67      0.67      0.62      2270


Evaluating Logistic Regression on Test Data:
Accuracy on this dataset: 0.7171

Confusion Matrix:
[[244  22]
 [105  78]]

Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.92      0.79       266
           1       0.78      0.43      0.55       183

    accuracy                           0.72       449
   macro avg       0.74      0.67      0.67       449
weighted avg       0.73      0.72     

In [36]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Train and test Decision Tree
print("\nTraining and Testing Decision Tree...")
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
print("\nEvaluating Decision Tree on Training Data:")
evaluate_predict_function(train_data, dt_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Decision Tree on Test Data:")
evaluate_predict_function(test_data, dt_model, team_encoder, home_metrics, away_metrics)

# Train and test Random Forest
print("\nTraining and Testing Random Forest...")
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
print("\nEvaluating Random Forest on Training Data:")
evaluate_predict_function(train_data, rf_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Random Forest on Test Data:")
evaluate_predict_function(test_data, rf_model, team_encoder, home_metrics, away_metrics)

# Train and test AdaBoost
print("\nTraining and Testing AdaBoost...")
ab_model = AdaBoostClassifier(n_estimators=100, random_state=42)
ab_model.fit(X_train, y_train)
print("\nEvaluating AdaBoost on Training Data:")
evaluate_predict_function(train_data, ab_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating AdaBoost on Test Data:")
evaluate_predict_function(test_data, ab_model, team_encoder, home_metrics, away_metrics)

# Train and test Extra Trees
print("\nTraining and Testing Extra Trees...")
et_model = ExtraTreesClassifier(n_estimators=100, random_state=42)
et_model.fit(X_train, y_train)
print("\nEvaluating Extra Trees on Training Data:")
evaluate_predict_function(train_data, et_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Extra Trees on Test Data:")
evaluate_predict_function(test_data, et_model, team_encoder, home_metrics, away_metrics)



Training and Testing Decision Tree...

Evaluating Decision Tree on Training Data:
Accuracy on this dataset: 0.5308

Confusion Matrix:
[[903 501]
 [564 302]]

Classification Report:
              precision    recall  f1-score   support

           0       0.62      0.64      0.63      1404
           1       0.38      0.35      0.36       866

    accuracy                           0.53      2270
   macro avg       0.50      0.50      0.50      2270
weighted avg       0.52      0.53      0.53      2270


Evaluating Decision Tree on Test Data:
Accuracy on this dataset: 0.6347

Confusion Matrix:
[[184  82]
 [ 82 101]]

Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.69      0.69       266
           1       0.55      0.55      0.55       183

    accuracy                           0.63       449
   macro avg       0.62      0.62      0.62       449
weighted avg       0.63      0.63      0.63       449


Training and Testing Ran




Evaluating AdaBoost on Training Data:
Accuracy on this dataset: 0.6784

Confusion Matrix:
[[1151  253]
 [ 477  389]]

Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.82      0.76      1404
           1       0.61      0.45      0.52       866

    accuracy                           0.68      2270
   macro avg       0.66      0.63      0.64      2270
weighted avg       0.67      0.68      0.67      2270


Evaluating AdaBoost on Test Data:
Accuracy on this dataset: 0.7038

Confusion Matrix:
[[219  47]
 [ 86  97]]

Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.82      0.77       266
           1       0.67      0.53      0.59       183

    accuracy                           0.70       449
   macro avg       0.70      0.68      0.68       449
weighted avg       0.70      0.70      0.70       449


Training and Testing Extra Trees...

Evaluating Extra Trees on Traini

In [38]:
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron

# Train and test Bagging Classifier
print("\nTraining and Testing Bagging Classifier...")
bagging_model = BaggingClassifier(n_estimators=100, random_state=42)
bagging_model.fit(X_train, y_train)
print("\nEvaluating Bagging Classifier on Training Data:")
evaluate_predict_function(train_data, bagging_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Bagging Classifier on Test Data:")
evaluate_predict_function(test_data, bagging_model, team_encoder, home_metrics, away_metrics)

# Train and test K-Nearest Neighbors
print("\nTraining and Testing K-Nearest Neighbors...")
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train, y_train)
print("\nEvaluating K-Nearest Neighbors on Training Data:")
evaluate_predict_function(train_data, knn_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating K-Nearest Neighbors on Test Data:")
evaluate_predict_function(test_data, knn_model, team_encoder, home_metrics, away_metrics)

# Train and test Gaussian Naive Bayes
print("\nTraining and Testing Gaussian Naive Bayes...")
gnb_model = GaussianNB()
gnb_model.fit(X_train, y_train)
print("\nEvaluating Gaussian Naive Bayes on Training Data:")
evaluate_predict_function(train_data, gnb_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Gaussian Naive Bayes on Test Data:")
evaluate_predict_function(test_data, gnb_model, team_encoder, home_metrics, away_metrics)

# Train and test Perceptron
print("\nTraining and Testing Perceptron...")
perceptron_model = Perceptron(random_state=42, max_iter=1000, tol=1e-3)
perceptron_model.fit(X_train, y_train)
print("\nEvaluating Perceptron on Training Data:")
evaluate_predict_function(train_data, perceptron_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Perceptron on Test Data:")
evaluate_predict_function(test_data, perceptron_model, team_encoder, home_metrics, away_metrics)



Training and Testing Bagging Classifier...

Evaluating Bagging Classifier on Training Data:
Accuracy on this dataset: 0.6780

Confusion Matrix:
[[1207  197]
 [ 534  332]]

Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.86      0.77      1404
           1       0.63      0.38      0.48       866

    accuracy                           0.68      2270
   macro avg       0.66      0.62      0.62      2270
weighted avg       0.67      0.68      0.66      2270


Evaluating Bagging Classifier on Test Data:
Accuracy on this dataset: 0.6904

Confusion Matrix:
[[219  47]
 [ 92  91]]

Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.82      0.76       266
           1       0.66      0.50      0.57       183

    accuracy                           0.69       449
   macro avg       0.68      0.66      0.66       449
weighted avg       0.69      0.69      0.68       449


Train

In [40]:
from imblearn.ensemble import BalancedRandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Train and evaluate BalancedRandomForestClassifier
print("\nTraining and Testing Balanced Random Forest...")
brf_model = BalancedRandomForestClassifier(random_state=42)
brf_model.fit(X_train, y_train)

print("\nEvaluating Balanced Random Forest on Training Data:")
evaluate_predict_function(train_data, brf_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Balanced Random Forest on Test Data:")
evaluate_predict_function(test_data, brf_model, team_encoder, home_metrics, away_metrics)



Training and Testing Balanced Random Forest...


  warn(
  warn(
  warn(



Evaluating Balanced Random Forest on Training Data:
Accuracy on this dataset: 0.6542

Confusion Matrix:
[[932 472]
 [313 553]]

Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.66      0.70      1404
           1       0.54      0.64      0.58       866

    accuracy                           0.65      2270
   macro avg       0.64      0.65      0.64      2270
weighted avg       0.67      0.65      0.66      2270


Evaluating Balanced Random Forest on Test Data:
Accuracy on this dataset: 0.6704

Confusion Matrix:
[[176  90]
 [ 58 125]]

Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.66      0.70       266
           1       0.58      0.68      0.63       183

    accuracy                           0.67       449
   macro avg       0.67      0.67      0.67       449
weighted avg       0.68      0.67      0.67       449



In [42]:
from sklearn.linear_model import LogisticRegression

# Train and evaluate Logistic Regression with class_weight='balanced'
print("\nTraining and Testing Logistic Regression with Class Weight...")
logistic_model_balanced = LogisticRegression(C=0.1, class_weight='balanced', random_state=42)
logistic_model_balanced.fit(X_train, y_train)

print("\nEvaluating Logistic Regression with Class Weight on Training Data:")
evaluate_predict_function(train_data, logistic_model_balanced, team_encoder, home_metrics, away_metrics)
print("\nEvaluating Logistic Regression with Class Weight on Test Data:")
evaluate_predict_function(test_data, logistic_model_balanced, team_encoder, home_metrics, away_metrics)



Training and Testing Logistic Regression with Class Weight...

Evaluating Logistic Regression with Class Weight on Training Data:
Accuracy on this dataset: 0.6463

Confusion Matrix:
[[999 405]
 [398 468]]

Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.71      0.71      1404
           1       0.54      0.54      0.54       866

    accuracy                           0.65      2270
   macro avg       0.63      0.63      0.63      2270
weighted avg       0.65      0.65      0.65      2270


Evaluating Logistic Regression with Class Weight on Test Data:
Accuracy on this dataset: 0.7105

Confusion Matrix:
[[198  68]
 [ 62 121]]

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.74      0.75       266
           1       0.64      0.66      0.65       183

    accuracy                           0.71       449
   macro avg       0.70      0.70      0.70       449
weighted

In [44]:
from sklearn.svm import SVC

# Train and evaluate SVM with class_weight='balanced'
print("\nTraining and Testing SVM with Class Weight...")
svm_model_balanced = SVC(C=0.1, kernel='linear', class_weight='balanced', random_state=42)
svm_model_balanced.fit(X_train, y_train)

print("\nEvaluating SVM with Class Weight on Training Data:")
evaluate_predict_function(train_data, svm_model_balanced, team_encoder, home_metrics, away_metrics)
print("\nEvaluating SVM with Class Weight on Test Data:")
evaluate_predict_function(test_data, svm_model_balanced, team_encoder, home_metrics, away_metrics)



Training and Testing SVM with Class Weight...

Evaluating SVM with Class Weight on Training Data:
Accuracy on this dataset: 0.6405

Confusion Matrix:
[[994 410]
 [406 460]]

Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.71      0.71      1404
           1       0.53      0.53      0.53       866

    accuracy                           0.64      2270
   macro avg       0.62      0.62      0.62      2270
weighted avg       0.64      0.64      0.64      2270


Evaluating SVM with Class Weight on Test Data:
Accuracy on this dataset: 0.7060

Confusion Matrix:
[[195  71]
 [ 61 122]]

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.73      0.75       266
           1       0.63      0.67      0.65       183

    accuracy                           0.71       449
   macro avg       0.70      0.70      0.70       449
weighted avg       0.71      0.71      0.71       449



In [46]:
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Function to evaluate a model on training and test data
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X) in the correct order
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure correct order
    
    y = data['FTR']
    
    # Predicting using the trained model
    y_pred = model.predict(X)
    
    # Print accuracy and evaluation metrics
    accuracy = (y_pred == y).mean()
    print(f"Accuracy on this dataset: {accuracy:.4f}")
    print("\nConfusion Matrix:")
    print(confusion_matrix(y, y_pred))
    print("\nClassification Report:")
    print(classification_report(y, y_pred))

# Train and test XGBoost
print("\nTraining and Testing XGBoost with Class Weight...")
xgb_model = XGBClassifier(scale_pos_weight=len(train_data[train_data['FTR'] == 0]) / len(train_data[train_data['FTR'] == 1]), random_state=42)
xgb_model.fit(X_train, y_train)
print("\nEvaluating XGBoost on Training Data:")
evaluate_predict_function(train_data, xgb_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating XGBoost on Test Data:")
evaluate_predict_function(test_data, xgb_model, team_encoder, home_metrics, away_metrics)

# Train and test LightGBM
print("\nTraining and Testing LightGBM with Class Weight...")
lgbm_model = LGBMClassifier(class_weight='balanced', random_state=42)
lgbm_model.fit(X_train, y_train)
print("\nEvaluating LightGBM on Training Data:")
evaluate_predict_function(train_data, lgbm_model, team_encoder, home_metrics, away_metrics)
print("\nEvaluating LightGBM on Test Data:")
evaluate_predict_function(test_data, lgbm_model, team_encoder, home_metrics, away_metrics)



Training and Testing XGBoost with Class Weight...

Evaluating XGBoost on Training Data:
Accuracy on this dataset: 0.6978

Confusion Matrix:
[[1060  344]
 [ 342  524]]

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.75      0.76      1404
           1       0.60      0.61      0.60       866

    accuracy                           0.70      2270
   macro avg       0.68      0.68      0.68      2270
weighted avg       0.70      0.70      0.70      2270


Evaluating XGBoost on Test Data:
Accuracy on this dataset: 0.7127

Confusion Matrix:
[[198  68]
 [ 61 122]]

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.74      0.75       266
           1       0.64      0.67      0.65       183

    accuracy                           0.71       449
   macro avg       0.70      0.71      0.70       449
weighted avg       0.71      0.71      0.71       449


Training and Testing

In [48]:
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Function to evaluate models on training and test data
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X) in the correct order
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure correct order
    y = data['FTR']
    
    # Predicting using the trained model
    y_pred = model.predict(X)
    
    # Print accuracy and evaluation metrics
    accuracy = (y_pred == y).mean()
    print(f"Accuracy on this dataset: {accuracy:.4f}")
    print("\nConfusion Matrix:")
    print(confusion_matrix(y, y_pred))
    print("\nClassification Report:")
    print(classification_report(y, y_pred))

# Apply SMOTE on the training data
print("\nApplying SMOTE to balance the training dataset...")
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

print(f"Original class distribution: {dict(pd.Series(y_train).value_counts())}")
print(f"SMOTE class distribution: {dict(pd.Series(y_train_smote).value_counts())}")

# Models to train on SMOTE-balanced data
models = {
    "Logistic Regression": LogisticRegression(C=0.1, random_state=42),
    "SVM": SVC(C=0.1, kernel='linear', random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(learning_rate=0.1, max_depth=3, n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(scale_pos_weight=len(y_train[y_train == 0]) / len(y_train[y_train == 1]), use_label_encoder=False, eval_metric='logloss', random_state=42),
    "LightGBM": LGBMClassifier(class_weight='balanced', random_state=42)
}

# Train and evaluate each model
for name, model in models.items():
    print(f"\nTraining and Testing {name}...")
    model.fit(X_train_smote, y_train_smote)
    
    print(f"\nEvaluating {name} on Training Data:")
    evaluate_predict_function(train_data, model, team_encoder, home_metrics, away_metrics)
    
    print(f"\nEvaluating {name} on Test Data:")
    evaluate_predict_function(test_data, model, team_encoder, home_metrics, away_metrics)



Applying SMOTE to balance the training dataset...
Original class distribution: {0: 1404, 1: 866}
SMOTE class distribution: {1: 1404, 0: 1404}

Training and Testing Logistic Regression...

Evaluating Logistic Regression on Training Data:
Accuracy on this dataset: 0.6454

Confusion Matrix:
[[1003  401]
 [ 404  462]]

Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.71      0.71      1404
           1       0.54      0.53      0.53       866

    accuracy                           0.65      2270
   macro avg       0.62      0.62      0.62      2270
weighted avg       0.65      0.65      0.65      2270


Evaluating Logistic Regression on Test Data:
Accuracy on this dataset: 0.6949

Confusion Matrix:
[[193  73]
 [ 64 119]]

Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.73      0.74       266
           1       0.62      0.65      0.63       183

    accuracy           

Parameters: { "use_label_encoder" } are not used.




Evaluating XGBoost on Training Data:
Accuracy on this dataset: 0.6744

Confusion Matrix:
[[932 472]
 [267 599]]

Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.66      0.72      1404
           1       0.56      0.69      0.62       866

    accuracy                           0.67      2270
   macro avg       0.67      0.68      0.67      2270
weighted avg       0.69      0.67      0.68      2270


Evaluating XGBoost on Test Data:
Accuracy on this dataset: 0.6860

Confusion Matrix:
[[172  94]
 [ 47 136]]

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.65      0.71       266
           1       0.59      0.74      0.66       183

    accuracy                           0.69       449
   macro avg       0.69      0.69      0.68       449
weighted avg       0.71      0.69      0.69       449


Training and Testing LightGBM...
[LightGBM] [Info] Number of positive: 1404

In [50]:
#^smote

In [52]:
#more smote:

In [54]:
from imblearn.over_sampling import ADASYN, BorderlineSMOTE
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Function to train and evaluate models
def train_and_evaluate_with_resampling(resampler, resampler_name):
    print(f"\nResampling using {resampler_name}...\n")
    
    # Resample training data
    X_resampled, y_resampled = resampler.fit_resample(X_train, y_train)
    
    # Train and test XGBoost
    print("\nTraining and Testing XGBoost...")
    xgb_model = XGBClassifier(scale_pos_weight=1, random_state=42, use_label_encoder=False, eval_metric='logloss')
    xgb_model.fit(X_resampled, y_resampled)
    print("\nEvaluating XGBoost on Training Data:")
    evaluate_predict_function(train_data, xgb_model, team_encoder, home_metrics, away_metrics)
    print("\nEvaluating XGBoost on Test Data:")
    evaluate_predict_function(test_data, xgb_model, team_encoder, home_metrics, away_metrics)

    # Train and test LightGBM
    print("\nTraining and Testing LightGBM...")
    lgb_model = LGBMClassifier(class_weight='balanced', random_state=42)
    lgb_model.fit(X_resampled, y_resampled)
    print("\nEvaluating LightGBM on Training Data:")
    evaluate_predict_function(train_data, lgb_model, team_encoder, home_metrics, away_metrics)
    print("\nEvaluating LightGBM on Test Data:")
    evaluate_predict_function(test_data, lgb_model, team_encoder, home_metrics, away_metrics)

# Experiment with ADASYN
adasyn_resampler = ADASYN(random_state=42)
train_and_evaluate_with_resampling(adasyn_resampler, "ADASYN")

# Experiment with Borderline-SMOTE
borderline_smote_resampler = BorderlineSMOTE(random_state=42, kind='borderline-1')
train_and_evaluate_with_resampling(borderline_smote_resampler, "Borderline-SMOTE")



Resampling using ADASYN...


Training and Testing XGBoost...


Parameters: { "use_label_encoder" } are not used.




Evaluating XGBoost on Training Data:
Accuracy on this dataset: 0.6718

Confusion Matrix:
[[954 450]
 [295 571]]

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.68      0.72      1404
           1       0.56      0.66      0.61       866

    accuracy                           0.67      2270
   macro avg       0.66      0.67      0.66      2270
weighted avg       0.69      0.67      0.68      2270


Evaluating XGBoost on Test Data:
Accuracy on this dataset: 0.6971

Confusion Matrix:
[[180  86]
 [ 50 133]]

Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.68      0.73       266
           1       0.61      0.73      0.66       183

    accuracy                           0.70       449
   macro avg       0.69      0.70      0.69       449
weighted avg       0.71      0.70      0.70       449


Training and Testing LightGBM...
[LightGBM] [Info] Number of positive: 1307

Parameters: { "use_label_encoder" } are not used.




Evaluating XGBoost on Training Data:
Accuracy on this dataset: 0.6665

Confusion Matrix:
[[928 476]
 [281 585]]

Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.66      0.71      1404
           1       0.55      0.68      0.61       866

    accuracy                           0.67      2270
   macro avg       0.66      0.67      0.66      2270
weighted avg       0.69      0.67      0.67      2270


Evaluating XGBoost on Test Data:
Accuracy on this dataset: 0.6837

Confusion Matrix:
[[175  91]
 [ 51 132]]

Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.66      0.71       266
           1       0.59      0.72      0.65       183

    accuracy                           0.68       449
   macro avg       0.68      0.69      0.68       449
weighted avg       0.70      0.68      0.69       449


Training and Testing LightGBM...
[LightGBM] [Info] Number of positive: 1404

In [56]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# Example evaluate_predict_function to evaluate and predict the "winner" for a dataset
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    """
    Evaluates the model on the given data and prints the performance metrics.
    The `team_encoder`, `home_metrics`, and `away_metrics` should be used to transform
    the data before making predictions.
    """
    # Assuming data includes features that need to be transformed or encoded before prediction
    X = prepare_features(data, team_encoder, home_metrics, away_metrics)  # Implement this based on your data
    y_true = data['target']  # Assuming 'target' is the column with true labels
    
    # Predict with the model
    y_pred = model.predict(X)
    
    # Print accuracy and classification report
    print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
    print("Classification Report:")
    print(classification_report(y_true, y_pred))

# Initialize the base model (RandomForest in this case)
model = RandomForestClassifier()

# Define the SMOTE sampler with initial parameters
smote = SMOTE(random_state=42)

# Apply SMOTE to balance the classes in the training data
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Set up the parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [100, 200],       # Number of trees in the forest
    'max_depth': [None, 10, 20, 30],   # Depth of the trees
    'min_samples_split': [2, 5, 10],   # Minimum number of samples to split a node
    'min_samples_leaf': [1, 2, 4],     # Minimum number of samples at a leaf node
    'bootstrap': [True, False],        # Whether bootstrap samples are used when building trees
}

# Grid search to tune SMOTE and the classifier
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Train the model on the resampled dataset
grid_search.fit(X_resampled, y_resampled)

# Best parameters from grid search
print("Best Parameters:", grid_search.best_params_)

# Evaluating the model on training and test data
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, grid_search.best_estimator_, team_encoder, home_metrics, away_metrics)

print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, grid_search.best_estimator_, team_encoder, home_metrics, away_metrics)

# --- Predicting the "Winner" ---
# Assuming that your "winner" class is labeled as 1 (can be adjusted based on your dataset)
X_new = np.array([[...], [...], ...])  # Your new sample data here (rows of features)
y_new_pred = grid_search.best_estimator_.predict(X_new)

# Output the prediction for new samples
for i, prediction in enumerate(y_new_pred):
    result = "Winner" if prediction == 1 else "Loser"  # Adjust this based on your class labeling
    print(f"Sample {i+1} prediction: {result}")


Best Parameters: {'bootstrap': False, 'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}

Evaluating on Training Data:


NameError: name 'prepare_features' is not defined

In [58]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
import pandas as pd

# Function to prepare features for prediction
def prepare_features(data, team_encoder, home_metrics, away_metrics):
    """
    Prepares the feature matrix (X) for the model based on home and away metrics.
    """
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    return X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure correct column order

# Example evaluate_predict_function to evaluate and predict the "winner" for a dataset
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    """
    Evaluates the model on the given data and prints the performance metrics.
    """
    # Prepare features (X) and extract true labels (y)
    X = prepare_features(data, team_encoder, home_metrics, away_metrics)
    y_true = data['FTR']  # Assuming 'FTR' is the target column
    
    # Predict with the model
    y_pred = model.predict(X)
    
    # Print accuracy and classification report
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print("Classification Report:")
    print(classification_report(y_true, y_pred))

# Apply SMOTE to balance the classes in the training data
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Initialize RandomForest with the best parameters
best_rf_model = RandomForestClassifier(
    bootstrap=False,
    max_depth=20,
    min_samples_leaf=1,
    min_samples_split=2,
    n_estimators=100,
    random_state=42
)

# Train the model on the resampled dataset
best_rf_model.fit(X_resampled, y_resampled)

# Evaluating the model on training and test data
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, best_rf_model, team_encoder, home_metrics, away_metrics)

print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, best_rf_model, team_encoder, home_metrics, away_metrics)



Evaluating on Training Data:
Accuracy: 0.6542
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.73      0.72      1404
           1       0.55      0.52      0.54       866

    accuracy                           0.65      2270
   macro avg       0.63      0.63      0.63      2270
weighted avg       0.65      0.65      0.65      2270


Evaluating on Test Data:
Accuracy: 0.6904
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.74      0.74       266
           1       0.62      0.62      0.62       183

    accuracy                           0.69       449
   macro avg       0.68      0.68      0.68       449
weighted avg       0.69      0.69      0.69       449



In [60]:
#smote

In [62]:
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Function to evaluate model performance
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X)
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Correct feature order
    
    y_true = data['FTR']
    
    # Predict and evaluate
    y_pred = model.predict(X)
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print("Classification Report:")
    print(classification_report(y_true, y_pred))

# SMOTE augmentation for training data
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Train a model with augmented data
smote_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    bootstrap=False,
    random_state=42
)
smote_model.fit(X_resampled, y_resampled)

# Evaluate on training data
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, smote_model, team_encoder, home_metrics, away_metrics)

# Evaluate on test data
print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, smote_model, team_encoder, home_metrics, away_metrics)



Evaluating on Training Data:
Accuracy: 0.6542
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.73      0.72      1404
           1       0.55      0.52      0.54       866

    accuracy                           0.65      2270
   macro avg       0.63      0.63      0.63      2270
weighted avg       0.65      0.65      0.65      2270


Evaluating on Test Data:
Accuracy: 0.6904
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.74      0.74       266
           1       0.62      0.62      0.62       183

    accuracy                           0.69       449
   macro avg       0.68      0.68      0.68       449
weighted avg       0.69      0.69      0.69       449



In [64]:
#adysn

In [66]:
from imblearn.over_sampling import ADASYN
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Function to evaluate model performance
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X)
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Correct feature order
    
    y_true = data['FTR']
    
    # Predict and evaluate
    y_pred = model.predict(X)
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print("Classification Report:")
    print(classification_report(y_true, y_pred))

# ADASYN augmentation for training data
adasyn = ADASYN(random_state=42)
X_resampled, y_resampled = adasyn.fit_resample(X_train, y_train)

# Train a model with augmented data
adasyn_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    bootstrap=False,
    random_state=42
)
adasyn_model.fit(X_resampled, y_resampled)

# Evaluate on training data
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, adasyn_model, team_encoder, home_metrics, away_metrics)

# Evaluate on test data
print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, adasyn_model, team_encoder, home_metrics, away_metrics)



Evaluating on Training Data:
Accuracy: 0.6392
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.67      0.70      1404
           1       0.52      0.58      0.55       866

    accuracy                           0.64      2270
   macro avg       0.62      0.63      0.63      2270
weighted avg       0.65      0.64      0.64      2270


Evaluating on Test Data:
Accuracy: 0.6837
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.71      0.73       266
           1       0.60      0.65      0.63       183

    accuracy                           0.68       449
   macro avg       0.68      0.68      0.68       449
weighted avg       0.69      0.68      0.69       449



In [68]:
from imblearn.over_sampling import BorderlineSMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Function to evaluate model performance
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X)
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Correct feature order
    
    y_true = data['FTR']
    
    # Predict and evaluate
    y_pred = model.predict(X)
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print("Classification Report:")
    print(classification_report(y_true, y_pred))

# Borderline-SMOTE augmentation for training data
borderline_smote = BorderlineSMOTE(random_state=42)
X_resampled, y_resampled = borderline_smote.fit_resample(X_train, y_train)

# Train a model with augmented data
borderline_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    bootstrap=False,
    random_state=42
)
borderline_model.fit(X_resampled, y_resampled)

# Evaluate on training data
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, borderline_model, team_encoder, home_metrics, away_metrics)

# Evaluate on test data
print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, borderline_model, team_encoder, home_metrics, away_metrics)



Evaluating on Training Data:
Accuracy: 0.6480
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.69      0.71      1404
           1       0.54      0.59      0.56       866

    accuracy                           0.65      2270
   macro avg       0.63      0.64      0.63      2270
weighted avg       0.66      0.65      0.65      2270


Evaluating on Test Data:
Accuracy: 0.6793
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.69      0.72       266
           1       0.60      0.66      0.63       183

    accuracy                           0.68       449
   macro avg       0.67      0.68      0.67       449
weighted avg       0.69      0.68      0.68       449



In [70]:
#borderline smote

In [72]:
from imblearn.over_sampling import BorderlineSMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Function to evaluate model performance
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X)
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Correct feature order
    
    y_true = data['FTR']
    
    # Predict and evaluate
    y_pred = model.predict(X)
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print("Classification Report:")
    print(classification_report(y_true, y_pred))

# Borderline-SMOTE augmentation for training data
borderline_smote = BorderlineSMOTE(
    random_state=42, 
    k_neighbors=7,  # Increased neighbors to improve synthetic data quality
    sampling_strategy=0.7  # Balance minority class to 70% of the majority class
)
X_resampled, y_resampled = borderline_smote.fit_resample(X_train, y_train)

# Train a model with augmented data
borderline_model = RandomForestClassifier(
    n_estimators=200,  # Increase number of trees for better performance
    max_depth=25,  # Allow slightly deeper trees
    min_samples_split=5,  # Increase split criterion for more robust splits
    min_samples_leaf=2,  # Prevent overfitting with slightly larger leaf nodes
    bootstrap=True,  # Use bootstrapping to improve generalization
    random_state=42
)
borderline_model.fit(X_resampled, y_resampled)

# Evaluate on training data
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, borderline_model, team_encoder, home_metrics, away_metrics)

# Evaluate on test data
print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, borderline_model, team_encoder, home_metrics, away_metrics)



Evaluating on Training Data:
Accuracy: 0.6784
Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.84      0.76      1404
           1       0.61      0.42      0.50       866

    accuracy                           0.68      2270
   macro avg       0.66      0.63      0.63      2270
weighted avg       0.67      0.68      0.66      2270


Evaluating on Test Data:
Accuracy: 0.7016
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.82      0.76       266
           1       0.67      0.54      0.59       183

    accuracy                           0.70       449
   macro avg       0.69      0.68      0.68       449
weighted avg       0.70      0.70      0.69       449



In [74]:
from imblearn.over_sampling import BorderlineSMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Fine-tune Borderline-SMOTE parameters
borderline_smote = BorderlineSMOTE(random_state=42, k_neighbors=10, sampling_strategy=0.9)

# Apply Borderline-SMOTE
X_resampled, y_resampled = borderline_smote.fit_resample(X_train, y_train)

# Train a RandomForest model
rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
rf_model.fit(X_resampled, y_resampled)

# Evaluate on train and test data
print("\nEvaluating RandomForest with Fine-Tuned Borderline-SMOTE on Training Data:")
evaluate_predict_function(train_data, rf_model, team_encoder, home_metrics, away_metrics)

print("\nEvaluating RandomForest with Fine-Tuned Borderline-SMOTE on Test Data:")
evaluate_predict_function(test_data, rf_model, team_encoder, home_metrics, away_metrics)



Evaluating RandomForest with Fine-Tuned Borderline-SMOTE on Training Data:
Accuracy: 0.6749
Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.70      0.73      1404
           1       0.57      0.63      0.60       866

    accuracy                           0.67      2270
   macro avg       0.66      0.67      0.66      2270
weighted avg       0.68      0.67      0.68      2270


Evaluating RandomForest with Fine-Tuned Borderline-SMOTE on Test Data:
Accuracy: 0.6860
Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.67      0.72       266
           1       0.60      0.71      0.65       183

    accuracy                           0.69       449
   macro avg       0.68      0.69      0.68       449
weighted avg       0.70      0.69      0.69       449



In [76]:
from xgboost import XGBClassifier

# Train XGBoost on resampled data
xgb_model = XGBClassifier(
    learning_rate=0.05,
    max_depth=5,
    n_estimators=200,
    scale_pos_weight=len(y_train[y_train == 0]) / len(y_train[y_train == 1]),
    random_state=42
)
xgb_model.fit(X_resampled, y_resampled)

# Evaluate on train and test data
print("\nEvaluating XGBoost with Fine-Tuned Borderline-SMOTE on Training Data:")
evaluate_predict_function(train_data, xgb_model, team_encoder, home_metrics, away_metrics)

print("\nEvaluating XGBoost with Fine-Tuned Borderline-SMOTE on Test Data:")
evaluate_predict_function(test_data, xgb_model, team_encoder, home_metrics, away_metrics)



Evaluating XGBoost with Fine-Tuned Borderline-SMOTE on Training Data:
Accuracy: 0.6079
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.45      0.59      1404
           1       0.49      0.86      0.63       866

    accuracy                           0.61      2270
   macro avg       0.66      0.66      0.61      2270
weighted avg       0.71      0.61      0.60      2270


Evaluating XGBoost with Fine-Tuned Borderline-SMOTE on Test Data:
Accuracy: 0.6704
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.53      0.66       266
           1       0.56      0.87      0.68       183

    accuracy                           0.67       449
   macro avg       0.71      0.70      0.67       449
weighted avg       0.74      0.67      0.67       449



In [78]:
from lightgbm import LGBMClassifier

# Train LightGBM on resampled data
lgb_model = LGBMClassifier(
    learning_rate=0.05,
    max_depth=7,
    n_estimators=200,
    class_weight='balanced',
    random_state=42
)
lgb_model.fit(X_resampled, y_resampled)

# Evaluate on train and test data
print("\nEvaluating LightGBM with Fine-Tuned Borderline-SMOTE on Training Data:")
evaluate_predict_function(train_data, lgb_model, team_encoder, home_metrics, away_metrics)

print("\nEvaluating LightGBM with Fine-Tuned Borderline-SMOTE on Test Data:")
evaluate_predict_function(test_data, lgb_model, team_encoder, home_metrics, away_metrics)


[LightGBM] [Info] Number of positive: 1263, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000619 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 214
[LightGBM] [Info] Number of data points in the train set: 2667, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Start training from score 0.000000

Evaluating LightGBM with Fine-Tuned Borderline-SMOTE on Training Data:
Accuracy: 0.6687
Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.68      0.72      1404
           1       0.56      0.66      0.60       866

    accuracy                           0.67      2270
   macro avg       0.66      0.67      0.66      2270
weighted avg       0.68      0.67      0.67      2270


Evaluatin

In [80]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Define base models
base_models = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('xgb', XGBClassifier(learning_rate=0.05, max_depth=5, n_estimators=200, random_state=42))
]

# Define the final estimator
stacked_model = StackingClassifier(
    estimators=base_models,
    final_estimator=LogisticRegression(random_state=42)
)

# Train Stacked Ensemble on resampled data
stacked_model.fit(X_resampled, y_resampled)

# Evaluate on train and test data
print("\nEvaluating Stacked Ensemble on Training Data:")
evaluate_predict_function(train_data, stacked_model, team_encoder, home_metrics, away_metrics)

print("\nEvaluating Stacked Ensemble on Test Data:")
evaluate_predict_function(test_data, stacked_model, team_encoder, home_metrics, away_metrics)



Evaluating Stacked Ensemble on Training Data:
Accuracy: 0.6727
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.75      0.74      1404
           1       0.58      0.54      0.56       866

    accuracy                           0.67      2270
   macro avg       0.65      0.65      0.65      2270
weighted avg       0.67      0.67      0.67      2270


Evaluating Stacked Ensemble on Test Data:
Accuracy: 0.6860
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.73      0.73       266
           1       0.61      0.63      0.62       183

    accuracy                           0.69       449
   macro avg       0.68      0.68      0.68       449
weighted avg       0.69      0.69      0.69       449



In [82]:
# Evaluate a model with adjusted thresholds (using XGBoost as an example)
y_pred_proba_train = xgb_model.predict_proba(X_train)[:, 1]  # Probabilities for class 1
y_pred_proba_test = xgb_model.predict_proba(X_test)[:, 1]

# Adjust threshold
threshold = 0.4
y_pred_train = (y_pred_proba_train >= threshold).astype(int)
y_pred_test = (y_pred_proba_test >= threshold).astype(int)

# Print results
print("\nEvaluating XGBoost with Adjusted Threshold on Training Data:")
print(f"Accuracy: {accuracy_score(y_train, y_pred_train):.4f}")
print("\nClassification Report:")
print(classification_report(y_train, y_pred_train))

print("\nEvaluating XGBoost with Adjusted Threshold on Test Data:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_test))



Evaluating XGBoost with Adjusted Threshold on Training Data:
Accuracy: 0.7899

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.68      0.80      1404
           1       0.65      0.97      0.78       866

    accuracy                           0.79      2270
   macro avg       0.81      0.82      0.79      2270
weighted avg       0.85      0.79      0.79      2270


Evaluating XGBoost with Adjusted Threshold on Test Data:
Accuracy: 0.7261

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.61      0.73       266
           1       0.61      0.89      0.73       183

    accuracy                           0.73       449
   macro avg       0.75      0.75      0.73       449
weighted avg       0.78      0.73      0.73       449



In [84]:
#xgboost with adjusted threshold

In [88]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Function to evaluate XGBoost with adjusted threshold
def evaluate_predict_winner_with_threshold(data, model, team_encoder, home_metrics, away_metrics, threshold=0.3):
    """
    Evaluates the predict_winner function using an adjusted probability threshold.
    """
    # Prepare features (X) in the correct order
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure correct order

    # True labels
    y_true = data['FTR']
    
    # Get predicted probabilities for class 1 (Away Win)
    y_pred_proba = model.predict_proba(X)[:, 1]  # Probability for class 1
    
    # Adjust threshold to determine predictions
    y_pred = (y_pred_proba >= threshold).astype(int)
    
    # Evaluate performance
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy with threshold {threshold}: {accuracy:.4f}")
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))

# Example usage
threshold = 0.4  # Adjust threshold as needed

print("\nEvaluating XGBoost with Adjusted Threshold on Training Data:")
evaluate_predict_winner_with_threshold(train_data, xgb_model, team_encoder, home_metrics, away_metrics, threshold)

print("\nEvaluating XGBoost with Adjusted Threshold on Test Data:")
evaluate_predict_winner_with_threshold(test_data, xgb_model, team_encoder, home_metrics, away_metrics, threshold)



Evaluating XGBoost with Adjusted Threshold on Training Data:
Accuracy with threshold 0.4: 0.5520

Confusion Matrix:
[[457 947]
 [ 70 796]]

Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.33      0.47      1404
           1       0.46      0.92      0.61       866

    accuracy                           0.55      2270
   macro avg       0.66      0.62      0.54      2270
weighted avg       0.71      0.55      0.53      2270


Evaluating XGBoost with Adjusted Threshold on Test Data:
Accuracy with threshold 0.4: 0.6325

Confusion Matrix:
[[115 151]
 [ 14 169]]

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.43      0.58       266
           1       0.53      0.92      0.67       183

    accuracy                           0.63       449
   macro avg       0.71      0.68      0.63       449
weighted avg       0.74      0.63      0.62       449



In [90]:
#tuning logistic regression

In [92]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline

# Define a pipeline with Logistic Regression
pipeline = Pipeline([
    ('model', LogisticRegression(random_state=42, class_weight='balanced'))
])

# Define parameter grid for tuning Logistic Regression
param_grid = {
    'model__C': [0.01, 0.1, 1, 10],  # Regularization strength
    'model__solver': ['lbfgs', 'liblinear'],  # Solver options
    'model__max_iter': [100, 200, 500]  # Maximum number of iterations
}

# Initialize GridSearchCV
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=2
)

# Fit the model using the training data
grid_search.fit(X_train, y_train)

# Best parameters from GridSearch
best_params = grid_search.best_params_
print(f"Best Parameters: {best_params}")

# Get the best model
best_logistic_model = grid_search.best_estimator_

# Define the evaluation function
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features (X)
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure correct order
    
    y = data['FTR']
    
    # Predict probabilities
    y_pred = model.predict(X)
    
    # Calculate accuracy and classification report
    accuracy = accuracy_score(y, y_pred)
    print(f"Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y, y_pred))

# Evaluate the best model on training and test data
print("\nEvaluating Logistic Regression on Training Data:")
evaluate_predict_function(train_data, best_logistic_model, team_encoder, home_metrics, away_metrics)

print("\nEvaluating Logistic Regression on Test Data:")
evaluate_predict_function(test_data, best_logistic_model, team_encoder, home_metrics, away_metrics)


Fitting 5 folds for each of 24 candidates, totalling 120 fits


python(31905) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(31906) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(31907) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(31908) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Best Parameters: {'model__C': 0.01, 'model__max_iter': 100, 'model__solver': 'liblinear'}

Evaluating Logistic Regression on Training Data:
Accuracy: 0.6471

Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.71      0.71      1404
           1       0.54      0.55      0.54       866

    accuracy                           0.65      2270
   macro avg       0.63      0.63      0.63      2270
weighted avg       0.65      0.65      0.65      2270


Evaluating Logistic Regression on Test Data:
Accuracy: 0.7082

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.73      0.75       266
           1       0.63      0.67      0.65       183

    accuracy                           0.71       449
   macro avg       0.70      0.70      0.70       449
weighted avg       0.71      0.71      0.71       449



In [94]:
#optimizing logistic regression and smote together

In [96]:
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Define the pipeline with SMOTE and Logistic Regression
pipeline = Pipeline([
    ('smote', SMOTE(random_state=42)),
    ('model', LogisticRegression(class_weight='balanced', random_state=42))
])

# Define the parameter grid
param_grid = {
    'smote__k_neighbors': [3, 5, 7, 10],  # SMOTE parameter
    'smote__sampling_strategy': [0.6, 0.7, 1.0],  # SMOTE parameter
    'model__C': [0.005, 0.01, 0.05, 0.1],  # Logistic Regression regularization
    'model__solver': ['liblinear', 'lbfgs', 'saga'],  # Logistic Regression solver
    'model__max_iter': [100, 200, 500]  # Logistic Regression iterations
}

# Set up GridSearchCV
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=2
)

# Fit the grid search on the training data
grid_search.fit(X_train, y_train)

# Display the best parameters
print("Best Parameters:", grid_search.best_params_)

# Evaluate the model with best parameters
best_model = grid_search.best_estimator_

# Define function to evaluate predict winner
def evaluate_predict_function(data, model):
    # Extract features and target
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    
    # Predict the outcomes
    y_pred = model.predict(X)
    
    # Evaluate and display metrics
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {accuracy:.4f}\n")
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))

# Evaluate the best model on training and test datasets
print("\nEvaluating on Training Data:")
evaluate_predict_function(train_data, best_model)

print("\nEvaluating on Test Data:")
evaluate_predict_function(test_data, best_model)


Fitting 5 folds for each of 432 candidates, totalling 2160 fits




[CV] END model__C=0.01, model__max_iter=100, model__solver=lbfgs; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=100, model__solver=liblinear; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=200, model__solver=lbfgs; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=200, model__solver=liblinear; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=200, model__solver=liblinear; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=500, model__solver=lbfgs; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=500, model__solver=liblinear; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=500, model__solver=liblinear; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=500, model__solver=liblinear; total time=   0.0s
[CV] END model__C=0.1, model__max_iter=100, model__solver=lbfgs; total time=   0.0s
[CV] END model__C=0.1, model__max_iter=100, model__solver=liblinear; total time=   0.0s
[CV] END model__C=0.1, model__max_iter=




[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=3, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=3, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=3, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=3, smote__sampling_strategy=0.7; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=3, smote__sampling_strategy=0.7; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=3, smote__sampling_strategy=0.7; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=5, smote__sampling_strategy=0.6; total time=   0.0s
[CV] 



END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=5, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=5, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=5, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=5, smote__sampling_strategy=0.7; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=5, smote__sampling_strategy=0.7; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=5, smote__sampling_strategy=0.7; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=5, smote__sampling_strategy=0.7; total time=   0.0s
[CV] END mo



_max_iter=500, model__solver=liblinear, smote__k_neighbors=10, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=10, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=10, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=10, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=10, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=10, smote__sampling_strategy=0.7; total time=   0.0s
[CV] END model__C=0.005, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=10, smote__sampling_strategy=0.7; total time=   0.0s
[CV] END model__C=0.005, model



_sampling_strategy=1.0; total time=   0.1s
[CV] END model__C=0.01, model__max_iter=100, model__solver=saga, smote__k_neighbors=5, smote__sampling_strategy=1.0; total time=   0.1s
[CV] END model__C=0.01, model__max_iter=100, model__solver=saga, smote__k_neighbors=7, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=100, model__solver=saga, smote__k_neighbors=7, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=3, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=3, smote__sampling_strategy=0.7; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=3, smote__sampling_strategy=0.7; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=200, model__solver=liblinear, smote__k_neighbors=3, smote__sampling_strategy=0.7; tota




[CV] END model__C=0.01, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=7, smote__sampling_strategy=1.0; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=7, smote__sampling_strategy=1.0; total time=   0.1s
[CV] END model__C=0.01, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=7, smote__sampling_strategy=1.0; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=7, smote__sampling_strategy=1.0; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=7, smote__sampling_strategy=1.0; total time=   0.1s
[CV] END model__C=0.01, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=10, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.01, model__max_iter=500, model__solver=liblinear, smote__k_neighbors=10, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END m




[CV] END model__C=0.05, model__max_iter=200, model__solver=saga, smote__k_neighbors=3, smote__sampling_strategy=1.0; total time=   0.1s
[CV] END model__C=0.05, model__max_iter=200, model__solver=saga, smote__k_neighbors=3, smote__sampling_strategy=1.0; total time=   0.0s
[CV] END model__C=0.05, model__max_iter=200, model__solver=saga, smote__k_neighbors=3, smote__sampling_strategy=1.0; total time=   0.0s
[CV] END model__C=0.05, model__max_iter=200, model__solver=saga, smote__k_neighbors=3, smote__sampling_strategy=1.0; total time=   0.1s
[CV] END model__C=0.05, model__max_iter=200, model__solver=saga, smote__k_neighbors=3, smote__sampling_strategy=1.0; total time=   0.1s
[CV] END model__C=0.05, model__max_iter=200, model__solver=saga, smote__k_neighbors=5, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.05, model__max_iter=200, model__solver=saga, smote__k_neighbors=10, smote__sampling_strategy=0.6; total time=   0.0s
[CV] END model__C=0.05, model__max_iter=200, m

720 fits failed out of a total of 2160.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
720 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/imblearn/pipeline.py", line 329, in fit
    Xt, yt = self._fit(X, y, routed_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/s

In [98]:
#soft voting and stacked ensemble

In [100]:
from sklearn.ensemble import VotingClassifier, StackingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE

# Fine-tuned Logistic Regression pipeline with SMOTE
smote = SMOTE(random_state=42, k_neighbors=10, sampling_strategy=0.7)
logistic_model = LogisticRegression(C=0.005, max_iter=100, solver='saga', random_state=42, class_weight='balanced')
logistic_pipeline = Pipeline(steps=[('smote', smote), ('model', logistic_model)])

# Define other base models
xgb_model = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss', scale_pos_weight=1.5)
lgb_model = LGBMClassifier(random_state=42, class_weight='balanced')
rf_model = RandomForestClassifier(random_state=42, class_weight='balanced')

# Soft Voting Ensemble
voting_clf = VotingClassifier(
    estimators=[
        ('logistic', logistic_pipeline),
        ('xgb', xgb_model),
        ('lgb', lgb_model),
        ('rf', rf_model)
    ],
    voting='soft'
)

# Stacked Ensemble
stacked_clf = StackingClassifier(
    estimators=[
        ('logistic', logistic_pipeline),
        ('xgb', xgb_model),
        ('lgb', lgb_model)
    ],
    final_estimator=RandomForestClassifier(n_estimators=100, random_state=42)
)

# Train models
print("Training Soft Voting Ensemble...")
voting_clf.fit(X_train, y_train)

print("Training Stacked Ensemble...")
stacked_clf.fit(X_train, y_train)


Training Soft Voting Ensemble...


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
Training Stacked Ensemble...
[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000032 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> inits

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000032 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000027 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:Boos

In [102]:
# Function to evaluate predict_winner
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    import pandas as pd
    
    # Prepare features
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure correct order

    y = data['FTR']
    y_pred = model.predict(X)

    # Print metrics
    print(f"\nAccuracy: {accuracy_score(y, y_pred):.4f}")
    print("Classification Report:")
    print(classification_report(y, y_pred))

# Evaluate Voting Ensemble
print("\nEvaluating Soft Voting Ensemble on Training Data:")
evaluate_predict_function(train_data, voting_clf, team_encoder, home_metrics, away_metrics)

print("\nEvaluating Soft Voting Ensemble on Test Data:")
evaluate_predict_function(test_data, voting_clf, team_encoder, home_metrics, away_metrics)

# Evaluate Stacked Ensemble
print("\nEvaluating Stacked Ensemble on Training Data:")
evaluate_predict_function(train_data, stacked_clf, team_encoder, home_metrics, away_metrics)

print("\nEvaluating Stacked Ensemble on Test Data:")
evaluate_predict_function(test_data, stacked_clf, team_encoder, home_metrics, away_metrics)



Evaluating Soft Voting Ensemble on Training Data:

Accuracy: 0.6982
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.78      0.76      1404
           1       0.62      0.56      0.59       866

    accuracy                           0.70      2270
   macro avg       0.68      0.67      0.67      2270
weighted avg       0.69      0.70      0.70      2270


Evaluating Soft Voting Ensemble on Test Data:

Accuracy: 0.7149
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.78      0.76       266
           1       0.66      0.62      0.64       183

    accuracy                           0.71       449
   macro avg       0.70      0.70      0.70       449
weighted avg       0.71      0.71      0.71       449


Evaluating Stacked Ensemble on Training Data:

Accuracy: 0.6700
Classification Report:
              precision    recall  f1-score   support

           0       0.70

In [104]:
#ensemble:

In [106]:
from sklearn.ensemble import VotingClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Define the models to ensemble
soft_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_pipeline),  # Fine-Tuned Logistic Regression (SMOTE)
        ('gnb', gnb_model),               # Gaussian Naive Bayes
        ('xgb', xgb_model),               # XGBoost (Adjusted Threshold)
        ('lgb', lgb_model),               # LightGBM
        ('et', rf_model)                  # Extra Trees
    ],
    voting='soft'
)

# Train the ensemble
soft_voting_ensemble.fit(X_train, y_train)

# Evaluate the ensemble
def evaluate_predict_function(data, model, team_encoder, home_metrics, away_metrics):
    """Evaluate the predict_winner function on a given dataset."""
    # Prepare features (X)
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure correct order

    # Extract true labels
    y_true = data['FTR']

    # Predict labels
    y_pred = model.predict(X)

    # Print accuracy and classification metrics
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(y_true, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Evaluate on Training Data
print("\nEvaluating Soft Voting Ensemble on Training Data:")
evaluate_predict_function(train_data, soft_voting_ensemble, team_encoder, home_metrics, away_metrics)

# Evaluate on Test Data
print("\nEvaluating Soft Voting Ensemble on Test Data:")
evaluate_predict_function(test_data, soft_voting_ensemble, team_encoder, home_metrics, away_metrics)


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000041 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000

Evaluating Soft Voting Ensemble on Training Data:
Accuracy: 0.7013
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.81      0.77      1404
           1       0.63      0.53      0.57       866

    accuracy                           0.70      2270
   macro avg       0.68      0.67      0.67      2270
weighted avg       0.69      0.70      0.70      2270

Confusion Matrix:
[[1134  270]

In [108]:
#weight adjusted ensemble:

In [110]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Initialize the fine-tuned models
logistic_model = LogisticRegression(C=0.005, max_iter=100, solver='saga', random_state=42, class_weight='balanced')
svm_model = SVC(C=0.1, kernel='linear', probability=True, random_state=42, class_weight='balanced')
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, scale_pos_weight=1)
lgbm_model = LGBMClassifier(random_state=42, class_weight='balanced')

# Fit models on the training data
logistic_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lgbm_model.fit(X_train, y_train)

# Define weights based on performance
weights = {
    'logistic': 0.35,  # Adjusted for highest performance
    'svm': 0.25,       # Moderate performance
    'xgb': 0.25,       # High performance
    'lgbm': 0.15       # Lower but still relevant
}

# Create a weighted VotingClassifier
weighted_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),
        ('svm', svm_model),
        ('xgb', xgb_model),
        ('lgbm', lgbm_model)
    ],
    voting='soft',
    weights=[weights['logistic'], weights['svm'], weights['xgb'], weights['lgbm']]
)

# Fit the weighted ensemble model
weighted_voting_ensemble.fit(X_train, y_train)

# Function to evaluate predict winner
def evaluate_predict_winner(data, model):
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure features match trained order
    y_true = data['FTR']
    y_pred = model.predict(X)
    
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Evaluate the model
print("\nEvaluating Weighted Voting Ensemble on Training Data:")
evaluate_predict_winner(train_data, weighted_voting_ensemble)

print("\nEvaluating Weighted Voting Ensemble on Test Data:")
evaluate_predict_winner(test_data, weighted_voting_ensemble)


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000040 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000036 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000

Evaluating Weighted Voting Ensemble on Training Data:
Accuracy: 0.8889867841409692

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.92      0.91      1404
           1       0.86      0.84      0.85       866

    accuracy                           0.89      2270
   macro avg       0.88      0.88      0.88      2270
weighted avg       0.89      0.89      0.89      2270


Confusion Ma

In [112]:
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Define models with refined weights
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logreg_finetuned_smote', logistic_regression_finetuned_smote),  # High-performing model
        ('gnb', gaussian_naive_bayes),                                   # High-performing model
        ('xgb_adjusted', xgb_adjusted_threshold),                        # High-performing model
        ('lgbm', lightgbm_model),                                       # Moderate-performing model
        ('ada', adaboost_model),                                        # Moderate-performing model
    ],
    voting='soft',
    weights=[4, 3, 3, 2, 2],  # Adjusted weights for stronger models
    n_jobs=-1
)

# Train the refined voting ensemble
refined_voting_ensemble.fit(X_train, y_train)

# Evaluate predict winner function
def evaluate_predict_winner(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare the features
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Correct feature order
    y = data['FTR']
    
    # Make predictions
    y_pred = model.predict(X)
    
    # Calculate accuracy and print metrics
    accuracy = accuracy_score(y, y_pred)
    print(f"Accuracy: {accuracy:.4f}\n")
    print("Classification Report:")
    print(classification_report(y, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y, y_pred))

# Evaluate on training data
print("\nEvaluating Refined Weighted Voting Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble, team_encoder, home_metrics, away_metrics)

# Evaluate on test data
print("\nEvaluating Refined Weighted Voting Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble, team_encoder, home_metrics, away_metrics)


NameError: name 'logistic_regression_finetuned_smote' is not defined

In [114]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Initialize models
logistic_regression_finetuned_smote = LogisticRegression(
    C=0.005, max_iter=100, solver='saga', random_state=42, class_weight='balanced'
)
gaussian_naive_bayes = GaussianNB()
xgb_adjusted_threshold = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
lightgbm_model = LGBMClassifier(random_state=42)
adaboost_model = AdaBoostClassifier(n_estimators=100, random_state=42)

# Train individual models
logistic_regression_finetuned_smote.fit(X_train, y_train)
gaussian_naive_bayes.fit(X_train, y_train)
xgb_adjusted_threshold.fit(X_train, y_train)
lightgbm_model.fit(X_train, y_train)
adaboost_model.fit(X_train, y_train)

# Define weighted voting ensemble
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logreg_finetuned_smote', logistic_regression_finetuned_smote),
        ('gnb', gaussian_naive_bayes),
        ('xgb_adjusted', xgb_adjusted_threshold),
        ('lgbm', lightgbm_model),
        ('ada', adaboost_model),
    ],
    voting='soft',
    weights=[4, 3, 3, 2, 2],  # Adjust weights for stronger models
    n_jobs=-1
)

# Train the ensemble
refined_voting_ensemble.fit(X_train, y_train)

# Evaluate predict winner
def evaluate_predict_winner(data, model, team_encoder, home_metrics, away_metrics):
    # Prepare features
    X = pd.DataFrame()
    X[['HS', 'HST', 'HC']] = data['HomeTeam'].apply(
        lambda team_id: home_metrics.loc[team_id].values
    ).apply(pd.Series)
    X[['AS', 'AST', 'AC']] = data['AwayTeam'].apply(
        lambda team_id: away_metrics.loc[team_id].values
    ).apply(pd.Series)
    X['HomeTeam'] = data['HomeTeam']
    X['AwayTeam'] = data['AwayTeam']
    X = X[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure correct feature order
    y = data['FTR']
    
    # Make predictions
    y_pred = model.predict(X)
    
    # Calculate accuracy and print metrics
    accuracy = accuracy_score(y, y_pred)
    print(f"Accuracy: {accuracy:.4f}\n")
    print("Classification Report:")
    print(classification_report(y, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y, y_pred))

# Evaluate on training data
print("\nEvaluating Refined Weighted Voting Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble, team_encoder, home_metrics, away_metrics)

# Evaluate on test data
print("\nEvaluating Refined Weighted Voting Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble, team_encoder, home_metrics, away_metrics)


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000476 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381498 -> initscore=-0.483196
[LightGBM] [Info] Start training from score -0.483196


python(33621) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(33622) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(33623) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(33624) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Weighted Voting Ensemble on Training Data:
Accuracy: 0.7004

Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.82      0.77      1404
           1       0.64      0.50      0.56       866

    accuracy                           0.70      2270
   macro avg       0.68      0.66      0.67      2270
weighted avg       0.69      0.70      0.69      2270

Confusion Matrix:
[[1155  249]
 [ 431  435]]

Evaluating Refined Weighted Voting Ensemble on Test Data:
Accuracy: 0.7327

Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.84      0.79       266
           1       0.71      0.57      0.64       183

    accuracy                           0.73       449
   macro avg       0.73      0.71      0.71       449
weighted avg       0.73      0.73      0.73       449

Confusion Matrix:
[[224  42]
 [ 78 105]]


In [116]:
#optimising weight3d movting ensemble

In [118]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Initialize the fine-tuned models
logistic_model = LogisticRegression(C=0.005, max_iter=100, solver='saga', random_state=42, class_weight='balanced')
svm_model = SVC(C=0.1, kernel='linear', probability=True, random_state=42, class_weight='balanced')
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, scale_pos_weight=1)
lgbm_model = LGBMClassifier(random_state=42, class_weight='balanced')

# Fit models on the training data
logistic_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lgbm_model.fit(X_train, y_train)

# Refined weights based on individual model performance
weights = {
    'logistic': 0.4,  # Logistic regression performed best
    'svm': 0.2,       # SVM had strong results
    'xgb': 0.3,       # XGBoost is reliable
    'lgbm': 0.1       # LightGBM contributes less
}

# Create a refined weighted VotingClassifier
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),
        ('svm', svm_model),
        ('xgb', xgb_model),
        ('lgbm', lgbm_model)
    ],
    voting='soft',
    weights=[weights['logistic'], weights['svm'], weights['xgb'], weights['lgbm']]
)

# Fit the refined ensemble model
refined_voting_ensemble.fit(X_train, y_train)

# Function to evaluate predict winner
def evaluate_predict_winner(data, model):
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure features match trained order
    y_true = data['FTR']
    y_pred = model.predict(X)
    
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Evaluate the model
print("\nEvaluating Refined Weighted Voting Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Weighted Voting Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble)


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000038 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000

Evaluating Refined Weighted Voting Ensemble on Training Data:
Accuracy: 0.8951541850220265

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.92      0.92      1404
           1       0.87      0.85      0.86       866

    accuracy                           0.90      2270
   macro avg       0.89      0.89      0.89      2270
weighted avg       0.89      0.90      0.89      2270


Conf

In [120]:
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Refine the ensemble by removing potentially weaker models
refined_estimators = [
    ('logistic', logistic_model),   # Retain
    ('svm', svm_model),             # Retain or Remove based on analysis
    ('xgb', xgb_model),             # Retain
    # ('lgbm', lgbm_model),         # Optionally Remove
    # ('ada', adaboost_model)       # Optionally Remove
]

# Create a new weighted ensemble with refined estimators
refined_voting_ensemble = VotingClassifier(
    estimators=refined_estimators,
    voting='soft',
    weights=[0.4, 0.3, 0.3]  # Adjust weights based on retained models
)

# Fit the refined ensemble
refined_voting_ensemble.fit(X_train, y_train)

# Evaluate the refined ensemble
def evaluate_ensemble(data, model):
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    y_pred = model.predict(X)
    
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Evaluate on train and test data
print("\nEvaluating Refined Ensemble on Training Data:")
evaluate_ensemble(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Ensemble on Test Data:")
evaluate_ensemble(test_data, refined_voting_ensemble)


Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Ensemble on Training Data:
Accuracy: 0.8709251101321586

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.91      0.90      1404
           1       0.85      0.80      0.83       866

    accuracy                           0.87      2270
   macro avg       0.87      0.86      0.86      2270
weighted avg       0.87      0.87      0.87      2270


Confusion Matrix:
[[1281  123]
 [ 170  696]]

Evaluating Refined Ensemble on Test Data:
Accuracy: 0.779510022271715

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.82      0.81       266
           1       0.73      0.73      0.73       183

    accuracy                           0.78       449
   macro avg       0.77      0.77      0.77       449
weighted avg       0.78      0.78      0.78       449


Confusion Matrix:
[[217  49]
 [ 50 133]]


In [126]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np

# Logistic Regression (Fine-Tuned SMOTE)
logistic_regression_finetuned_smote = LogisticRegression(
    C=0.005, 
    max_iter=100, 
    solver='saga', 
    random_state=42, 
    class_weight='balanced'
)
logistic_regression_finetuned_smote.fit(X_train, y_train)

# XGBoost with Adjusted Threshold
class XGBWithThreshold(XGBClassifier):
    def __init__(self, threshold=0.5, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold
    
    def predict(self, X):
        probas = self.predict_proba(X)[:, 1]  # Get probabilities for the positive class
        return (probas >= self.threshold).astype(int)

xgb_adjusted_threshold = XGBWithThreshold(
    threshold=0.4,  # Adjust the threshold here
    use_label_encoder=False, 
    eval_metric='logloss', 
    random_state=42, 
    scale_pos_weight=1
)
xgb_adjusted_threshold.fit(X_train, y_train)

# Create the ensemble
ensemble_model = VotingClassifier(
    estimators=[
        ('logreg_finetuned_smote', logistic_regression_finetuned_smote),
        ('xgb_adjusted', xgb_adjusted_threshold),
    ],
    voting='soft',
    weights=[1, 1]
)

# Fit the ensemble
ensemble_model.fit(X_train, y_train)

# Function to evaluate predict_winner
def evaluate_predict_winner(data, model):
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]  # Ensure feature order matches training
    y_true = data['FTR']
    y_pred = model.predict(X)
    
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Evaluate the ensemble model
print("\nEvaluating Ensemble on Training Data:")
evaluate_predict_winner(train_data, ensemble_model)

print("\nEvaluating Ensemble on Test Data:")
evaluate_predict_winner(test_data, ensemble_model)


Parameters: { "threshold", "use_label_encoder" } are not used.

Parameters: { "threshold", "use_label_encoder" } are not used.




Evaluating Ensemble on Training Data:
Accuracy: 0.9462555066079296

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.96      0.96      1404
           1       0.94      0.92      0.93       866

    accuracy                           0.95      2270
   macro avg       0.94      0.94      0.94      2270
weighted avg       0.95      0.95      0.95      2270


Confusion Matrix:
[[1353   51]
 [  71  795]]

Evaluating Ensemble on Test Data:
Accuracy: 0.7817371937639198

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.80      0.81       266
           1       0.72      0.75      0.74       183

    accuracy                           0.78       449
   macro avg       0.77      0.78      0.78       449
weighted avg       0.78      0.78      0.78       449


Confusion Matrix:
[[214  52]
 [ 46 137]]


In [128]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Initialize models with adjusted regularization and parameters
logistic_model = LogisticRegression(C=0.003, max_iter=200, solver='saga', random_state=42, class_weight='balanced')
svm_model = SVC(C=0.05, kernel='linear', probability=True, random_state=42, class_weight='balanced')
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, 
                          max_depth=4, reg_alpha=1.0, reg_lambda=2.0, n_estimators=100)
lgbm_model = LGBMClassifier(random_state=42, class_weight='balanced', max_depth=5, reg_alpha=0.5)

# Step 2: Fit individual models on the training data
logistic_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lgbm_model.fit(X_train, y_train)

# Step 3: Refine ensemble with adjusted weights
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),  # Simpler model, less prone to overfitting
        ('svm', svm_model),            # Linear model, moderate complexity
        ('xgb', xgb_model),            # Complex model with regularization
        ('lgbm', lgbm_model)           # Balanced gradient boosting model
    ],
    voting='soft',
    weights=[4, 3, 2, 2],  # Heavier weight on simpler models
    n_jobs=-1
)

# Step 4: Fit the ensemble on the training data
refined_voting_ensemble.fit(X_train, y_train)

# Step 5: Evaluate predict winner function
def evaluate_predict_winner(data, model):
    # Extract features and target
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    
    # Predict using the model
    y_pred = model.predict(X)
    
    # Print evaluation metrics
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Step 6: Evaluate on training and testing data
print("\nEvaluating Refined Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble)


[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000043 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

python(34897) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(34898) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(34899) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(34900) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Ensemble on Training Data:
Accuracy: 0.8167400881057268

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.86      0.85      1404
           1       0.77      0.74      0.76       866

    accuracy                           0.82      2270
   macro avg       0.81      0.80      0.80      2270
weighted avg       0.82      0.82      0.82      2270


Confusion Matrix:
[[1211  193]
 [ 223  643]]

Evaluating Refined Ensemble on Test Data:
Accuracy: 0.7906458797327395

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.81      0.82       266
           1       0.73      0.77      0.75       183

    accuracy                           0.79       449
   macro avg       0.78      0.79      0.78       449
weighted avg       0.79      0.79      0.79       449


Confusion Matrix:
[[215  51]
 [ 43 140]]


In [132]:
def predict_winner(home_team, away_team, team_encoder, home_metrics, away_metrics, model):
    """
    Predict the outcome of a match between two teams using the refined ensemble model.
    
    Parameters:
        home_team (str): Name of the home team.
        away_team (str): Name of the away team.
        team_encoder (LabelEncoder): Encoder for team names.
        home_metrics (pd.DataFrame): Home team performance metrics.
        away_metrics (pd.DataFrame): Away team performance metrics.
        model: Trained ensemble model.
    
    Returns:
        str: Predicted match outcome ('Home Win' or 'Away Win').
    """
    # Encode team names
    home_team_encoded = team_encoder.transform([home_team])[0]
    away_team_encoded = team_encoder.transform([away_team])[0]

    # Retrieve metrics for the teams
    home_team_features = home_metrics.loc[home_team_encoded].values
    away_team_features = away_metrics.loc[away_team_encoded].values

    # Construct input feature array
    match_features = [
        home_team_encoded,  # HomeTeam
        away_team_encoded,  # AwayTeam
        home_team_features[0],  # HS
        away_team_features[0],  # AS
        home_team_features[1],  # HST
        away_team_features[1],  # AST
        home_team_features[2],  # HC
        away_team_features[2],  # AC
    ]

    # Predict the outcome
    predicted = model.predict([match_features])[0]

    # Decode the prediction to a match outcome
    return "Home Win" if predicted == 0 else "Away Win"


In [172]:
# Example team names
home_team_name = "Man City"
away_team_name = "Man United"

# Predict the match outcome
predicted_outcome = predict_winner(
    home_team=home_team_name,
    away_team=away_team_name,
    team_encoder=team_encoder,
    home_metrics=home_metrics,
    away_metrics=away_metrics,
    model=refined_voting_ensemble
)

print(f"The predicted outcome for {home_team_name} vs. {away_team_name} is: {predicted_outcome}")


The predicted outcome for Man City vs. Man United is: Away Win


In [154]:
#further tuning refined ensemble

In [156]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Initialize models with parameters optimized via grid search
logistic_model = LogisticRegression(C=0.002, max_iter=300, solver='saga', random_state=42, class_weight='balanced')
svm_model = SVC(C=0.03, kernel='rbf', probability=True, random_state=42, class_weight='balanced')
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, 
                          max_depth=5, reg_alpha=1.5, reg_lambda=2.0, n_estimators=150, learning_rate=0.05)
lgbm_model = LGBMClassifier(random_state=42, class_weight='balanced', max_depth=6, reg_alpha=0.7, 
                             learning_rate=0.05, num_leaves=32)

# Step 2: Fit individual models on the training data
logistic_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lgbm_model.fit(X_train, y_train)

# Step 3: Adjust weights based on validation performance
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),  # Adjusted for high precision
        ('svm', svm_model),            # Kernel-based model for non-linear data
        ('xgb', xgb_model),            # Boosted tree model for complex patterns
        ('lgbm', lgbm_model)           # Balanced gradient boosting model
    ],
    voting='soft',
    weights=[4, 3, 3, 2],  # Adjusted weights based on model validation performance
    n_jobs=-1
)

# Step 4: Fit the ensemble on the training data
refined_voting_ensemble.fit(X_train, y_train)

# Step 5: Predict Winner Function
def predict_winner(home_team, away_team, team_encoder, home_metrics, away_metrics, model):
    # Encode team names to numeric IDs
    home_team_id = team_encoder.transform([home_team])[0]
    away_team_id = team_encoder.transform([away_team])[0]

    # Fetch team-specific metrics
    home_team_stats = home_metrics.loc[home_team_id].values
    away_team_stats = away_metrics.loc[away_team_id].values

    # Prepare input features
    input_features = [home_team_id, away_team_id] + home_team_stats.tolist() + away_team_stats.tolist()
    input_df = pd.DataFrame([input_features], columns=['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC'])

    # Make prediction
    prediction = model.predict(input_df)[0]
    return "Home Win" if prediction == 0 else "Away Win"

# Step 6: Evaluate Predict Winner Function
def evaluate_predict_winner(data, model):
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    y_pred = model.predict(X)

    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Step 7: Evaluate on training and testing data
print("\nEvaluating Refined Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble)

# Step 8: Predict a specific match outcome
home_team_name = "Manchester United"
away_team_name = "Chelsea"
predicted_outcome = predict_winner(
    home_team=home_team_name,
    away_team=away_team_name,
    team_encoder=team_encoder,
    home_metrics=home_metrics,
    away_metrics=away_metrics,
    model=refined_voting_ensemble
)
print(f"The predicted outcome for {home_team_name} vs. {away_team_name} is: {predicted_outcome}")


[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000042 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

python(35812) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(35813) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(35814) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(35815) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Ensemble on Training Data:
Accuracy: 0.7911894273127753

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.85      0.83      1404
           1       0.74      0.70      0.72       866

    accuracy                           0.79      2270
   macro avg       0.78      0.77      0.78      2270
weighted avg       0.79      0.79      0.79      2270


Confusion Matrix:
[[1194  210]
 [ 264  602]]

Evaluating Refined Ensemble on Test Data:
Accuracy: 0.7906458797327395

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.82      0.82       266
           1       0.74      0.75      0.75       183

    accuracy                           0.79       449
   macro avg       0.78      0.78      0.78       449
weighted avg       0.79      0.79      0.79       449


Confusion Matrix:
[[217  49]
 [ 45 138]]


ValueError: y contains previously unseen labels: 'Manchester United'

In [158]:
#further hyperparameter tuning redefined ensemble

In [160]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Base datasets for training and testing
X_train_base = X_train
y_train_base = y_train
X_test_base = X_test
y_test_base = y_test

# Step 1: Hyperparameter optimization for Logistic Regression
logistic_param_grid = {
    'C': [0.001, 0.003, 0.01, 0.1],
    'max_iter': [100, 200, 500],
    'solver': ['saga', 'liblinear'],
    'class_weight': ['balanced']
}

logistic_search = GridSearchCV(LogisticRegression(random_state=42), logistic_param_grid, cv=5, scoring='accuracy', n_jobs=-1)
logistic_search.fit(X_train_base, y_train_base)
logistic_best = logistic_search.best_estimator_
print("Best Parameters for Logistic Regression:", logistic_search.best_params_)

# Step 2: Hyperparameter optimization for SVM
svm_param_grid = {
    'C': [0.01, 0.05, 0.1, 1],
    'kernel': ['linear', 'rbf'],
    'class_weight': ['balanced']
}

svm_search = GridSearchCV(SVC(probability=True, random_state=42), svm_param_grid, cv=5, scoring='accuracy', n_jobs=-1)
svm_search.fit(X_train_base, y_train_base)
svm_best = svm_search.best_estimator_
print("Best Parameters for SVM:", svm_search.best_params_)

# Step 3: Hyperparameter optimization for XGBoost
xgb_param_grid = {
    'max_depth': [3, 4, 5],
    'n_estimators': [50, 100, 200],
    'reg_alpha': [0, 0.5, 1],
    'reg_lambda': [1, 2, 3],
    'learning_rate': [0.01, 0.05, 0.1]
}

xgb_search = RandomizedSearchCV(XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42),
                                 xgb_param_grid, cv=5, scoring='accuracy', n_jobs=-1, n_iter=20, random_state=42)
xgb_search.fit(X_train_base, y_train_base)
xgb_best = xgb_search.best_estimator_
print("Best Parameters for XGBoost:", xgb_search.best_params_)

# Step 4: Hyperparameter optimization for LightGBM
lgbm_param_grid = {
    'max_depth': [3, 5, 7],
    'n_estimators': [50, 100, 200],
    'reg_alpha': [0, 0.5, 1],
    'learning_rate': [0.01, 0.05, 0.1],
    'class_weight': ['balanced']
}

lgbm_search = RandomizedSearchCV(LGBMClassifier(random_state=42),
                                  lgbm_param_grid, cv=5, scoring='accuracy', n_jobs=-1, n_iter=20, random_state=42)
lgbm_search.fit(X_train_base, y_train_base)
lgbm_best = lgbm_search.best_estimator_
print("Best Parameters for LightGBM:", lgbm_search.best_params_)

# Step 5: Assemble the refined ensemble
refined_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_best),
        ('svm', svm_best),
        ('xgb', xgb_best),
        ('lgbm', lgbm_best)
    ],
    voting='soft',
    weights=[4, 3, 2, 2],  # Adjust based on final performance
    n_jobs=-1
)

# Step 6: Fit the refined ensemble on the training data
refined_ensemble.fit(X_train_base, y_train_base)

# Step 7: Evaluate the refined ensemble
def evaluate_predict_winner(data, model):
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    y_pred = model.predict(X)

    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

print("\nEvaluating Refined Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_ensemble)

print("\nEvaluating Refined Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_ensemble)




Best Parameters for Logistic Regression: {'C': 0.003, 'class_weight': 'balanced', 'max_iter': 100, 'solver': 'saga'}
Best Parameters for SVM: {'C': 0.01, 'class_weight': 'balanced', 'kernel': 'linear'}


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

Best Parameters for XGBoost: {'reg_lambda': 2, 'reg_alpha': 0, 'n_estimators': 200, 'max_depth': 4, 'learning_rate': 0.05}
[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000182 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816

Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Ensemble on Training Data:
Accuracy: 0.8167400881057268

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.86      0.85      1404
           1       0.77      0.74      0.76       866

    accuracy                           0.82      2270
   macro avg       0.81      0.80      0.80      2270
weighted avg       0.82      0.82      0.82      2270


Confusion Matrix:
[[1210  194]
 [ 222  644]]

Evaluating Refined Ensemble on Test Data:
Accuracy: 0.7861915367483296

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.81      0.82       266
           1       0.73      0.75      0.74       183

    accuracy                           0.79       449
   macro avg       0.78      0.78      0.78       449
weighted avg       0.79      0.79      0.79       449


Confusion Matrix:
[[216  50]
 [ 46 137]]


In [164]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Initialize models
logistic_model = LogisticRegression(max_iter=300, solver='saga', random_state=42, class_weight='balanced')
svm_model = SVC(kernel='rbf', probability=True, random_state=42, class_weight='balanced')
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
lgbm_model = LGBMClassifier(random_state=42, class_weight='balanced')

# Step 2: Create the VotingClassifier
voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),
        ('svm', svm_model),
        ('xgb', xgb_model),
        ('lgbm', lgbm_model)
    ],
    voting='soft',
    n_jobs=-1
)

# Step 3: Define parameter grid for GridSearchCV
param_grid = {
    'logistic__C': [0.001, 0.002, 0.003, 0.005],
    'svm__C': [0.01, 0.03, 0.05],
    'xgb__max_depth': [4, 5, 6],
    'xgb__learning_rate': [0.03, 0.05, 0.1],
    'xgb__n_estimators': [100, 150, 200],
    'lgbm__max_depth': [5, 6, 7],
    'lgbm__learning_rate': [0.03, 0.05, 0.1],
    'lgbm__num_leaves': [16, 32, 64],
    'weights': [[3, 3, 2, 2], [4, 3, 3, 2], [4, 3, 2, 1]],  # Adjust ensemble weights
}

# Step 4: Perform GridSearchCV
grid_search = GridSearchCV(
    estimator=voting_ensemble,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=2,
    n_jobs=-1
)

# Step 5: Fit grid search on training data
grid_search.fit(X_train, y_train)

# Step 6: Best parameters and performance
print("Best Parameters:", grid_search.best_params_)
best_model = grid_search.best_estimator_

# Step 7: Evaluate Predict Winner Function
def evaluate_predict_winner(data, model):
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    y_pred = model.predict(X)

    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Step 8: Evaluate on training and testing data
print("\nEvaluating Best Model on Training Data:")
evaluate_predict_winner(train_data, best_model)

print("\nEvaluating Best Model on Test Data:")
evaluate_predict_winner(test_data, best_model)


Fitting 5 folds for each of 26244 candidates, totalling 131220 fits


python(36332) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(36333) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(36334) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(36335) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040886 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041451 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041524 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threa

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041117 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041139 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041137 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041437 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042134 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.040769 seconds.
You can set `force_row_wise=true` to rem

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039760 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042734 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.046660 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.





Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040835 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042792 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041694 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Start training from sc

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041408 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050472 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041071 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042737 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.051194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.053382 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042591 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041258 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040861 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041181 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041009 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [In

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055763 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041688 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8


Parameters: { "use_label_encoder" } are not used.


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040586 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206



Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.038309 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.049012 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042487 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] [binary:Boo

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040562 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042580 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042127 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040817 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040784 seconds.
You can set `force_col_wise=true` to re

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041382 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041238 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ov

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041155 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040309 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041610 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042787 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044310 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM]

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040322 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040350 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041588 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041432 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040304 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050559 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043318 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040578 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.04

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.034530 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041645 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041413 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040970 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040293 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040286 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040312 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threa

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040887 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040705 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042634 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040598 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.051507 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.052531 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050803 seconds.
You can set `force_col_wise=true` to re

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.051577 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.051618 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points 

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040494 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043334 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044293 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044075 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Tot

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041384 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041474 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.045033 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.034400 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043014 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043526 seconds.
You can s

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040789 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042652 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042887 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threa

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042022 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040745 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041075 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> in

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.029868 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050410 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040378 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040298 seconds.
You can set `force_col_wise=true` to remove the over

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044370 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044013 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042431 seconds.
You can set `force_col_wise=true` to remove the over

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040318 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040810 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041706 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039356 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040291 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040271 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040459 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] [binary:Boo

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040303 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042670 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040817 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040750 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041634 seconds.
You can set `force_col_wise=true` to re

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041734 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042976 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041898 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042131 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040778 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040416 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044058 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.045149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040307 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040295 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042963 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041958 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040695 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] [binary:Boo

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040554 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040276 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040282 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.





Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040920 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040663 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042956 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043309 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.053309 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.051895 seconds.
You can set `force_col_wise=true` to rem

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042499 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040719 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040754 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041509 seconds.
You can set `force_col_wise=true` to remove the over

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041514 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042258 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041930 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037662 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.035243 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.
Parameters: { "use_label_encoder" } are not used.


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040668 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041845 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041826 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040552 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [In

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042738 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.046060 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041356 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040353 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041042 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040339 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.051607 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040529 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040282 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042875 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:Boo

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.035934 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041403 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040817 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042811 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.045535 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.


[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040331 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041504 seconds.
You can set `force_col_wise=true` to remove the overhead.





[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040525 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042420 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGB

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040846 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.051106 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039477 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041099 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040430 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040997 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] [binary:Boo

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040653 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041681 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040413 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050517 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050443 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.049970 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050871 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042368 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.051190 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043679 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040242 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040343 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040266 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040862 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040308 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.046462 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.061571 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041540 second

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040487 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040916 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040371 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041356 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044233 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070118 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.





Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037002 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043751 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040509 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044094 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041671 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003971 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050286 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041043 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040356 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044121 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050786 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041133 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.038274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040256 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040235 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.015664 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.035677 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032393 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036292 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.028265 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039603 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037922 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.069610 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072106 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.083497 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.026648 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042740 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041851 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044948 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] [binary:Boo

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.049249 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041694 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041951 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] [binary:Boo

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050776 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.087128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040371 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.045169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.034845 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044998 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050290 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055346 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> in

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.047259 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040471 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043228 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043391 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001084 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039648 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036889 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.052185 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041505 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040531 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] T

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041802 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040494 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050725 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041618 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.034373 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 692, number of negative: 1124


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041533 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043816 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040512 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044388 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044573 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040322 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042548 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] T

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043822 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040973 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040967 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042316 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040572 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044357 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040621 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040338 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040645 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.053054 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040906 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM]

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044243 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042807 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054599 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050620 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040368 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042642 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044429 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040432 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044363 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041133 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040928 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040596 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Start training from score -0.000000
[LightGB

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041056 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041312 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040836 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ov

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.018652 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040496 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041043 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042802 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of d

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041123 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040327 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040369 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040483 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040367 seconds.
You can set `force_col_wise=true` to remove the o

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.


[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040290 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042055 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number


Parameters: { "use_label_encoder" } are not used.





Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040012 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050586 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041088 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042480 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 204
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.045189 seconds.
You can set `force_col_wise=true` to remove the overhead.


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041464 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040327 seconds.
You can set `for

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



KeyboardInterrupt: 

In [170]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from statsmodels.discrete.discrete_model import Poisson

# Step 1: Prepare data
# Features for predicting goals
features = ['HS', 'AS', 'HST', 'AST', 'HC', 'AC']
X_train_home = train_data[features]
X_train_away = train_data[features]
X_test_home = test_data[features]
X_test_away = test_data[features]

# Targets: Goals scored by each team
y_train_home = train_data['FTHG']  # Full-Time Home Goals
y_train_away = train_data['FTAG']  # Full-Time Away Goals
y_test_home = test_data['FTHG']
y_test_away = test_data['FTAG']

# Step 2: Fit Poisson regression models
poisson_home = Poisson(y_train_home, X_train_home).fit(disp=False)
poisson_away = Poisson(y_train_away, X_train_away).fit(disp=False)

# Step 3: Predict goals
pred_home_goals = np.round(poisson_home.predict(X_test_home))
pred_away_goals = np.round(poisson_away.predict(X_test_away))

# Step 4: Determine match outcome
def determine_outcome(home_goals, away_goals):
    if home_goals > away_goals:
        return 0  # Home Win
    elif home_goals < away_goals:
        return 2  # Away Win
    else:
        return 1  # Draw

y_pred_outcome = [determine_outcome(h, a) for h, a in zip(pred_home_goals, pred_away_goals)]

# Step 5: Evaluate performance
y_true_outcome = test_data['FTR']  # Actual outcomes
print("Accuracy:", accuracy_score(y_true_outcome, y_pred_outcome))
print("\nClassification Report:")
print(classification_report(y_true_outcome, y_pred_outcome))
print("\nConfusion Matrix:")
print(confusion_matrix(y_true_outcome, y_pred_outcome))

# Step 6: Predict specific match outcome
def predict_match_outcome(home_team, away_team, team_encoder, home_metrics, away_metrics, poisson_home, poisson_away):
    home_team_id = team_encoder.transform([home_team])[0]
    away_team_id = team_encoder.transform([away_team])[0]

    home_stats = home_metrics.loc[home_team_id].values
    away_stats = away_metrics.loc[away_team_id].values

    features = home_stats.tolist() + away_stats.tolist()
    home_goals = round(poisson_home.predict([features])[0])
    away_goals = round(poisson_away.predict([features])[0])

    return determine_outcome(home_goals, away_goals)

home_team_name = "Man United"
away_team_name = "Aston Villa"
predicted_outcome = predict_match_outcome(
    home_team=home_team_name,
    away_team=away_team_name,
    team_encoder=team_encoder,
    home_metrics=home_metrics,
    away_metrics=away_metrics,
    poisson_home=poisson_home,
    poisson_away=poisson_away
)

print(f"The predicted outcome for {home_team_name} vs. {away_team_name} is: {predicted_outcome}")


Accuracy: 0.6013363028953229

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.67      0.77       266
           1       0.56      0.51      0.53       183
           2       0.00      0.00      0.00         0

    accuracy                           0.60       449
   macro avg       0.49      0.39      0.43       449
weighted avg       0.76      0.60      0.67       449


Confusion Matrix:
[[177  72  17]
 [ 19  93  71]
 [  0   0   0]]
The predicted outcome for Man United vs. Aston Villa is: 2


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [174]:
#redefined ensemble wt=ith smote instead of balanced class:

In [176]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Step 1: Apply SMOTE to balance the classes
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Step 2: Initialize models with adjusted regularization and parameters
logistic_model = LogisticRegression(C=0.003, max_iter=200, solver='saga', random_state=42, class_weight=None)
svm_model = SVC(C=0.05, kernel='linear', probability=True, random_state=42, class_weight=None)
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, 
                          max_depth=4, reg_alpha=1.0, reg_lambda=2.0, n_estimators=100)
lgbm_model = LGBMClassifier(random_state=42, max_depth=5, reg_alpha=0.5)

# Step 3: Fit individual models on the resampled training data
logistic_model.fit(X_train_resampled, y_train_resampled)
svm_model.fit(X_train_resampled, y_train_resampled)
xgb_model.fit(X_train_resampled, y_train_resampled)
lgbm_model.fit(X_train_resampled, y_train_resampled)

# Step 4: Refine ensemble with adjusted weights
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),
        ('svm', svm_model),
        ('xgb', xgb_model),
        ('lgbm', lgbm_model)
    ],
    voting='soft',
    weights=[4, 3, 2, 2],  # Adjust weights based on importance
    n_jobs=-1
)

# Step 5: Fit the ensemble on the resampled training data
refined_voting_ensemble.fit(X_train_resampled, y_train_resampled)

# Step 6: Evaluate Predict Winner Function
def evaluate_predict_winner(data, model):
    # Extract features and target
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    
    # Predict using the model
    y_pred = model.predict(X)
    
    # Print evaluation metrics
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Step 7: Evaluate on training and testing data
print("\nEvaluating Refined Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble)


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 1404, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000045 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 214
[LightGBM] [Info] Number of data points in the train set: 2808, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


python(37354) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(37355) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(37356) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(37357) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Ensemble on Training Data:
Accuracy: 0.8074889867841409

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.82      0.84      1404
           1       0.73      0.79      0.76       866

    accuracy                           0.81      2270
   macro avg       0.80      0.80      0.80      2270
weighted avg       0.81      0.81      0.81      2270


Confusion Matrix:
[[1147  257]
 [ 180  686]]

Evaluating Refined Ensemble on Test Data:
Accuracy: 0.7817371937639198

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.76      0.80       266
           1       0.70      0.82      0.75       183

    accuracy                           0.78       449
   macro avg       0.78      0.79      0.78       449
weighted avg       0.79      0.78      0.78       449


Confusion Matrix:
[[201  65]
 [ 33 150]]


In [178]:
#smote with redefined esnsemble 2:

In [180]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: SMOTE for Handling Class Imbalance
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Step 2: Initialize Models with Optimized Hyperparameters
logistic_model = LogisticRegression(C=0.002, max_iter=300, solver='saga', random_state=42, class_weight='balanced')
svm_model = SVC(C=0.03, kernel='rbf', probability=True, random_state=42, class_weight='balanced')
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, 
                          max_depth=5, reg_alpha=1.5, reg_lambda=2.0, n_estimators=150, learning_rate=0.05)
lgbm_model = LGBMClassifier(random_state=42, class_weight='balanced', max_depth=6, reg_alpha=0.7, 
                             learning_rate=0.05, num_leaves=32)

# Step 3: Fit Individual Models
logistic_model.fit(X_train_smote, y_train_smote)
svm_model.fit(X_train_smote, y_train_smote)
xgb_model.fit(X_train_smote, y_train_smote)
lgbm_model.fit(X_train_smote, y_train_smote)

# Step 4: Refine Ensemble with Adjusted Weights
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),
        ('svm', svm_model),
        ('xgb', xgb_model),
        ('lgbm', lgbm_model)
    ],
    voting='soft',
    weights=[4, 3, 3, 2],  # Adjusted weights based on model performance
    n_jobs=-1
)

# Step 5: Fit the Ensemble Model
refined_voting_ensemble.fit(X_train_smote, y_train_smote)

# Step 6: Evaluate Predict Winner Function
def evaluate_predict_winner(data, model):
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    y_pred = model.predict(X)
    
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Step 7: Evaluate the Ensemble Model
print("\nEvaluating Refined Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble)


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 1404, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000057 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 214
[LightGBM] [Info] Number of data points in the train set: 2808, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Ensemble on Training Data:
Accuracy: 0.788546255506608

Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.80      0.82      1404
           1       0.71      0.76      0.73       866

    accuracy                           0.79      2270
   macro avg       0.78      0.78      0.78      2270
weighted avg       0.79      0.79      0.79      2270


Confusion Matrix:
[[1128  276]
 [ 204  662]]

Evaluating Refined Ensemble on Test Data:
Accuracy: 0.77728285077951

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.75      0.80       266
           1       0.69      0.82      0.75       183

    accuracy                           0.78       449
   macro avg       0.77      0.78      0.77       449
weighted avg       0.79      0.78      0.78       449


Confusion Matrix:
[[199  67]
 [ 33 150]]


In [182]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Initialize models with balanced class weights and optimized hyperparameters
logistic_model = LogisticRegression(C=0.002, max_iter=300, solver='saga', random_state=42, class_weight='balanced')
svm_model = SVC(C=0.03, kernel='rbf', probability=True, random_state=42, class_weight='balanced')
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42,
                          max_depth=5, reg_alpha=1.5, reg_lambda=2.0, n_estimators=150, learning_rate=0.05)
lgbm_model = LGBMClassifier(random_state=42, class_weight='balanced', max_depth=6, reg_alpha=0.7,
                             learning_rate=0.05, num_leaves=32)

# Step 2: Fit individual models on the training data
logistic_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lgbm_model.fit(X_train, y_train)

# Step 3: Define the ensemble with adjusted weights for soft voting
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),
        ('svm', svm_model),
        ('xgb', xgb_model),
        ('lgbm', lgbm_model)
    ],
    voting='soft',
    weights=[4, 3, 3, 2],  # Adjusted weights based on prior performance
    n_jobs=-1
)

# Step 4: Fit the ensemble model
refined_voting_ensemble.fit(X_train, y_train)

# Step 5: Define evaluation function
def evaluate_predict_winner(data, model):
    # Extract features and target
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    y_pred = model.predict(X)

    # Print evaluation metrics
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Step 6: Evaluate the ensemble model on training and testing data
print("\nEvaluating Refined Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble)


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000042 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Ensemble on Training Data:
Accuracy: 0.7911894273127753

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.85      0.83      1404
           1       0.74      0.70      0.72       866

    accuracy                           0.79      2270
   macro avg       0.78      0.77      0.78      2270
weighted avg       0.79      0.79      0.79      2270


Confusion Matrix:
[[1194  210]
 [ 264  602]]

Evaluating Refined Ensemble on Test Data:
Accuracy: 0.7906458797327395

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.82      0.82       266
           1       0.74      0.75      0.75       183

    accuracy                           0.79       449
   macro avg       0.78      0.78      0.78       449
weighted avg       0.79      0.79      0.79       449


Confusion Matrix:
[[217  49]
 [ 45 138]]


In [184]:
#adjusting weights

In [186]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Initialize models with adjusted regularization and parameters
logistic_model = LogisticRegression(C=0.003, max_iter=200, solver='saga', random_state=42, class_weight='balanced')
svm_model = SVC(C=0.05, kernel='linear', probability=True, random_state=42, class_weight='balanced')
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, 
                          max_depth=4, reg_alpha=1.0, reg_lambda=2.0, n_estimators=100)
lgbm_model = LGBMClassifier(random_state=42, class_weight='balanced', max_depth=5, reg_alpha=0.5)

# Fit individual models on the training data
logistic_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lgbm_model.fit(X_train, y_train)

# Adjust ensemble weights
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),  # Simpler model, less prone to overfitting
        ('svm', svm_model),            # Linear model, moderate complexity
        ('xgb', xgb_model),            # Complex model with regularization
        ('lgbm', lgbm_model)           # Balanced gradient boosting model
    ],
    voting='soft',
    weights=[5, 4, 3, 2],  # Heavier weight on better-performing models
    n_jobs=-1
)

# Fit the refined ensemble model on the training data
refined_voting_ensemble.fit(X_train, y_train)

# Function to evaluate the refined ensemble
def evaluate_predict_winner(data, model):
    # Extract features and target
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    
    # Predict using the model
    y_pred = model.predict(X)
    
    # Print evaluation metrics
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Evaluate on training and testing data
print("\nEvaluating Refined Weighted Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Weighted Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble)


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Weighted Ensemble on Training Data:
Accuracy: 0.8171806167400881

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.87      0.85      1404
           1       0.77      0.74      0.75       866

    accuracy                           0.82      2270
   macro avg       0.81      0.80      0.80      2270
weighted avg       0.82      0.82      0.82      2270


Confusion Matrix:
[[1218  186]
 [ 229  637]]

Evaluating Refined Weighted Ensemble on Test Data:
Accuracy: 0.7884187082405345

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.81      0.82       266
           1       0.73      0.75      0.74       183

    accuracy                           0.79       449
   macro avg       0.78      0.78      0.78       449
weighted avg       0.79      0.79      0.79       449


Confusion Matrix:
[[216  50]
 [ 45 138]]


In [188]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier

# Adjust weights in the ensemble
param_grid = {
    'weights': [
        [4, 3, 2, 2], [5, 3, 2, 1], [3, 3, 2, 2], [4, 4, 3, 1]
    ]
}

voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),
        ('svm', svm_model),
        ('xgb', xgb_model),
        ('lgbm', lgbm_model)
    ],
    voting='soft',
    n_jobs=-1
)

grid_search = GridSearchCV(voting_ensemble, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Evaluate best model
best_ensemble = grid_search.best_estimator_
print("\nBest Weights:", grid_search.best_params_)

evaluate_predict_winner(train_data, best_ensemble)
evaluate_predict_winner(test_data, best_ensemble)


python(38370) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(38371) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(38372) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(38373) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042316 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042522 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040963 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 692, number of negative: 1124
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054643 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050588 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040415 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041730 seconds.
You can set `force_col_wise=true` to remove the o

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.076493 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.076150 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 206
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[Ligh

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Number of positive: 693, number of negative: 1123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041376 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041345 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 208
[LightGBM] [Info] Total Bins 203
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] Number of data points in the train set: 1816, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061986 seconds.
You can set `force_col_wise=true` to rem

Parameters: { "use_label_encoder" } are not used.




Best Weights: {'weights': [4, 4, 3, 1]}
Accuracy: 0.8167400881057268

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.87      0.85      1404
           1       0.78      0.73      0.75       866

    accuracy                           0.82      2270
   macro avg       0.81      0.80      0.80      2270
weighted avg       0.82      0.82      0.82      2270


Confusion Matrix:
[[1226  178]
 [ 238  628]]
Accuracy: 0.7839643652561247

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.82      0.82       266
           1       0.73      0.74      0.74       183

    accuracy                           0.78       449
   macro avg       0.78      0.78      0.78       449
weighted avg       0.78      0.78      0.78       449


Confusion Matrix:
[[217  49]
 [ 48 135]]


In [190]:
#including random forest in ensemble:

In [192]:
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Initialize models with adjusted regularization and parameters
logistic_model = LogisticRegression(C=0.003, max_iter=200, solver='saga', random_state=42, class_weight='balanced')
svm_model = SVC(C=0.05, kernel='linear', probability=True, random_state=42, class_weight='balanced')
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, 
                          max_depth=4, reg_alpha=1.0, reg_lambda=2.0, n_estimators=100)
lgbm_model = LGBMClassifier(random_state=42, class_weight='balanced', max_depth=5, reg_alpha=0.5)
random_forest_model = RandomForestClassifier(n_estimators=150, max_depth=10, random_state=42, class_weight='balanced')

# Step 2: Fit individual models on the training data
logistic_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lgbm_model.fit(X_train, y_train)
random_forest_model.fit(X_train, y_train)

# Step 3: Refine ensemble with adjusted weights
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),      # Simpler model, less prone to overfitting
        ('svm', svm_model),                # Linear model, moderate complexity
        ('xgb', xgb_model),                # Complex model with regularization
        ('lgbm', lgbm_model),              # Balanced gradient boosting model
        ('rf', random_forest_model)        # Random Forest model
    ],
    voting='soft',
    weights=[4, 3, 2, 2, 3],  # Adjusted weights to balance model contributions
    n_jobs=-1
)

# Step 4: Fit the ensemble on the training data
refined_voting_ensemble.fit(X_train, y_train)

# Step 5: Evaluate predict winner function
def evaluate_predict_winner(data, model):
    # Extract features and target
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    
    # Predict using the model
    y_pred = model.predict(X)
    
    # Print evaluation metrics
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Step 6: Evaluate on training and testing data
print("\nEvaluating Refined Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble)


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000044 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Ensemble on Training Data:
Accuracy: 0.839647577092511

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.88      0.87      1404
           1       0.80      0.77      0.79       866

    accuracy                           0.84      2270
   macro avg       0.83      0.83      0.83      2270
weighted avg       0.84      0.84      0.84      2270


Confusion Matrix:
[[1237  167]
 [ 197  669]]

Evaluating Refined Ensemble on Test Data:
Accuracy: 0.7861915367483296

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.81      0.82       266
           1       0.73      0.75      0.74       183

    accuracy                           0.79       449
   macro avg       0.78      0.78      0.78       449
weighted avg       0.79      0.79      0.79       449


Confusion Matrix:
[[215  51]
 [ 45 138]]


In [194]:
#hyperparamter tuning ensemble 3:

In [196]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Hyperparameter optimization for individual models

# Logistic Regression
logistic_params = {
    'C': [0.001, 0.003, 0.01],
    'max_iter': [100, 200, 300],
    'solver': ['saga'],
    'class_weight': ['balanced']
}
logistic_grid = GridSearchCV(LogisticRegression(random_state=42), logistic_params, cv=3, scoring='accuracy', n_jobs=-1)
logistic_grid.fit(X_train, y_train)
best_logistic = logistic_grid.best_estimator_

# SVM
svm_params = {
    'C': [0.01, 0.05, 0.1],
    'kernel': ['linear', 'rbf'],
    'class_weight': ['balanced']
}
svm_grid = GridSearchCV(SVC(probability=True, random_state=42), svm_params, cv=3, scoring='accuracy', n_jobs=-1)
svm_grid.fit(X_train, y_train)
best_svm = svm_grid.best_estimator_

# XGBoost
xgb_params = {
    'max_depth': [3, 4, 5],
    'reg_alpha': [0.5, 1.0, 1.5],
    'reg_lambda': [1.0, 2.0, 3.0],
    'n_estimators': [100, 150, 200],
    'learning_rate': [0.05, 0.1]
}
xgb_grid = GridSearchCV(XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42), xgb_params, cv=3, scoring='accuracy', n_jobs=-1)
xgb_grid.fit(X_train, y_train)
best_xgb = xgb_grid.best_estimator_

# LightGBM
lgbm_params = {
    'max_depth': [4, 5, 6],
    'reg_alpha': [0.5, 0.7, 1.0],
    'learning_rate': [0.05, 0.1],
    'num_leaves': [20, 31, 40],
    'class_weight': ['balanced']
}
lgbm_grid = GridSearchCV(LGBMClassifier(random_state=42), lgbm_params, cv=3, scoring='accuracy', n_jobs=-1)
lgbm_grid.fit(X_train, y_train)
best_lgbm = lgbm_grid.best_estimator_

# Step 2: Create the refined VotingClassifier with optimized models
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', best_logistic),
        ('svm', best_svm),
        ('xgb', best_xgb),
        ('lgbm', best_lgbm)
    ],
    voting='soft',
    weights=[4, 3, 3, 2],  # Adjust weights based on performance
    n_jobs=-1
)

# Step 3: Fit the ensemble on the training data
refined_voting_ensemble.fit(X_train, y_train)

# Step 4: Evaluate predict winner function
def evaluate_predict_winner(data, model):
    # Extract features and target
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    
    # Predict using the model
    y_pred = model.predict(X)
    
    # Print evaluation metrics
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Step 5: Evaluate on training and testing data
print("\nEvaluating Refined Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble)


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 577, number of negative: 936
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001725 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 199
[LightGBM] [Info] Number of data points in the train set: 1513, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Start training from score 0.000000
[LightGBM] [Info] Number of

Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Ensemble on Training Data:
Accuracy: 0.779295154185022

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.83      0.82      1404
           1       0.72      0.69      0.71       866

    accuracy                           0.78      2270
   macro avg       0.77      0.76      0.76      2270
weighted avg       0.78      0.78      0.78      2270


Confusion Matrix:
[[1170  234]
 [ 267  599]]

Evaluating Refined Ensemble on Test Data:
Accuracy: 0.7928730512249443

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.80      0.82       266
           1       0.73      0.78      0.75       183

    accuracy                           0.79       449
   macro avg       0.79      0.79      0.79       449
weighted avg       0.80      0.79      0.79       449


Confusion Matrix:
[[214  52]
 [ 41 142]]


In [198]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Initialize models with adjusted regularization and parameters
logistic_model = LogisticRegression(C=0.003, max_iter=200, solver='saga', random_state=42, class_weight='balanced')
svm_model = SVC(C=0.05, kernel='linear', probability=True, random_state=42, class_weight='balanced')
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, 
                          max_depth=4, reg_alpha=1.0, reg_lambda=2.0, n_estimators=100)
lgbm_model = LGBMClassifier(random_state=42, class_weight='balanced', max_depth=5, reg_alpha=0.5)

# Step 2: Fit individual models on the training data
logistic_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lgbm_model.fit(X_train, y_train)

# Step 3: Refine ensemble with adjusted weights
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),  # Simpler model, less prone to overfitting
        ('svm', svm_model),            # Linear model, moderate complexity
        ('xgb', xgb_model),            # Complex model with regularization
        ('lgbm', lgbm_model)           # Balanced gradient boosting model
    ],
    voting='soft',
    weights=[4, 3, 2, 2],  # Heavier weight on simpler models
    n_jobs=-1
)

# Step 4: Fit the ensemble on the training data
refined_voting_ensemble.fit(X_train, y_train)

# Step 5: Evaluate predict winner function
def evaluate_predict_winner(data, model):
    # Extract features and target
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    
    # Predict using the model
    y_pred = model.predict(X)
    
    # Print evaluation metrics
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Step 6: Evaluate on training and testing data
print("\nEvaluating Refined Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble)

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000998 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


python(40256) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(40257) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(40258) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(40259) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Ensemble on Training Data:
Accuracy: 0.8167400881057268

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.86      0.85      1404
           1       0.77      0.74      0.76       866

    accuracy                           0.82      2270
   macro avg       0.81      0.80      0.80      2270
weighted avg       0.82      0.82      0.82      2270


Confusion Matrix:
[[1211  193]
 [ 223  643]]

Evaluating Refined Ensemble on Test Data:
Accuracy: 0.7906458797327395

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.81      0.82       266
           1       0.73      0.77      0.75       183

    accuracy                           0.79       449
   macro avg       0.78      0.79      0.78       449
weighted avg       0.79      0.79      0.79       449


Confusion Matrix:
[[215  51]
 [ 43 140]]


In [200]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Initialize models with adjusted regularization and parameters
logistic_model = LogisticRegression(C=0.003, max_iter=200, solver='saga', random_state=42, class_weight='balanced')
svm_model = SVC(C=0.05, kernel='linear', probability=True, random_state=42, class_weight='balanced')
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, 
                          max_depth=4, reg_alpha=1.0, reg_lambda=2.0, n_estimators=100)
lgbm_model = LGBMClassifier(random_state=42, class_weight='balanced', max_depth=5, reg_alpha=0.5)

# Step 2: Fit individual models on the training data
logistic_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lgbm_model.fit(X_train, y_train)

# Step 3: Refine ensemble with adjusted weights
refined_voting_ensemble = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),  # Simpler model, less prone to overfitting
        ('svm', svm_model),            # Linear model, moderate complexity
        ('xgb', xgb_model),            # Complex model with regularization
        ('lgbm', lgbm_model)           # Balanced gradient boosting model
    ],
    voting='soft',
    weights=[4, 3, 2, 2],  # Heavier weight on simpler models
    n_jobs=-1
)

# Step 4: Fit the ensemble on the training data
refined_voting_ensemble.fit(X_train, y_train)

# Step 5: Evaluate predict winner function
def evaluate_predict_winner(data, model):
    # Extract features and target
    X = data[['HomeTeam', 'AwayTeam', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC']]
    y_true = data['FTR']
    
    # Predict using the model
    y_pred = model.predict(X)
    
    # Print evaluation metrics
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

# Step 6: Evaluate on training and testing data
print("\nEvaluating Refined Ensemble on Training Data:")
evaluate_predict_winner(train_data, refined_voting_ensemble)

print("\nEvaluating Refined Ensemble on Test Data:")
evaluate_predict_winner(test_data, refined_voting_ensemble)



Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 866, number of negative: 1404
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041907 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 210
[LightGBM] [Info] Number of data points in the train set: 2270, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


python(51494) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(51495) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(51496) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(51497) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Parameters: { "use_label_encoder" } are not used.




Evaluating Refined Ensemble on Training Data:
Accuracy: 0.8167400881057268

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.86      0.85      1404
           1       0.77      0.74      0.76       866

    accuracy                           0.82      2270
   macro avg       0.81      0.80      0.80      2270
weighted avg       0.82      0.82      0.82      2270


Confusion Matrix:
[[1211  193]
 [ 223  643]]

Evaluating Refined Ensemble on Test Data:
Accuracy: 0.7906458797327395

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.81      0.82       266
           1       0.73      0.77      0.75       183

    accuracy                           0.79       449
   macro avg       0.78      0.79      0.78       449
weighted avg       0.79      0.79      0.79       449


Confusion Matrix:
[[215  51]
 [ 43 140]]


In [202]:
def predict_winner(home_team, away_team, team_encoder, home_metrics, away_metrics, model):
    """
    Predict the outcome of a match between two teams using the refined ensemble model.
    
    Parameters:
        home_team (str): Name of the home team.
        away_team (str): Name of the away team.
        team_encoder (LabelEncoder): Encoder for team names.
        home_metrics (pd.DataFrame): Home team performance metrics.
        away_metrics (pd.DataFrame): Away team performance metrics.
        model: Trained ensemble model.
    
    Returns:
        str: Predicted match outcome ('Home Win' or 'Away Win').
    """
    # Encode team names
    home_team_encoded = team_encoder.transform([home_team])[0]
    away_team_encoded = team_encoder.transform([away_team])[0]

    # Retrieve metrics for the teams
    home_team_features = home_metrics.loc[home_team_encoded].values
    away_team_features = away_metrics.loc[away_team_encoded].values

    # Construct input feature array
    match_features = [
        home_team_encoded,  # HomeTeam
        away_team_encoded,  # AwayTeam
        home_team_features[0],  # HS
        away_team_features[0],  # AS
        home_team_features[1],  # HST
        away_team_features[1],  # AST
        home_team_features[2],  # HC
        away_team_features[2],  # AC
    ]

    # Predict the outcome
    predicted = model.predict([match_features])[0]

    # Decode the prediction to a match outcome
    return "Home Win" if predicted == 0 else "Away Win"


In [278]:
home_team_name = "Man City"
away_team_name = "Man United"

# Predict the match outcome
predicted_outcome = predict_winner(
    home_team=home_team_name,
    away_team=away_team_name,
    team_encoder=team_encoder,
    home_metrics=home_metrics,
    away_metrics=away_metrics,
    model=refined_voting_ensemble
)

print(f"The predicted outcome for {home_team_name} vs. {away_team_name} is: {predicted_outcome}")


The predicted outcome for Man City vs. Man United is: Away Win


In [280]:
import joblib

# Save the trained ensemble model
joblib.dump(refined_voting_ensemble, "/Users/azizraihan/Desktop/cse299/finale/refined_voting_ensemble.pkl")

# Save the team encoder
joblib.dump(team_encoder, "/Users/azizraihan/Desktop/cse299/finale/team_encoder.pkl")

# Save the home and away metrics
home_metrics.to_pickle("/Users/azizraihan/Desktop/cse299/finale/home_metrics.pkl")
away_metrics.to_pickle("/Users/azizraihan/Desktop/cse299/finale/away_metrics.pkl")


In [282]:
import joblib

# Save X_train and y_train to separate files
joblib.dump(X_train, '/Users/azizraihan/Desktop/cse299/finale/X_train.joblib')
joblib.dump(y_train, '/Users/azizraihan/Desktop/cse299/finale/y_train.joblib')


['/Users/azizraihan/Desktop/cse299/finale/y_train.joblib']

In [284]:
import joblib

# Save train_data and test_data to separate files
joblib.dump(train_data, '/Users/azizraihan/Desktop/cse299/finale/train_data.joblib')
joblib.dump(test_data, '/Users/azizraihan/Desktop/cse299/finale/test_data.joblib')


['/Users/azizraihan/Desktop/cse299/finale/test_data.joblib']