In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the raw league records from disk into the notebook-wide frame `df`.
df = pd.read_csv(filepath_or_buffer="league_record.csv")

In [None]:
# Lift the column cap so every column is rendered when a frame is displayed.
pd.options.display.max_columns = None

# Last expression of the cell: render the raw frame.
df

In [None]:

def data_preprocessor(data, scaler=None):
    """Turn raw league records into a standardised feature table plus a label.

    Steps: label every match from its two score columns, keep only rows with
    ``week`` > 4, discard the identifier/score/team columns, then standardise
    the remaining feature columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw records. Must contain ``HCS``, ``ACS``, ``week``, ``league_id``,
        ``hour``, ``minutes``, ``HT`` and ``AT``; every other column is fed to
        the scaler and therefore has to be numeric. The input is not modified.
    scaler : object, optional
        Object exposing ``fit_transform`` and ``transform`` (for example a
        ``StandardScaler``). With the default ``None`` a fresh
        ``StandardScaler`` is fitted on ``data``, exactly as before. A scaler
        that has not been fitted yet (no ``scale_`` attribute) is fitted in
        place, so the caller can keep it; an already fitted one is only
        applied, which lets new rows be scaled with the training statistics.

    Returns
    -------
    pandas.DataFrame
        The scaled feature columns followed by ``result``
        (2 = home win, 1 = away win, 0 = draw).

    Notes
    -----
    The cleaned, *unscaled* frame is also stored in the global ``df_result``;
    that side effect is kept so cells that read it keep working.
    """
    global df_result

    cleaned = data.copy()

    # Label each match in one vectorised pass instead of a per-row apply.
    # Rows where neither comparison holds (equal scores, or a missing score)
    # fall through to the draw label, as the row-wise version did.
    home_goals = cleaned['HCS']
    away_goals = cleaned['ACS']
    cleaned['result'] = np.select(
        [(home_goals > away_goals).to_numpy(), (home_goals < away_goals).to_numpy()],
        [2, 1],      # home_win = 2, away_win = 1
        default=0,   # draw = 0
    ).astype("int64")

    # Rows from the first four weeks are dropped (the original note calls them
    # rows with incomplete values).
    cleaned = cleaned[cleaned['week'] > 4]

    # Remove identifiers, kick-off time, team names and the raw scores (the
    # scores would leak the label into the features).
    cleaned = cleaned.drop(
        ["league_id", "week", "hour", "minutes", "HCS", "ACS", "HT", "AT"], axis=1
    )
    cleaned = cleaned.reset_index(drop=True)  # 0..n-1 so the concat below lines up

    df_result = cleaned

    X = cleaned.drop("result", axis=1)
    y = cleaned["result"]

    if scaler is None:
        scaler = StandardScaler()
    if hasattr(scaler, "scale_"):
        # Already fitted: reuse its statistics rather than refitting on X.
        X_scaled = scaler.transform(X)
    else:
        X_scaled = scaler.fit_transform(X)

    # Re-attach the column names lost by the scaler and append the label.
    data_preprocessed = pd.concat([pd.DataFrame(X_scaled, columns=X.columns), y], axis=1)

    return data_preprocessed

In [None]:
# Run the full cleaning/scaling step on the raw records and show the outcome.
processed_data = data_preprocessor(data=df)

processed_data

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# Work on the preprocessed frame produced earlier in the notebook.
dataset = processed_data

# Separate the label column from the predictor columns.
y = dataset["result"]
X = dataset.drop(columns="result")

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Seeded random forest with otherwise default settings, fitted on the training split.
rfc = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out rows.
y_pred = rfc.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
f1 = f1_score(y_true=y_test, y_pred=y_pred, average='weighted')

print('Accuracy:', accuracy)
print('F1 score:', f1)

In [None]:


# Accuracy: 0.6951357466063348
# F1 score: 0.679784963649518

In [None]:
# Spot check: score the fitted forest on the first five preprocessed rows.
new = processed_data.head(5).copy()
check = new.pop("result")  # takes the label column out of `new` and keeps it for scoring
new1 = rfc.predict(new)
see = accuracy_score(check, new1)
see

In [None]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score

# Evaluate the random forest with shuffled k-fold cross-validation.
dataset = processed_data

# Name of the label column
target_var = 'result'

# Number of folds
n_splits = 100

# Shuffled, seeded splitter so the folds are repeatable
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Split predictors and label once; doing the drop inside the loop rebuilt the
# whole feature frame twice for every fold.
features = dataset.drop(target_var, axis=1)
target = dataset[target_var]

# Per-fold scores
accuracy_scores = []
f1_scores = []

# NOTE: the loop reassigns X_train, X_test, y_train, y_test, y_pred, accuracy
# and f1, so after this cell they describe the last fold, not the earlier
# 80/20 split.
for train_idx, test_idx in kfold.split(dataset):
    # Positional row selection for the current fold
    X_train, X_test = features.iloc[train_idx], features.iloc[test_idx]
    y_train, y_test = target.iloc[train_idx], target.iloc[test_idx]

    # Fresh, identically seeded forest for every fold
    rf = RandomForestClassifier(random_state=42)
    rf.fit(X_train, y_train)

    y_pred = rf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')

    accuracy_scores.append(accuracy)
    f1_scores.append(f1)

# Average over the folds that were actually scored
mean_accuracy = sum(accuracy_scores) / len(accuracy_scores)
mean_f1 = sum(f1_scores) / len(f1_scores)

print(f"Mean accuracy score: {mean_accuracy}")
print(f"Mean F1 score: {mean_f1}")

In [None]:

# Mean accuracy score: 0.7037461695607764
# Mean F1 score: 0.6904530247238667

In [None]:
# Testing the model

test = df.loc[8355:8359]

# data_preprocessor fits a brand-new StandardScaler on whatever frame it gets,
# so running it on these five rows would standardise them with their own
# mean/std rather than the statistics `rfc` was trained with. The same matches
# already exist in processed_data with the training-time scaling, so look them
# up there instead.
kept = df["week"] > 4                      # the row filter data_preprocessor applies
row_position = (kept.cumsum() - 1)[kept]   # df label -> row number in processed_data
test_rows = row_position.reindex(test.index).dropna().astype(int)

new = processed_data.iloc[test_rows.to_numpy()].drop("result", axis=1)
new_pred = rfc.predict(new)
new_pred

In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import time

# Local import: this cell's import list does not include Service, which the
# later scraping cells already use to start Chrome.
from selenium.webdriver.chrome.service import Service

# Selenium 4 takes the chromedriver location through a Service object; passing
# the path as the first positional argument is deprecated and is not accepted
# by recent releases.
service = Service("chrome_driver/chromedriver.exe")
driver = webdriver.Chrome(service=service)

# Navigate to a web page
# NOTE(review): the URL carries a `t=` value that looks like a session token —
# confirm it is fine to keep it in the notebook.
driver.get("https://vsmobile.bet9ja.com/mobile/themes/?sk=bet9ja&t=b61c29e6-9348-4c58-af90-378760a74693&game"
               "=league_premier&pid=14001,14003,14011,14012,14014,14015,14016,"
               "14017&v=0&text=Premier&lang=en_GB#resutls&ui_state=dialog")

print("waiting ...")
time.sleep(2)  # fixed pause before the menu icon is clicked

# Open the side menu, then the results entry inside it.
driver.find_element(By.XPATH, "//div[@class='ui-panel-wrapper']//i[@class='fa fa-bars icon-menu']").click()
time.sleep(1)

driver.find_element(By.XPATH, "//a[@id='a_bet_results']").click()
time.sleep(3)

# Dump the visible text of the results table.
all_weeks = driver.find_element(By.XPATH, "//table[@id='results-div-header-mainTable']").text
print(all_weeks)

time.sleep(3)

# The browser is left open on purpose here; call driver.quit() once it is no
# longer needed so the Chrome/chromedriver processes do not pile up.
# driver.quit()

In [None]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import time
from selenium.webdriver.chrome.service import Service

service = Service("chrome_driver/chromedriver.exe")
driver = webdriver.Chrome(service=service)

# Navigate to a web page
# NOTE(review): the URL carries a `t=` value that looks like a session token —
# confirm it is fine to keep it in the notebook.
driver.get("https://vsmobile.bet9ja.com/mobile/themes/?sk=bet9ja&t=b61c29e6-9348-4c58-af90-378760a74693&game"
               "=league_premier&pid=14001,14003,14011,14012,14014,14015,14016,"
               "14017&v=0&text=Premier&lang=en_GB#resutls&ui_state=dialog")
# driver.maximize_window()
time.sleep(10)  # fixed pause before the first element lookup

action = ActionChains(driver)
record_list = []

# One record per match. The two team columns were both called "HT"; the second
# one holds the other team of the fixture and is the away team ("AT").
RECORD_COLUMNS = [
    "week",
    "HT", "AT", "home", "draw", "away",
    "over_1", "under_1",
    "over_2", "under_2",
    "over_3", "under_3",
    "over_4", "under_4",
    "H_score", "A_score"]

# Over/Under markets reached through the dropdown: (arrow-down presses, tab id).
OVER_UNDER_MARKETS = (
    (3, "tab_id_Over_Under_2_5"),
    (4, "tab_id_Over_Under_3_5"),
    (5, "tab_id_Over_Under_4_5"),
)

print("Waiting For New Record ...")
while True:
    week_num = driver.find_element(By.XPATH, "//span[@id='leagueWeekNumber']").text
    counter = driver.find_element(By.XPATH, "//div[@id='bets-time-betContdown']").text

    # Collect odds only while the countdown starts with "00" and its last two
    # digits are strictly between 20 and 58 (presumably MM:SS — confirm).
    if counter[:2] == "00" and 20 < int(counter[-2:]) < 58:
        start_time = time.time()
        print("Start data collection  ...")

        match_result = driver.find_element(By.XPATH, "//div[@id='tab_id_Match_Result']").text

        driver.find_element(By.XPATH, "//a[@id='ui-id-3']").click()
        over_one = driver.find_element(By.XPATH, "//div[@id='tab_id_Over_Under_1_5']").text

        market_select = driver.find_element(By.XPATH, "//select[@id='bet-select-market']")

        # Step through the remaining markets with the keyboard and read each tab.
        market_texts = []
        for presses, tab_id in OVER_UNDER_MARKETS:
            market_select.click()
            for _ in range(presses):
                ActionChains(driver).send_keys(Keys.ARROW_DOWN).perform()
            action.send_keys(Keys.ENTER).perform()
            market_texts.append(driver.find_element(By.XPATH, f"//div[@id='{tab_id}']").text)
        over_two, over_three, over_four = market_texts

        end_time = time.time()
        print("Odds Collection Time:", end_time - start_time)
        print("\nPlease Wait \nNew Records Loading ...")

        # Wait until the page shows the next week, then read the results table.
        while True:
            counter = driver.find_element(By.XPATH, "//div[@id='bets-time-betContdown']").text
            current_week = driver.find_element(By.XPATH, "//span[@id='leagueWeekNumber']").text
            if current_week == "01":
                # Season rollover: treat the new week 1 as "week 39" so the
                # finished week is still current_week - 1.
                current_week = 39
            if (counter[:2] == "00" and 50 < int(counter[-2:]) < 58
                    and int(current_week) == int(week_num) + 1):
                print("inside IF statement")
                driver.find_element(By.XPATH, "//div[@class='ui-panel-wrapper']//i[@class='fa fa-bars icon-menu']").click()

                driver.find_element(By.XPATH, "//a[@id='a_bet_results']").click()
                time.sleep(8)
                score_result = driver.find_element(By.XPATH, "//table[@id='results-div-header-mainTable']").text
                driver.find_element(By.XPATH, "//div[@id='results']//i[@class='fa fa-bars icon-menu']").click()
                time.sleep(2)
                driver.find_element(By.XPATH, "//li[@class='li_bet']//a[@id='a_bet_bet']").click()
                time.sleep(3)
                break
            else:
                time.sleep(1)

        finished_week = int(current_week) - 1

        # Every second line, starting at the third, is kept as a fixture line.
        match_result = match_result.split("\n")[2::2]
        over_one = over_one.split("\n")[2::2]
        over_two = over_two.split("\n")[2::2]
        over_three = over_three.split("\n")[2::2]
        over_four = over_four.split("\n")[2::2]
        # Ten lines following the finished week's header.
        # NOTE(review): a plain split on "WEEK 3" also matches "WEEK 30".."WEEK 38"
        # if those headers appear earlier in the table text — confirm the layout.
        score_result = score_result.split(f"WEEK {finished_week}")[1].split("\n")[1:11]

        temp_rec = []
        for hda, one, two, three, four, score in zip(match_result, over_one, over_two, over_three, over_four, score_result):
            # Odds are the trailing whitespace-separated tokens of each line.
            # Fixed-width slices silently misread any price of five or more
            # characters such as "10.50".
            odds = [float(token)
                    for line, count in ((hda, 3), (one, 2), (two, 2), (three, 2), (four, 2))
                    for token in line.split()[-count:]]
            if len(odds) != 11:
                raise ValueError(f"unexpected odds layout for fixture line: {hda!r}")
            # Team codes come from fixed positions in the line; the scores are
            # read as single characters at positions 4 and 6 of the result line.
            data = (finished_week,
                    hda[:3], hda[6:9],
                    *odds,
                    int(score[4]), int(score[6]))
            temp_rec.append(data)
            record_list.append(data)

        for rec in temp_rec:
            print(rec)

        print("\nWeek", finished_week, "Data Collection Complete ...\n")

        # Week 38 just finished: write everything collected so far.
        if int(current_week) == 39:
            new_df = pd.DataFrame(record_list, columns=RECORD_COLUMNS)
            new_df.to_csv("odds_data.csv", index=False)
    else:
        time.sleep(1)

        

# time.sleep(3)

# # Close the browser session
# driver.quit()

In [2]:
import pandas as pd
from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
import time

service = Service("chrome_driver/chromedriver.exe")
driver = webdriver.Chrome(service=service)

# NOTE(review): the URL carries a `t=` value that looks like a session token —
# confirm it is fine to keep it in the notebook.
driver.get("https://vsmobile.bet9ja.com/mobile/themes/?sk=bet9ja&t=b61c29e6-9348-4c58-af90-378760a74693&game"
               "=league_premier&pid=14001,14003,14011,14012,14014,14015,14016,"
               "14017&v=0&text=Premier&lang=en_GB#resutls&ui_state=dialog")
# driver.maximize_window()
time.sleep(10)  # fixed pause before the first element lookup

action = ActionChains(driver)
record_list = []

# One record per match. The two team columns were both called "HT"; the second
# one holds the other team of the fixture and is the away team ("AT").
RECORD_COLUMNS = [
    "week",
    "HT", "AT", "home", "draw", "away",
    "over_1", "under_1",
    "over_2", "under_2",
    "over_3", "under_3",
    "over_4", "under_4",
    "H_score", "A_score"]

# Over/Under markets reached through the dropdown: (arrow-down presses, tab id).
OVER_UNDER_MARKETS = (
    (3, "tab_id_Over_Under_2_5"),
    (4, "tab_id_Over_Under_3_5"),
    (5, "tab_id_Over_Under_4_5"),
)

print("Games Loading  ...\n")
while True:
    week_num = driver.find_element(By.XPATH, "//span[@id='leagueWeekNumber']").text
    counter = driver.find_element(By.XPATH, "//div[@id='bets-time-betContdown']").text

    # Collect odds only while the countdown starts with "00" and its last two
    # digits are strictly between 20 and 50 (presumably MM:SS — confirm).
    if counter[:2] == "00" and 20 < int(counter[-2:]) < 50:
        match_result = driver.find_element(By.XPATH, "//div[@id='tab_id_Match_Result']").text

        driver.find_element(By.XPATH, "//a[@id='ui-id-3']").click()
        over_one = driver.find_element(By.XPATH, "//div[@id='tab_id_Over_Under_1_5']").text

        market_select = driver.find_element(By.XPATH, "//select[@id='bet-select-market']")

        # Step through the remaining markets with the keyboard and read each tab.
        market_texts = []
        for presses, tab_id in OVER_UNDER_MARKETS:
            market_select.click()
            for _ in range(presses):
                ActionChains(driver).send_keys(Keys.ARROW_DOWN).perform()
            action.send_keys(Keys.ENTER).perform()
            market_texts.append(driver.find_element(By.XPATH, f"//div[@id='{tab_id}']").text)
        over_two, over_three, over_four = market_texts

        print("counting")
        time.sleep(70)  # fixed pause before polling for the results window

        # Poll until the countdown is in the 51-57 window, then read the results table.
        while True:
            counter = driver.find_element(By.XPATH, "//div[@id='bets-time-betContdown']").text
            current_week = driver.find_element(By.XPATH, "//span[@id='leagueWeekNumber']").text

            if counter[:2] == "00" and 50 < int(counter[-2:]) < 58:
                week_check = int(current_week) - 1
                if week_check == 0:
                    # Season rollover: treat the new week 1 as "week 39" so the
                    # finished week is still current_week - 1.
                    current_week = 39
                print("collecting ...")
                driver.find_element(By.XPATH, "//div[@class='ui-panel-wrapper']//i[@class='fa fa-bars icon-menu']").click()
                time.sleep(1)
                try:
                    driver.find_element(By.XPATH, "//a[@id='a_bet_results']").click()
                    time.sleep(8)
                    score_result = driver.find_element(By.XPATH, "//table[@id='results-div-header-mainTable']").text
                    driver.find_element(By.XPATH, "//div[@id='results']//i[@class='fa fa-bars icon-menu']").click()
                except ElementClickInterceptedException:
                    # NOTE(review): this branch waits and reads the table without
                    # clicking the results link again; if the first click was the
                    # one intercepted, the results view may never have opened — confirm.
                    time.sleep(20)
                    score_result = driver.find_element(By.XPATH, "//table[@id='results-div-header-mainTable']").text
                    driver.find_element(By.XPATH, "//div[@id='results']//i[@class='fa fa-bars icon-menu']").click()

                time.sleep(1)
                driver.find_element(By.XPATH, "//li[@class='li_bet']//a[@id='a_bet_bet']").click()
                time.sleep(3)
                break
            else:
                time.sleep(1)

        finished_week = int(current_week) - 1

        # Every second line, starting at the third, is kept as a fixture line.
        match_result = match_result.split("\n")[2::2]
        over_one = over_one.split("\n")[2::2]
        over_two = over_two.split("\n")[2::2]
        over_three = over_three.split("\n")[2::2]
        over_four = over_four.split("\n")[2::2]
        # Ten lines following the finished week's header.
        # NOTE(review): a plain split on "WEEK 3" also matches "WEEK 30".."WEEK 38"
        # if those headers appear earlier in the table text — confirm the layout.
        score_result = score_result.split(f"WEEK {finished_week}")[1].split("\n")[1:11]

        for hda, one, two, three, four, score in zip(match_result, over_one, over_two, over_three, over_four, score_result):
            # Odds are the trailing whitespace-separated tokens of each line.
            # Fixed-width slices silently misread any price of five or more
            # characters such as "10.50".
            odds = [float(token)
                    for line, count in ((hda, 3), (one, 2), (two, 2), (three, 2), (four, 2))
                    for token in line.split()[-count:]]
            if len(odds) != 11:
                raise ValueError(f"unexpected odds layout for fixture line: {hda!r}")
            # Team codes come from fixed positions in the line; the scores are
            # read as single characters at positions 4 and 6 of the result line.
            data = (finished_week,
                    hda[:3], hda[6:9],
                    *odds,
                    int(score[4]), int(score[6]))
            record_list.append(data)

        print("Week", finished_week, "Data Collection Complete ...")

        # Week 38 just finished: write everything collected so far.
        if int(current_week) == 39:
            new_df = pd.DataFrame(record_list, columns=RECORD_COLUMNS)
            new_df.to_csv("odds_data.csv", index=False)
    else:
        time.sleep(1)


# # Close the browser session
# driver.quit()

Games Loading  ...



In [2]:
# Write whatever the scraping loop has collected so far to its own CSV file.
# The second team column was also named "HT", which gave the frame two
# identically named columns; it holds the second team of the fixture, so it is
# labelled "AT" here.
new_df = pd.DataFrame(record_list, columns=[
    "week",
    "HT", "AT", "home", "draw", "away",
    "over_1", "under_1",
    "over_2", "under_2",
    "over_3", "under_3",
    "over_4", "under_4",
    "H_score", "A_score"])

new_df.to_csv("test_7_odds_data.csv", index=False)