In [16]:
import itertools
import pandas as pd

# Step 1: Define probabilities and odds for each match
matches = [
    {'team': 'Dinamo Zagreb vs Istra', 'win': 0.77, 'draw': 0.19, 'loss': 0.04, 'win_odds': 1.30, 'draw_odds': 5.26, 'loss_odds': 25.00},
    {'team': 'Slavia Prague vs Liberec', 'win': 0.70, 'draw': 0.21, 'loss': 0.09, 'win_odds': 1.43, 'draw_odds': 4.76, 'loss_odds': 11.11},
    {'team': 'Sparta Prague vs Dukla Praha', 'win': 0.76, 'draw': 0.19, 'loss': 0.05, 'win_odds': 1.32, 'draw_odds': 5.26, 'loss_odds': 20.00},
    {'team': 'BATE Borisov vs Minsk', 'win': 0.65, 'draw': 0.21, 'loss': 0.14, 'win_odds': 1.54, 'draw_odds': 4.76, 'loss_odds': 7.14},
    {'team': 'Benfica vs Fulham', 'win': 0.55, 'draw': 0.25, 'loss': 0.20, 'win_odds': 1.80, 'draw_odds': 4.00, 'loss_odds': 5.00},
    {'team': 'Gornik Zabrze vs Pogon', 'win': 0.35, 'draw': 0.29, 'loss': 0.36, 'win_odds': 2.86, 'draw_odds': 3.45, 'loss_odds': 2.78},
    {'team': 'Fortuna Sittard vs De Graafschap', 'win': 0.52, 'draw': 0.31, 'loss': 0.17, 'win_odds': 1.92, 'draw_odds': 3.23, 'loss_odds': 5.88},
]

# Step 2: Generate all possible combinations of outcomes (win, draw, loss)
outcomes = ['win', 'draw', 'loss']
combinations = list(itertools.product(outcomes, repeat=len(matches)))

# Step 3: Calculate total probability, total odds, and expected value for each combination
results = []
for combo in combinations:
    total_prob = 1.0
    total_odds = 1.0
    for i, outcome in enumerate(combo):
        match = matches[i]
        total_prob *= match[outcome]
        total_odds *= match[f'{outcome}_odds']
    # Calculate the expected value for the combination
    expected_value = (total_prob * total_odds) - (1 - total_prob)
    results.append((combo, total_prob, total_odds, expected_value))

# Step 4: Sort the results by Expected Value, then by Total Probability, then by Total Odds
results.sort(key=lambda x: (x[3], x[1], x[2]), reverse=True)

# Step 5: Track the counts of each outcome type
outcome_counts = {outcome: 0 for outcome in outcomes}
selected_combinations = []

# Define a threshold for max number of combinations to select
max_combinations = 3000  # You can adjust this value

for result in results:
    combo, prob, odds, ev = result
    selected_combinations.append(result)
    for outcome in combo:
        outcome_counts[outcome] += 1

    # Break if we've selected enough combinations
    if len(selected_combinations) >= max_combinations:
        break

# Step 6: Create a DataFrame with each match in its own column, including team names and outcomes
columns = [f'Match {i+1} ({matches[i]["team"]})' for i in range(len(matches))]
columns.extend(['Total Probability', 'Total Odds', 'Expected Value'])
data = []

for combo, prob, odds, ev in selected_combinations:
    data.append(list(combo) + [prob, odds, ev])

df = pd.DataFrame(data, columns=columns)

# Step 11: Backtest Against Actual Results

# Actual results (you've already provided this JSON)
actual_results = {
    "Dinamo Zagreb vs Istra": "win",
    "Slavia Prague vs Liberec": "win",
    "Sparta Prague vs Dukla Praha": "win",
    "BATE Borisov vs Minsk": "loss",
    "Benfica vs Fulham": "draw",
    "Gornik Zabrze vs Pogon": "loss",
    "Fortuna Sittard vs De Graafschap": "loss"
}

# Step 12: Filter combinations to match actual results
filtered_combinations = []
matching_row_indices = []  # To store indices of matching rows in df

for idx, (combo, prob, odds, ev) in enumerate(selected_combinations):
    matches_actual_results = True
    for i, outcome in enumerate(combo):
        match_name = matches[i]['team']
        if outcome != actual_results[match_name]:
            matches_actual_results = False
            break
    
    if matches_actual_results:
        filtered_combinations.append((combo, prob, odds, ev))
        matching_row_indices.append(idx)  # Store the index of the matching row in df

# If no filtered combinations match the actual results, return None
if not filtered_combinations:
    print("No selected combinations matched the actual results.")
    matching_rows = None
else:
    # Return the matching rows from the original DataFrame
    matching_rows = df.iloc[matching_row_indices]

# Now, instead of printing the matching rows, return them
matching_rows

Unnamed: 0,Match 1 (Dinamo Zagreb vs Istra),Match 2 (Slavia Prague vs Liberec),Match 3 (Sparta Prague vs Dukla Praha),Match 4 (BATE Borisov vs Minsk),Match 5 (Benfica vs Fulham),Match 6 (Gornik Zabrze vs Pogon),Match 7 (Fortuna Sittard vs De Graafschap),Total Probability,Total Odds,Expected Value
115,win,win,win,loss,draw,loss,loss,0.000877,1145.601691,0.006084


In [3]:
import itertools
import pandas as pd

# Step 1: Define probabilities and odds for each match
matches = [
    {'team': 'Slavia Prague vs Liberec', 'win': 0.70, 'draw': 0.21, 'loss': 0.09, 'win_odds': 1.43, 'draw_odds': 4.76, 'loss_odds': 11.11},
    {'team': 'Sparta Prague vs Dukla Praha', 'win': 0.76, 'draw': 0.19, 'loss': 0.05, 'win_odds': 1.32, 'draw_odds': 5.26, 'loss_odds': 20.00},
    {'team': 'BATE Borisov vs Minsk', 'win': 0.65, 'draw': 0.21, 'loss': 0.14, 'win_odds': 1.54, 'draw_odds': 4.76, 'loss_odds': 7.14},
    {'team': 'Benfica vs Fulham', 'win': 0.55, 'draw': 0.25, 'loss': 0.20, 'win_odds': 1.80, 'draw_odds': 4.00, 'loss_odds': 5.00},
    {'team': 'Gornik Zabrze vs Pogon', 'win': 0.35, 'draw': 0.29, 'loss': 0.36, 'win_odds': 2.86, 'draw_odds': 3.45, 'loss_odds': 2.78},
    {'team': 'Fortuna Sittard vs De Graafschap', 'win': 0.52, 'draw': 0.31, 'loss': 0.17, 'win_odds': 1.92, 'draw_odds': 3.23, 'loss_odds': 5.88},
    {'team': 'Falkirk vs Queens Park FC', 'win': 0.22, 'draw': 0.27, 'loss': 0.51, 'win_odds': 4.55, 'draw_odds': 3.70, 'loss_odds': 1.96},
]


# Step 2: Generate all possible combinations of outcomes (win, draw, loss)
outcomes = ['win', 'draw', 'loss']
combinations = list(itertools.product(outcomes, repeat=len(matches)))

# Step 3: Calculate total probability, total odds, and expected value for each combination
results = []
for combo in combinations:
    total_prob = 1.0
    total_odds = 1.0
    for i, outcome in enumerate(combo):
        match = matches[i]
        total_prob *= match[outcome]
        total_odds *= match[f'{outcome}_odds']
    # Calculate the expected value for the combination
    expected_value = (total_prob * total_odds) - (1 - total_prob)
    results.append((combo, total_prob, total_odds, expected_value))

# Step 4: Sort the results by Expected Value, then by Total Probability, then by Total Odds
results.sort(key=lambda x: (x[3], x[1], x[2]), reverse=True)

# Step 5: Track the counts of each outcome type
outcome_counts = {outcome: 0 for outcome in outcomes}
selected_combinations = []

# Define a threshold for max number of combinations to select
max_combinations = 3000  # You can adjust this value

for result in results:
    combo, prob, odds, ev = result
    selected_combinations.append(result)
    for outcome in combo:
        outcome_counts[outcome] += 1

    # Break if we've selected enough combinations
    if len(selected_combinations) >= max_combinations:
        break

# Step 6: Create a DataFrame with each match in its own column, including team names and outcomes
columns = [f'Match {i+1} ({matches[i]["team"]})' for i in range(len(matches))]
columns.extend(['Total Probability', 'Total Odds', 'Expected Value'])
data = []

for combo, prob, odds, ev in selected_combinations:
    data.append(list(combo) + [prob, odds, ev])

df = pd.DataFrame(data, columns=columns)

# Step 11: Backtest Against Actual Results

# Actual results (you've already provided this JSON)
actual_results = {
    "Slavia Prague vs Liberec": "win",
    "Sparta Prague vs Dukla Praha": "win",
    "BATE Borisov vs Minsk": "loss",
    "Benfica vs Fulham": "draw",
    "Gornik Zabrze vs Pogon": "loss",
    "Fortuna Sittard vs De Graafschap": "loss",
    "Falkirk vs Queens Park FC": "win"
}


# Step 12: Filter combinations to match actual results
filtered_combinations = []
matching_row_indices = []  # To store indices of matching rows in df

for idx, (combo, prob, odds, ev) in enumerate(selected_combinations):
    matches_actual_results = True
    for i, outcome in enumerate(combo):
        match_name = matches[i]['team']
        if outcome != actual_results[match_name]:
            matches_actual_results = False
            break
    
    if matches_actual_results:
        filtered_combinations.append((combo, prob, odds, ev))
        matching_row_indices.append(idx)  # Store the index of the matching row in df

# If no filtered combinations match the actual results, return None
if not filtered_combinations:
    print("No selected combinations matched the actual results.")
    matching_rows = None
else:
    # Return the matching rows from the original DataFrame
    matching_rows = df.iloc[matching_row_indices]

# Now, instead of printing the matching rows, return them
matching_rows


Unnamed: 0,Match 1 (Slavia Prague vs Liberec),Match 2 (Sparta Prague vs Dukla Praha),Match 3 (BATE Borisov vs Minsk),Match 4 (Benfica vs Fulham),Match 5 (Gornik Zabrze vs Pogon),Match 6 (Fortuna Sittard vs De Graafschap),Match 7 (Falkirk vs Queens Park FC),Total Probability,Total Odds,Expected Value
154,win,win,loss,draw,loss,loss,win,0.000251,4009.605919,0.005458


In [4]:
import itertools
import pandas as pd

# Step 1: Define probabilities and odds for each match
matches = [
    {'team': 'Sparta Prague vs Dukla Praha', 'win': 0.76, 'draw': 0.19, 'loss': 0.05, 'win_odds': 1.32, 'draw_odds': 5.26, 'loss_odds': 20.00},
    {'team': 'BATE Borisov vs Minsk', 'win': 0.65, 'draw': 0.21, 'loss': 0.14, 'win_odds': 1.54, 'draw_odds': 4.76, 'loss_odds': 7.14},
    {'team': 'Benfica vs Fulham', 'win': 0.55, 'draw': 0.25, 'loss': 0.20, 'win_odds': 1.80, 'draw_odds': 4.00, 'loss_odds': 5.00},
    {'team': 'Gornik Zabrze vs Pogon', 'win': 0.35, 'draw': 0.29, 'loss': 0.36, 'win_odds': 2.86, 'draw_odds': 3.45, 'loss_odds': 2.78},
    {'team': 'Fortuna Sittard vs De Graafschap', 'win': 0.52, 'draw': 0.31, 'loss': 0.17, 'win_odds': 1.92, 'draw_odds': 3.23, 'loss_odds': 5.88},
    {'team': 'Falkirk vs Queens Park FC', 'win': 0.22, 'draw': 0.27, 'loss': 0.51, 'win_odds': 4.55, 'draw_odds': 3.70, 'loss_odds': 1.96},
    {'team': 'Almere vs Utrecht', 'win': 0.10, 'draw': 0.39, 'loss': 0.51, 'win_odds': 10.00, 'draw_odds': 2.56, 'loss_odds': 1.96},
]


# Step 2: Generate all possible combinations of outcomes (win, draw, loss)
outcomes = ['win', 'draw', 'loss']
combinations = list(itertools.product(outcomes, repeat=len(matches)))

# Step 3: Calculate total probability, total odds, and expected value for each combination
results = []
for combo in combinations:
    total_prob = 1.0
    total_odds = 1.0
    for i, outcome in enumerate(combo):
        match = matches[i]
        total_prob *= match[outcome]
        total_odds *= match[f'{outcome}_odds']
    # Calculate the expected value for the combination
    expected_value = (total_prob * total_odds) - (1 - total_prob)
    results.append((combo, total_prob, total_odds, expected_value))

# Step 4: Sort the results by Expected Value, then by Total Probability, then by Total Odds
results.sort(key=lambda x: (x[3], x[1], x[2]), reverse=True)

# Step 5: Track the counts of each outcome type
outcome_counts = {outcome: 0 for outcome in outcomes}
selected_combinations = []

# Define a threshold for max number of combinations to select
max_combinations = 3000  # You can adjust this value

for result in results:
    combo, prob, odds, ev = result
    selected_combinations.append(result)
    for outcome in combo:
        outcome_counts[outcome] += 1

    # Break if we've selected enough combinations
    if len(selected_combinations) >= max_combinations:
        break

# Step 6: Create a DataFrame with each match in its own column, including team names and outcomes
columns = [f'Match {i+1} ({matches[i]["team"]})' for i in range(len(matches))]
columns.extend(['Total Probability', 'Total Odds', 'Expected Value'])
data = []

for combo, prob, odds, ev in selected_combinations:
    data.append(list(combo) + [prob, odds, ev])

df = pd.DataFrame(data, columns=columns)

# Step 11: Backtest Against Actual Results

# Actual results (you've already provided this JSON)
actual_results = {
    "Sparta Prague vs Dukla Praha": "win",
    "BATE Borisov vs Minsk": "loss",
    "Benfica vs Fulham": "draw",
    "Gornik Zabrze vs Pogon": "loss",
    "Fortuna Sittard vs De Graafschap": "loss",
    "Falkirk vs Queens Park FC": "win",
    "Almere vs Utrecht": "loss"
}


# Step 12: Filter combinations to match actual results
filtered_combinations = []
matching_row_indices = []  # To store indices of matching rows in df

for idx, (combo, prob, odds, ev) in enumerate(selected_combinations):
    matches_actual_results = True
    for i, outcome in enumerate(combo):
        match_name = matches[i]['team']
        if outcome != actual_results[match_name]:
            matches_actual_results = False
            break
    
    if matches_actual_results:
        filtered_combinations.append((combo, prob, odds, ev))
        matching_row_indices.append(idx)  # Store the index of the matching row in df

# If no filtered combinations match the actual results, return None
if not filtered_combinations:
    print("No selected combinations matched the actual results.")
    matching_rows = None
else:
    # Return the matching rows from the original DataFrame
    matching_rows = df.iloc[matching_row_indices]

# Now, instead of printing the matching rows, return them
matching_rows


Unnamed: 0,Match 1 (Sparta Prague vs Dukla Praha),Match 2 (BATE Borisov vs Minsk),Match 3 (Benfica vs Fulham),Match 4 (Gornik Zabrze vs Pogon),Match 5 (Fortuna Sittard vs De Graafschap),Match 6 (Falkirk vs Queens Park FC),Match 7 (Almere vs Utrecht),Total Probability,Total Odds,Expected Value
222,win,loss,draw,loss,loss,win,loss,0.000183,5495.683637,0.003984


In [5]:
import itertools
import pandas as pd

# Step 1: Define probabilities and odds for each match
matches = [
    {'team': 'BATE Borisov vs Minsk', 'win': 0.65, 'draw': 0.21, 'loss': 0.14, 'win_odds': 1.54, 'draw_odds': 4.76, 'loss_odds': 7.14},
    {'team': 'Benfica vs Fulham', 'win': 0.55, 'draw': 0.25, 'loss': 0.20, 'win_odds': 1.80, 'draw_odds': 4.00, 'loss_odds': 5.00},
    {'team': 'Gornik Zabrze vs Pogon', 'win': 0.35, 'draw': 0.29, 'loss': 0.36, 'win_odds': 2.86, 'draw_odds': 3.45, 'loss_odds': 2.78},
    {'team': 'Fortuna Sittard vs De Graafschap', 'win': 0.52, 'draw': 0.31, 'loss': 0.17, 'win_odds': 1.92, 'draw_odds': 3.23, 'loss_odds': 5.88},
    {'team': 'Falkirk vs Queens Park FC', 'win': 0.22, 'draw': 0.27, 'loss': 0.51, 'win_odds': 4.55, 'draw_odds': 3.70, 'loss_odds': 1.96},
    {'team': 'Almere vs Utrecht', 'win': 0.10, 'draw': 0.39, 'loss': 0.51, 'win_odds': 10.00, 'draw_odds': 2.56, 'loss_odds': 1.96},
    {'team': 'Waterford vs Shamrock', 'win': 0.25, 'draw': 0.30, 'loss': 0.45, 'win_odds': 4.00, 'draw_odds': 3.33, 'loss_odds': 2.22},
]

# Step 2: Generate all possible combinations of outcomes (win, draw, loss)
outcomes = ['win', 'draw', 'loss']
combinations = list(itertools.product(outcomes, repeat=len(matches)))

# Step 3: Calculate total probability, total odds, and expected value for each combination
results = []
for combo in combinations:
    total_prob = 1.0
    total_odds = 1.0
    for i, outcome in enumerate(combo):
        match = matches[i]
        total_prob *= match[outcome]
        total_odds *= match[f'{outcome}_odds']
    # Calculate the expected value for the combination
    expected_value = (total_prob * total_odds) - (1 - total_prob)
    results.append((combo, total_prob, total_odds, expected_value))

# Step 4: Sort the results by Expected Value, then by Total Probability, then by Total Odds
results.sort(key=lambda x: (x[3], x[1], x[2]), reverse=True)

# Step 5: Track the counts of each outcome type
outcome_counts = {outcome: 0 for outcome in outcomes}
selected_combinations = []

# Define a threshold for max number of combinations to select
max_combinations = 3000  # You can adjust this value

for result in results:
    combo, prob, odds, ev = result
    selected_combinations.append(result)
    for outcome in combo:
        outcome_counts[outcome] += 1

    # Break if we've selected enough combinations
    if len(selected_combinations) >= max_combinations:
        break

# Step 6: Create a DataFrame with each match in its own column, including team names and outcomes
columns = [f'Match {i+1} ({matches[i]["team"]})' for i in range(len(matches))]
columns.extend(['Total Probability', 'Total Odds', 'Expected Value'])
data = []

for combo, prob, odds, ev in selected_combinations:
    data.append(list(combo) + [prob, odds, ev])

df = pd.DataFrame(data, columns=columns)

# Step 11: Backtest Against Actual Results

# Actual results (you've already provided this JSON)
actual_results = {
    "BATE Borisov vs Minsk": "loss",
    "Benfica vs Fulham": "draw",
    "Gornik Zabrze vs Pogon": "loss",
    "Fortuna Sittard vs De Graafschap": "loss",
    "Falkirk vs Queens Park FC": "win",
    "Almere vs Utrecht": "loss",
    "Waterford vs Shamrock": "loss"
}


# Step 12: Filter combinations to match actual results
filtered_combinations = []
matching_row_indices = []  # To store indices of matching rows in df

for idx, (combo, prob, odds, ev) in enumerate(selected_combinations):
    matches_actual_results = True
    for i, outcome in enumerate(combo):
        match_name = matches[i]['team']
        if outcome != actual_results[match_name]:
            matches_actual_results = False
            break
    
    if matches_actual_results:
        filtered_combinations.append((combo, prob, odds, ev))
        matching_row_indices.append(idx)  # Store the index of the matching row in df

# If no filtered combinations match the actual results, return None
if not filtered_combinations:
    print("No selected combinations matched the actual results.")
    matching_rows = None
else:
    # Return the matching rows from the original DataFrame
    matching_rows = df.iloc[matching_row_indices]

# Now, instead of printing the matching rows, return them
matching_rows


Unnamed: 0,Match 1 (BATE Borisov vs Minsk),Match 2 (Benfica vs Fulham),Match 3 (Gornik Zabrze vs Pogon),Match 4 (Fortuna Sittard vs De Graafschap),Match 5 (Falkirk vs Queens Park FC),Match 6 (Almere vs Utrecht),Match 7 (Waterford vs Shamrock),Total Probability,Total Odds,Expected Value
648,loss,draw,loss,loss,win,loss,loss,0.000108,9242.740663,-0.000293


In [6]:
import itertools
import pandas as pd

# Step 1: Define probabilities and odds for each match
matches = [
    {'team': 'Benfica vs Fulham', 'win': 0.55, 'draw': 0.25, 'loss': 0.20, 'win_odds': 1.80, 'draw_odds': 4.00, 'loss_odds': 5.00},
    {'team': 'Gornik Zabrze vs Pogon', 'win': 0.35, 'draw': 0.29, 'loss': 0.36, 'win_odds': 2.86, 'draw_odds': 3.45, 'loss_odds': 2.78},
    {'team': 'Fortuna Sittard vs De Graafschap', 'win': 0.52, 'draw': 0.31, 'loss': 0.17, 'win_odds': 1.92, 'draw_odds': 3.23, 'loss_odds': 5.88},
    {'team': 'Falkirk vs Queens Park FC', 'win': 0.22, 'draw': 0.27, 'loss': 0.51, 'win_odds': 4.55, 'draw_odds': 3.70, 'loss_odds': 1.96},
    {'team': 'Almere vs Utrecht', 'win': 0.10, 'draw': 0.39, 'loss': 0.51, 'win_odds': 10.00, 'draw_odds': 2.56, 'loss_odds': 1.96},
    {'team': 'Waterford vs Shamrock', 'win': 0.25, 'draw': 0.30, 'loss': 0.45, 'win_odds': 4.00, 'draw_odds': 3.33, 'loss_odds': 2.22},
    {'team': 'Otelul vs Hermannstadt', 'win': 0.42, 'draw': 0.41, 'loss': 0.17, 'win_odds': 2.38, 'draw_odds': 2.44, 'loss_odds': 5.88},
]

# Step 2: Generate all possible combinations of outcomes (win, draw, loss)
outcomes = ['win', 'draw', 'loss']
combinations = list(itertools.product(outcomes, repeat=len(matches)))

# Step 3: Calculate total probability, total odds, and expected value for each combination
results = []
for combo in combinations:
    total_prob = 1.0
    total_odds = 1.0
    for i, outcome in enumerate(combo):
        match = matches[i]
        total_prob *= match[outcome]
        total_odds *= match[f'{outcome}_odds']
    # Calculate the expected value for the combination
    expected_value = (total_prob * total_odds) - (1 - total_prob)
    results.append((combo, total_prob, total_odds, expected_value))

# Step 4: Sort the results by Expected Value, then by Total Probability, then by Total Odds
results.sort(key=lambda x: (x[3], x[1], x[2]), reverse=True)

# Step 5: Track the counts of each outcome type
outcome_counts = {outcome: 0 for outcome in outcomes}
selected_combinations = []

# Define a threshold for max number of combinations to select
max_combinations = 3000  # You can adjust this value

for result in results:
    combo, prob, odds, ev = result
    selected_combinations.append(result)
    for outcome in combo:
        outcome_counts[outcome] += 1

    # Break if we've selected enough combinations
    if len(selected_combinations) >= max_combinations:
        break

# Step 6: Create a DataFrame with each match in its own column, including team names and outcomes
columns = [f'Match {i+1} ({matches[i]["team"]})' for i in range(len(matches))]
columns.extend(['Total Probability', 'Total Odds', 'Expected Value'])
data = []

for combo, prob, odds, ev in selected_combinations:
    data.append(list(combo) + [prob, odds, ev])

df = pd.DataFrame(data, columns=columns)

# Step 11: Backtest Against Actual Results

# Actual results (you've already provided this JSON)
actual_results = {
    "Benfica vs Fulham": "draw",
    "Gornik Zabrze vs Pogon": "loss",
    "Fortuna Sittard vs De Graafschap": "loss",
    "Falkirk vs Queens Park FC": "win",
    "Almere vs Utrecht": "loss",
    "Waterford vs Shamrock": "loss",
    "Otelul vs Hermannstadt": "win"
}


# Step 12: Filter combinations to match actual results
filtered_combinations = []
matching_row_indices = []  # To store indices of matching rows in df

for idx, (combo, prob, odds, ev) in enumerate(selected_combinations):
    matches_actual_results = True
    for i, outcome in enumerate(combo):
        match_name = matches[i]['team']
        if outcome != actual_results[match_name]:
            matches_actual_results = False
            break
    
    if matches_actual_results:
        filtered_combinations.append((combo, prob, odds, ev))
        matching_row_indices.append(idx)  # Store the index of the matching row in df

# If no filtered combinations match the actual results, return None
if not filtered_combinations:
    print("No selected combinations matched the actual results.")
    matching_rows = None
else:
    # Return the matching rows from the original DataFrame
    matching_rows = df.iloc[matching_row_indices]

# Now, instead of printing the matching rows, return them
matching_rows


Unnamed: 0,Match 1 (Benfica vs Fulham),Match 2 (Gornik Zabrze vs Pogon),Match 3 (Fortuna Sittard vs De Graafschap),Match 4 (Falkirk vs Queens Park FC),Match 5 (Almere vs Utrecht),Match 6 (Waterford vs Shamrock),Match 7 (Otelul vs Hermannstadt),Total Probability,Total Odds,Expected Value
495,draw,loss,loss,win,loss,loss,win,0.000324,3080.913554,-7.7e-05


In [7]:
import itertools
import pandas as pd

# Step 1: Define probabilities and odds for each match
matches = [
    {'team': 'Gornik Zabrze vs Pogon', 'win': 0.35, 'draw': 0.29, 'loss': 0.36, 'win_odds': 2.86, 'draw_odds': 3.45, 'loss_odds': 2.78},
    {'team': 'Fortuna Sittard vs De Graafschap', 'win': 0.52, 'draw': 0.31, 'loss': 0.17, 'win_odds': 1.92, 'draw_odds': 3.23, 'loss_odds': 5.88},
    {'team': 'Falkirk vs Queens Park FC', 'win': 0.22, 'draw': 0.27, 'loss': 0.51, 'win_odds': 4.55, 'draw_odds': 3.70, 'loss_odds': 1.96},
    {'team': 'Almere vs Utrecht', 'win': 0.10, 'draw': 0.39, 'loss': 0.51, 'win_odds': 10.00, 'draw_odds': 2.56, 'loss_odds': 1.96},
    {'team': 'Waterford vs Shamrock', 'win': 0.25, 'draw': 0.30, 'loss': 0.45, 'win_odds': 4.00, 'draw_odds': 3.33, 'loss_odds': 2.22},
    {'team': 'Otelul vs Hermannstadt', 'win': 0.42, 'draw': 0.41, 'loss': 0.17, 'win_odds': 2.38, 'draw_odds': 2.44, 'loss_odds': 5.88},
    {'team': 'NK Radomlje vs NK Domzale', 'win': 0.40, 'draw': 0.28, 'loss': 0.32, 'win_odds': 2.50, 'draw_odds': 3.57, 'loss_odds': 3.13},
]


# Step 2: Generate all possible combinations of outcomes (win, draw, loss)
outcomes = ['win', 'draw', 'loss']
combinations = list(itertools.product(outcomes, repeat=len(matches)))

# Step 3: Calculate total probability, total odds, and expected value for each combination
results = []
for combo in combinations:
    total_prob = 1.0
    total_odds = 1.0
    for i, outcome in enumerate(combo):
        match = matches[i]
        total_prob *= match[outcome]
        total_odds *= match[f'{outcome}_odds']
    # Calculate the expected value for the combination
    expected_value = (total_prob * total_odds) - (1 - total_prob)
    results.append((combo, total_prob, total_odds, expected_value))

# Step 4: Sort the results by Expected Value, then by Total Probability, then by Total Odds
results.sort(key=lambda x: (x[3], x[1], x[2]), reverse=True)

# Step 5: Track the counts of each outcome type
outcome_counts = {outcome: 0 for outcome in outcomes}
selected_combinations = []

# Define a threshold for max number of combinations to select
max_combinations = 3000  # You can adjust this value

for result in results:
    combo, prob, odds, ev = result
    selected_combinations.append(result)
    for outcome in combo:
        outcome_counts[outcome] += 1

    # Break if we've selected enough combinations
    if len(selected_combinations) >= max_combinations:
        break

# Step 6: Create a DataFrame with each match in its own column, including team names and outcomes
columns = [f'Match {i+1} ({matches[i]["team"]})' for i in range(len(matches))]
columns.extend(['Total Probability', 'Total Odds', 'Expected Value'])
data = []

for combo, prob, odds, ev in selected_combinations:
    data.append(list(combo) + [prob, odds, ev])

df = pd.DataFrame(data, columns=columns)

# Step 11: Backtest Against Actual Results

# Actual results (you've already provided this JSON)
actual_results = {
    "Gornik Zabrze vs Pogon": "loss",
    "Fortuna Sittard vs De Graafschap": "loss",
    "Falkirk vs Queens Park FC": "win",
    "Almere vs Utrecht": "loss",
    "Waterford vs Shamrock": "loss",
    "Otelul vs Hermannstadt": "win",
    "NK Radomlje vs NK Domzale": "win"
}

# Step 12: Filter combinations to match actual results
filtered_combinations = []
matching_row_indices = []  # To store indices of matching rows in df

for idx, (combo, prob, odds, ev) in enumerate(selected_combinations):
    matches_actual_results = True
    for i, outcome in enumerate(combo):
        match_name = matches[i]['team']
        if outcome != actual_results[match_name]:
            matches_actual_results = False
            break
    
    if matches_actual_results:
        filtered_combinations.append((combo, prob, odds, ev))
        matching_row_indices.append(idx)  # Store the index of the matching row in df

# If no filtered combinations match the actual results, return None
if not filtered_combinations:
    print("No selected combinations matched the actual results.")
    matching_rows = None
else:
    # Return the matching rows from the original DataFrame
    matching_rows = df.iloc[matching_row_indices]

# Now, instead of printing the matching rows, return them
matching_rows


Unnamed: 0,Match 1 (Gornik Zabrze vs Pogon),Match 2 (Fortuna Sittard vs De Graafschap),Match 3 (Falkirk vs Queens Park FC),Match 4 (Almere vs Utrecht),Match 5 (Waterford vs Shamrock),Match 6 (Otelul vs Hermannstadt),Match 7 (NK Radomlje vs NK Domzale),Total Probability,Total Odds,Expected Value
920,loss,loss,win,loss,loss,win,win,0.000519,1925.570971,0.000118
