In [5]:
import pandas as pd
import numpy as np

# Load the three CSV files
profiles = pd.read_csv('../CSV/19.01.2026/profiles.csv')
predictions = pd.read_csv('../CSV/04.02.2026/predictions_rows.csv')
fixtures = pd.read_csv('../CSV/04.02.2026/fixtures_rows (1).csv')

# Merge predictions with profiles (user_id ‚Üí id)
merged = predictions.merge(profiles, left_on='user_id', right_on='id', suffixes=('_pred', '_profile'))

# Merge with fixtures (fixture_id ‚Üí id)
merged = merged.merge(fixtures, left_on='fixture_id', right_on='id', suffixes=('', '_fixture'))

# Select and rename columns as requested
result = merged[['username', 'home_prediction', 'away_prediction', 'home_team', 'away_team', 'home_score', 'away_score']]
result.columns = ['Player Name', 'Home Prediction', 'Away Prediction', 'Home Team', 'Away Team', 'Home Score', 'Away Score']

# Display the result
print(f"Merged DataFrame shape: {result.shape}")
print("\nFirst 10 rows:")
print(result.head(10))

Merged DataFrame shape: (7640, 7)

First 10 rows:
        Player Name  Home Prediction  Away Prediction               Home Team  \
0       üç∫The Barman                0                2         AFC Bournemouth   
1         Stephen O                1                2            Leeds United   
2       Jim Shirley                1                1                 Everton   
3      Steve arnold                1                3                West Ham   
4        Mjd-‚öíÔ∏è‚öíÔ∏è‚öíÔ∏è                0                2                 Burnley   
5  Graham Dongworth                2                1  Brighton & Hove Albion   
6              Si B                3                0               Tottenham   
7        Mjd-‚öíÔ∏è‚öíÔ∏è‚öíÔ∏è                2                2          Crystal Palace   
8              Si B                5                0         Manchester City   
9      Nick Arnold                 1                3         West Ham United   

           Away Team  Home Scor

In [6]:
# Clean up: remove rows with null/NaN values in scores or predictions
print(f"Before cleanup: {result.shape[0]} rows")

result_clean = result.dropna(subset=['Home Score', 'Away Score', 'Home Prediction', 'Away Prediction']).copy()

# Remove duplicate rows
before_dupes = result_clean.shape[0]
result_clean = result_clean.drop_duplicates()

print(f"After removing NaN: {result_clean.shape[0]} rows")
print(f"Removed {result.shape[0] - result_clean.shape[0]} rows")
print(f"Removed {before_dupes - result_clean.shape[0]} duplicate rows")

# Convert scores from float to integer
result_clean['Home Score'] = result_clean['Home Score'].astype(int)
result_clean['Away Score'] = result_clean['Away Score'].astype(int)

# Display cleaned result
print("\nCleaned data (first 10 rows):")
print(result_clean.head(10))
print(f"\nData types:")
print(result_clean.dtypes)

Before cleanup: 7640 rows
After removing NaN: 7485 rows
Removed 155 rows
Removed 5 duplicate rows

Cleaned data (first 10 rows):
        Player Name  Home Prediction  Away Prediction               Home Team  \
0       üç∫The Barman                0                2         AFC Bournemouth   
1         Stephen O                1                2            Leeds United   
2       Jim Shirley                1                1                 Everton   
3      Steve arnold                1                3                West Ham   
4        Mjd-‚öíÔ∏è‚öíÔ∏è‚öíÔ∏è                0                2                 Burnley   
5  Graham Dongworth                2                1  Brighton & Hove Albion   
6              Si B                3                0               Tottenham   
7        Mjd-‚öíÔ∏è‚öíÔ∏è‚öíÔ∏è                2                2          Crystal Palace   
8              Si B                5                0         Manchester City   
9      Nick Arnold                

In [7]:
# Keep only exact scoreline predictions (both home and away match)
correct_predictions = result_clean[
    (result_clean['Home Prediction'] == result_clean['Home Score'])
    & (result_clean['Away Prediction'] == result_clean['Away Score'])
].copy()

print(f"Correct predictions count: {correct_predictions.shape[0]}")
print("\nFirst 10 correct predictions:")
print(correct_predictions.head(10))

Correct predictions count: 630

First 10 correct predictions:
          Player Name  Home Prediction  Away Prediction     Home Team  \
1           Stephen O                1                2  Leeds United   
4          Mjd-‚öíÔ∏è‚öíÔ∏è‚öíÔ∏è                0                2       Burnley   
6                Si B                3                0     Tottenham   
28   Graham Dongworth                2                0    Sunderland   
38           SteveJos                1                1       Everton   
70       RogerStanton                2                1       Arsenal   
76   Alan Taylor-Reed                1                1       Burnley   
89               Si B                1                1    Sunderland   
94       Des McCarthy                1                1       Chelsea   
115         Stephen O                1                2      West Ham   

                   Away Team  Home Score  Away Score  
1          Tottenham Hotspur           1           2  
4           

In [8]:
# Sort by highest correct prediction total
sorted_correct = correct_predictions.copy()
sorted_correct['Correct goals predicted'] = (
    sorted_correct['Home Prediction'] + sorted_correct['Away Prediction']
)

sorted_correct = sorted_correct.sort_values(by='Correct goals predicted', ascending=False)

print("Top 10 by Correct goals predicted:")
print(sorted_correct.head(10))

Top 10 by Correct goals predicted:
        Player Name  Home Prediction  Away Prediction          Home Team  \
7001        Parish                 5                1    Manchester City   
5886      Andy Page                3                3            Wrexham   
4867         big k                 3                2  Manchester United   
4244    Rod McGeady                3                2    AFC Bournemouth   
869    Chris Torode                4                1            Arsenal   
4158  Lenny Wright                 3                2            Chelsea   
4600   Chris Torode                3                2  Manchester United   
5758       SteveJos                3                2            Chelsea   
1179       Martin H                2                3          Newcastle   
1182        Parish                 3                2          Brentford   

              Away Team  Home Score  Away Score  Correct goals predicted  
7001            Burnley           5           1      

In [9]:
# Show only the top result(s) by Correct goals predicted
max_correct = sorted_correct['Correct goals predicted'].max()

best = sorted_correct[sorted_correct['Correct goals predicted'] == max_correct].copy()

print(f"Top Correct goals predicted: {max_correct}")
print(f"Number of top results: {best.shape[0]}")

# Make the output more presentable
best_display = best[['Player Name', 'Home Team', 'Away Team', 'Home Score', 'Away Score',
                     'Home Prediction', 'Away Prediction', 'Correct goals predicted']]

styled_best = (
    best_display.style.set_table_styles([
        {"selector": "th", "props": [("background-color", "#4472C4"), ("color", "white"), ("border", "1px solid black"), ("font-weight", "bold")]},
        {"selector": "td", "props": [("border", "1px solid black"), ("color", "#1a1a1a"), ("padding", "8px")]},
        {"selector": "tr:nth-child(even) td", "props": [("background-color", "#F2F2F2"), ("color", "#1a1a1a")]},
        {"selector": "tr:nth-child(odd) td", "props": [("background-color", "#FFFFFF"), ("color", "#1a1a1a")]},
    ])
)

display(styled_best)

Top Correct goals predicted: 6
Number of top results: 2


Unnamed: 0,Player Name,Home Team,Away Team,Home Score,Away Score,Home Prediction,Away Prediction,Correct goals predicted
7001,Parish,Manchester City,Burnley,5,1,5,1,6
5886,Andy Page,Wrexham,Nottingham Forest,3,3,3,3,6
