after getting the match ids 
the career tuple and recent form tuple is generated 
career tuple is tuple from career data file where match_id, player_id matches
same for recent form tuple 

then we generate the venue tuple 
from venue data file where player_id and match_id matches 

if data is not there we return a tuple of length 12 for  whichever  of career , venue and recent form is not generated  

In [42]:
import pandas as pd
from typing import List, Dict, Tuple

def generate_tuple_from_data(data: pd.DataFrame, player_id: str, match_id: str) -> Tuple:
    """
    Generate a tuple from the given data based on player_id and match_id.
    If no matching data is found, return a tuple of length 10 filled with 0s.
    """
    # print(f"Generating tuple for player_id={player_id} and match_id={match_id}")  # Debugging line
    matching_data = data[(data['player_id'] == player_id) & (data['match_id'] == match_id)]
    
    if matching_data.empty:
        print(f"No matching data found for player_id={player_id} and match_id={match_id}")  # Debugging line
        return tuple([0] * (len(data.columns) - 2))  # Return a tuple with zeros if no match is found (excluding player_id and match_id)
    
    # Extract the relevant data as a tuple (excluding player_id and match_id)
    # print(f"Found matching data for player_id={player_id} and match_id={match_id}: {matching_data.iloc[0].values}")  # Debugging line
    return tuple(matching_data.iloc[0].drop(['player_id', 'match_id']).values)

def calculate_player_features(player_id: str, match_date: str, venue: str, feature_names: Dict[str, List[str]]) -> List[float]:
    """
    Calculate the feature vector for a player based on player_id, match_date, and venue.
    This should return a list of features (numerical values) for the player.
    """
    # print(f"Calculating features for player_id={player_id}, match_date={match_date}, venue={venue}")  # Debugging line
    
    # Load the necessary CSV files using pandas
    venue_data = pd.read_csv('../../CSVs/Final/ODI/venue_odi.csv',low_memory=False)
    recent_data = pd.read_csv('../../CSVs/Final/ODI/recent_odi.csv',low_memory=False)
    player_match_data = pd.read_csv('../../CSVs/Final/ODI/player_match_data_odi.csv',low_memory=False)
    career_data = pd.read_csv('../../CSVs/Final/ODI/career_odi.csv',low_memory=False)
    
    # Convert the 'date' column to datetime format for easier comparison
    player_match_data['date'] = pd.to_datetime(player_match_data['date'])
    input_date = pd.to_datetime(match_date)
    
    # Filter player match data for the given player_id
    player_data = player_match_data[player_match_data['player_id'] == player_id]
    
    # Find the match_id with the closest date <= input date (regardless of venue)
    match_id_1 = player_data[player_data['date'] <= input_date].sort_values(by='date', ascending=False).iloc[0]['match_id']
    # print(f"Match ID (closest date) for player {player_id} and match date {match_date}: {match_id_1}")  # Debugging line
    
    # Find the match_id with the closest date <= input date and matching the input venue
    match_id_2 = player_data[(player_data['date'] <= input_date) & (player_data['venue'] == venue)].sort_values(by='date', ascending=False).iloc[0]['match_id']
    # print(f"Match ID (closest date and venue) for player {player_id} and match date {match_date}: {match_id_2}")  # Debugging line
    
    # Generate the career and recent form tuples (without player_id and match_id)
    career_tuple = generate_tuple_from_data(career_data, player_id, match_id_1)
    recent_form_tuple = generate_tuple_from_data(recent_data, player_id, match_id_2)
    
    # Generate the venue tuple (without player_id and match_id)
    venue_tuple = generate_tuple_from_data(venue_data, player_id, match_id_1)
    
    # Combine the tuples to form the feature vector
    feature_vector = list(career_tuple) + list(recent_form_tuple) + list(venue_tuple)
    # print(f"Generated feature vector for player {player_id}: {feature_vector}")  # Debugging line
    
    return feature_vector

def generate_features_for_all_players(players_data: List[dict], match_date: str, venue: str) -> pd.DataFrame:
    """
    Generate feature vectors for all players based on the provided list of player data, match date, and venue.
    Returns a DataFrame with player_id, player_name, and corresponding feature values.
    """
    # print(f"Generating features for all players for match_date={match_date} and venue={venue}")  # Debugging line
    
    # Load feature names from the relevant CSV files
    career_data = pd.read_csv('../../CSVs/Final/ODI/career_odi.csv',dtype={
    'match_id': str,
    'player_id': str,
    'career_batsman_100s_odi': int,
    'career_batsman_50s_odi': int,
    'career_batsman_average_runs_odi': float,
    'career_batsman_strike_rate_odi': float,
    'career_batsman_total_runs_odi': int,
    'career_bowler_average_odi': float,
    'career_bowler_economy_rate_odi': float,
    'career_bowler_wickets_odi': int,
    'career_fielder_total_catches_odi': int,
    'career_fielder_total_runouts_odi': int,
    'career_batsman_total_fours_odi': int,
    'career_batsman_total_sixes_odi': int})

    recent_data = pd.read_csv('../../CSVs/Final/ODI/recent_odi.csv',dtype={
    'match_id': str,
    'player_id': str,
    'recent_batsman_100s_odi': int,
    'recent_batsman_50s_odi': int,
    'recent_batsman_average_runs_odi': float,
    'recent_batsman_strike_rate_odi': float,
    'recent_batsman_total_runs_odi': int,
    'recent_bowler_average_odi': float,
    'recent_bowler_economy_rate_odi': float,
    'recent_bowler_wickets_odi': int,
    'recent_fielder_total_catches_odi': int,
    'recent_fielder_total_runouts_odi': int,
    'recent_batsman_total_fours_odi': int,
    'recent_batsman_total_sixes_odi': int})

    venue_data = pd.read_csv('../../CSVs/Final/ODI/venue_odi.csv',dtype={
    'match_id': str,
    'player_id': str,
    'venue_batsman_100s_odi': int,
    'venue_batsman_50s_odi': int,
    'venue_batsman_average_runs_odi': float,
    'venue_batsman_strike_rate_odi': float,
    'venue_batsman_total_runs_odi': int,
    'venue_bowler_average_odi': float,
    'venue_bowler_economy_rate_odi': float,
    'venue_bowler_wickets_odi': int,
    'venue_fielder_total_catches_odi': int,
    'venue_fielder_total_runouts_odi': int,
    'venue_batsman_total_fours_odi': int,
    'venue_batsman_total_sixes_odi': int})
    
    # Extract feature names by removing player_id and match_id
    career_feature_names = [col for col in career_data.columns if col not in ['player_id', 'match_id']]
    recent_feature_names = [col for col in recent_data.columns if col not in ['player_id', 'match_id']]
    venue_feature_names = [col for col in venue_data.columns if col not in ['player_id', 'match_id']]
    
    # Combine all feature names
    all_feature_names = career_feature_names + recent_feature_names + venue_feature_names
    # print(f"All feature names: {all_feature_names}")  # Debugging line
    
    feature_rows = []  # List to store feature rows for DataFrame
    
    # Loop through each player in the list
    for player in players_data:
        player_id = player['player_id']
        player_name = player['player_name']
        
        # Step 1: Calculate feature vector for the player
        feature_vector = calculate_player_features(player_id, match_date, venue, all_feature_names)
        
        # Step 2: Append each feature with the player_id, player_name, and corresponding feature name to feature_rows
        for i, feature_value in enumerate(feature_vector):
            feature_name = all_feature_names[i]  # Get actual feature name
            feature_rows.append({
                'player_id': player_id,
                'player_name': player_name,
                'feature_name': feature_name,
                'feature_value': feature_value
            })
    
    # Step 3: Convert the feature rows to a pandas DataFrame
    features_df = pd.DataFrame(feature_rows)
    # print(f"Generated feature rows for all players: {features_df.head()}")  # Debugging line
    
    # Pivot the DataFrame to get the desired output format: player_id, player_name, feature_1, feature_2, ...
    features_df_pivot = features_df.pivot_table(index=['player_id', 'player_name'], columns='feature_name', values='feature_value', aggfunc='first')
    
    # Reset index to flatten the DataFrame
    features_df_pivot.reset_index(inplace=True)
    
    return features_df_pivot

players_data = [
    {'player_id': '865eae1f', 'player_name': 'ML Green', 'role': 'Wicket-Keeper', 'team_name': 'New Zealand'},
{'player_id': '7673c908', 'player_name': 'AC Kerr', 'role': 'All-Rounder', 'team_name': 'New Zealand'},
{'player_id': '5bf87532', 'player_name': 'Lata Mondal', 'role': 'All-Rounder', 'team_name': 'Bangladesh'},
{'player_id': '6df697a9', 'player_name': 'FC Jonas', 'role': 'Bowler', 'team_name': 'New Zealand'},
{'player_id': '558cdd64', 'player_name': 'Fahima Khatun', 'role': 'Bowler', 'team_name': 'Bangladesh'},
{'player_id': '6e96c956', 'player_name': 'Fargana Hoque', 'role': 'Batsman', 'team_name': 'Bangladesh'},
{'player_id': '8ca1f93a', 'player_name': 'HM Rowe', 'role': 'Bowler', 'team_name': 'New Zealand'},
{'player_id': 'b8cce99f', 'player_name': 'HNK Jensen', 'role': 'Bowler', 'team_name': 'New Zealand'},
{'player_id': '9af58032', 'player_name': 'JM Kerr', 'role': 'Bowler', 'team_name': 'New Zealand'},
{'player_id': 'f2670a3d', 'player_name': 'Dilara Akter', 'role': 'Batsman', 'team_name': 'Bangladesh'},
{'player_id': '752183c1', 'player_name': 'Jahanara Alam', 'role': 'Bowler', 'team_name': 'Bangladesh'},
{'player_id': '7072542f', 'player_name': 'JT McFadyen', 'role': 'Wicket-Keeper', 'team_name': 'New Zealand'},
{'player_id': 'a50797a4', 'player_name': 'Marufa Akter', 'role': 'Bowler', 'team_name': 'Bangladesh'},
{'player_id': '712bbd92', 'player_name': 'Murshida Khatun', 'role': 'Batsman', 'team_name': 'Bangladesh'},
{'player_id': '66078174', 'player_name': 'Nahida Akter', 'role': 'Bowler', 'team_name': 'Bangladesh'},
{'player_id': '3827f6a2', 'player_name': 'Nigar Sultana', 'role': 'Wicket-Keeper', 'team_name': 'Bangladesh'},
{'player_id': '645b2a2f', 'player_name': 'MM Penfold', 'role': 'Bowler', 'team_name': 'New Zealand'},
{'player_id': 'de69af96', 'player_name': 'SFM Devine', 'role': 'All-Rounder', 'team_name': 'New Zealand'},
{'player_id': '89bbc67a', 'player_name': 'SW Bates', 'role': 'All-Rounder', 'team_name': 'New Zealand'},
{'player_id': '037f2b86', 'player_name': 'Salma Khatun', 'role': 'Bowler', 'team_name': 'Bangladesh'},
{'player_id': '19147fe0', 'player_name': 'LR Down', 'role': 'Batsman', 'team_name': 'New Zealand'},
{'player_id': 'ad3940a5', 'player_name': 'Ritu Moni', 'role': 'Bowler', 'team_name': 'Bangladesh'},
]

# Get recommended team for the given match date, venue, and tournament type
match_date = '2022-12-17'
venue = 'Hamilton'
tournament_type = 'ODI'


# Generate feature vectors for all players
feature_df = generate_features_for_all_players(players_data, match_date, venue)
print(f"Generated feature dataframe: {feature_df.head(22)}")




Generated feature dataframe: feature_name player_id      player_name  career_batsman_100s_odi  \
0             037f2b86     Salma Khatun                      0.0   
1             19147fe0          LR Down                      0.0   
2             3827f6a2    Nigar Sultana                      0.0   
3             558cdd64    Fahima Khatun                      0.0   
4             5bf87532      Lata Mondal                      0.0   
5             645b2a2f       MM Penfold                      0.0   
6             66078174     Nahida Akter                      0.0   
7             6df697a9         FC Jonas                      0.0   
8             6e96c956    Fargana Hoque                      0.0   
9             7072542f      JT McFadyen                      0.0   
10            712bbd92  Murshida Khatun                      0.0   
11            752183c1    Jahanara Alam                      0.0   
12            7673c908          AC Kerr                      1.0   
13            865ea

now write the predict fantasy points function 

load the model and return predicted fantasy points 


In [43]:
import pandas as pd
import joblib
from typing import List

def predict_fantasy_points_for_all_players(players_df: pd.DataFrame) -> pd.DataFrame:
    """
    Predict the fantasy points for all players given their feature vectors.
    
    Args:
    - players_df (pd.DataFrame): A DataFrame containing player data (player_id, player_name, and feature columns).
    
    Returns:
    - pd.DataFrame: A DataFrame with player_id, player_name, and predicted fantasy points for each player.
    """
    try:
        print("Starting the prediction process...")

        # Load the pre-trained model (replace with the correct path to your model)
        print("Loading the pre-trained model...")
        model = joblib.load('../../Code/Model_Final/ODI/model_odi.pkl')
        print("Model loaded successfully.")

        # Extract features from the DataFrame (assuming the feature columns start from the 3rd column onward)
        feature_columns = players_df.columns[2:]  # Skip player_id and player_name
        print(f"Extracted feature columns: {feature_columns}")

        # Check if the correct number of features are present
        print(f"Number of features in input DataFrame: {len(feature_columns)}")
        print("Input DataFrame preview:")
        print(players_df.head())

        # Predict fantasy points for all players
        print("Predicting fantasy points for all players...")
        predicted_fantasy_points = model.predict(players_df[feature_columns])
        print(f"Predictions completed. Number of predictions: {len(predicted_fantasy_points)}")

        # Add predicted fantasy points to the DataFrame
        players_df['predicted_fantasy_points'] = predicted_fantasy_points
        print("Predicted fantasy points added to DataFrame.")

        # Create the resulting DataFrame with only player_id, player_name, and predicted_fantasy_points
        results_df = players_df[['player_id', 'player_name', 'predicted_fantasy_points']]
        print("Result DataFrame created with player_id, player_name, and predicted_fantasy_points.")

        # Display preview of results
        print("Result DataFrame preview:")
        print(results_df.head())
        
        return results_df
    
    except Exception as e:
        print(f"Error while loading model or predicting fantasy points: {e}")
        return pd.DataFrame(columns=['player_id', 'player_name', 'predicted_fantasy_points'])



In [44]:


players_df = feature_df

# Get predicted fantasy points for all players
predicted_fantasy_points_df = predict_fantasy_points_for_all_players(players_df)

# Display the result
print(predicted_fantasy_points_df)


Starting the prediction process...
Loading the pre-trained model...
Model loaded successfully.
Extracted feature columns: Index(['career_batsman_100s_odi', 'career_batsman_50s_odi',
       'career_batsman_average_runs_odi', 'career_batsman_strike_rate_odi',
       'career_batsman_total_fours_odi', 'career_batsman_total_runs_odi',
       'career_batsman_total_sixes_odi', 'career_bowler_average_odi',
       'career_bowler_economy_rate_odi', 'career_bowler_wickets_odi',
       'career_fielder_total_catches_odi', 'career_fielder_total_runouts_odi',
       'recent_batsman_100s_odi', 'recent_batsman_50s_odi',
       'recent_batsman_average_runs_odi', 'recent_batsman_strike_rate_odi',
       'recent_batsman_total_fours_odi', 'recent_batsman_total_runs_odi',
       'recent_batsman_total_sixes_odi', 'recent_bowler_average_odi',
       'recent_bowler_economy_rate_odi', 'recent_bowler_wickets_odi',
       'recent_fielder_total_catches_odi', 'recent_fielder_total_runouts_odi',
       'venue_batsma

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [40]:
import pandas as pd
import pickle
from typing import List

def predict_fantasy_points_for_all_players(players_df: pd.DataFrame, feature_columns: List[str]) -> pd.DataFrame:
    """
    Predict the fantasy points for all players given their feature vectors.
    
    Args:
    - players_df (pd.DataFrame): A DataFrame containing player data (player_id, player_name, and feature columns).
    - feature_columns (List[str]): The correct list of feature names (column names) used during training.
    
    Returns:
    - pd.DataFrame: A DataFrame with player_id, player_name, and predicted fantasy points for each player.
    """
    try:
        print("Starting the prediction process...")

        # Load the pre-trained model using pickle
        print("Loading the pre-trained model...")
        with open('../../Code/Model_Final/ODI/model_odi.pkl', 'rb') as model_file:
            model = pickle.load(model_file)
        print("Model loaded successfully.")

        # Ensure the columns in players_df match the feature columns used during training
        print(f"Feature columns from training: {feature_columns}")

        # Extract the correct features based on feature_columns (order must match the training data)
        players_df = players_df[feature_columns]  # Reorder the columns to match training

        # Check if the correct number of features are present
        print(f"Number of features in input DataFrame: {len(players_df.columns)}")
        print("Input DataFrame preview:")
        print(players_df.head())

        # Predict fantasy points for all players
        print("Predicting fantasy points for all players...")
        predicted_fantasy_points = model.predict(players_df)
        print(f"Predictions completed. Number of predictions: {len(predicted_fantasy_points)}")

        # Add predicted fantasy points to the DataFrame
        players_df['predicted_fantasy_points'] = predicted_fantasy_points
        print("Predicted fantasy points added to DataFrame.")

        # Create the resulting DataFrame with only player_id, player_name, and predicted_fantasy_points
        results_df = pd.DataFrame({
            'player_id': players_df['player_id'],
            'player_name': players_df['player_name'],
            'predicted_fantasy_points': predicted_fantasy_points
        })
        print("Result DataFrame created with player_id, player_name, and predicted_fantasy_points.")

        # Display preview of results
        print("Result DataFrame preview:")
        print(results_df.head())
        
        return results_df
    
    except Exception as e:
        print(f"Error while loading model or predicting fantasy points: {e}")
        return pd.DataFrame(columns=['player_id', 'player_name', 'predicted_fantasy_points'])


In [46]:
players_df = feature_df
# Get predicted fantasy points for all players   
predicted_fantasy_points_df = predict_fantasy_points_for_all_players(players_df)

# Display the result
print(predicted_fantasy_points_df)



Starting the prediction process...
Loading the pre-trained model...
Model loaded successfully.
Extracted feature columns: Index(['career_batsman_100s_odi', 'career_batsman_50s_odi',
       'career_batsman_average_runs_odi', 'career_batsman_strike_rate_odi',
       'career_batsman_total_fours_odi', 'career_batsman_total_runs_odi',
       'career_batsman_total_sixes_odi', 'career_bowler_average_odi',
       'career_bowler_economy_rate_odi', 'career_bowler_wickets_odi',
       'career_fielder_total_catches_odi', 'career_fielder_total_runouts_odi',
       'recent_batsman_100s_odi', 'recent_batsman_50s_odi',
       'recent_batsman_average_runs_odi', 'recent_batsman_strike_rate_odi',
       'recent_batsman_total_fours_odi', 'recent_batsman_total_runs_odi',
       'recent_batsman_total_sixes_odi', 'recent_bowler_average_odi',
       'recent_bowler_economy_rate_odi', 'recent_bowler_wickets_odi',
       'recent_fielder_total_catches_odi', 'recent_fielder_total_runouts_odi',
       'venue_batsma

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Now we take in as input the dataframe of player_id, player_name, team_name, role , predicted_fantasy_points , in the ouput we return the recommneded dream team of 11 players with predicted fantasy points , and role 



In [None]:
import pandas as pd
from typing import List, Dict, Any

def generate_recommended_team(players: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Generate a recommended team based on predicted fantasy points and team diversity.
    The team should contain 11 players with the highest predicted fantasy points, while maintaining role diversity 
    and ensuring no more than 5 players from one team.
    
    Args:
    - players (List[Dict[str, Any]]): List of players with player_id, player_name, team_name, role, and predicted_fantasy_points.
    
    Returns:
    - List[Dict[str, Any]]: A list of 11 recommended players for the dream team, including their role and predicted fantasy points.
    """
    try:
        # Step 1: Pick one player from each role based on predicted fantasy points
        selected_players = []
        for role in ["Batsman", "Bowler", "Wicket-Keeper", "All-Rounder"]:
            role_players = [p for p in players if p["role"] == role]
            if role_players:
                selected_players.append(max(role_players, key=lambda x: x["predicted_fantasy_points"]))

        # Step 2: Select remaining players to make a team of 11
        remaining_players = [p for p in players if p not in selected_players]
        remaining_players.sort(key=lambda x: x["predicted_fantasy_points"], reverse=True)

        while len(selected_players) < 11:
            next_player = remaining_players.pop(0)
            selected_players.append(next_player)

        # Step 3: Ensure team diversity (max 5 players from one team)
        team_counts = pd.DataFrame(selected_players)["team_name"].value_counts()
        if team_counts.max() > 5:
            # If a team has more than 5 players, replace the lowest fantasy point player with the highest fantasy point player from the other team
            for team in team_counts.index:
                if team_counts[team] > 5:
                    # Identify the players from the overrepresented team
                    overrepresented_players = [p for p in selected_players if p["team_name"] == team]
                    # Identify the player with the lowest predicted fantasy points
                    lowest_fantasy_player = min(overrepresented_players, key=lambda x: x["predicted_fantasy_points"])
                    # Find the best available player from a different team
                    available_players = [p for p in players if p["team_name"] != team and p not in selected_players]
                    replacement_player = max(available_players, key=lambda x: x["predicted_fantasy_points"])
                    # Replace the lowest fantasy point player with the new player
                    selected_players.remove(lowest_fantasy_player)
                    selected_players.append(replacement_player)
                    break

        # Step 4: Sort selected players by predicted fantasy points
        selected_players.sort(key=lambda x: x["predicted_fantasy_points"], reverse=True)

        # Return the selected players as the final recommended team
        return selected_players

    except Exception as e:
        print(f"Error in generating recommended team: {e}")
        return []

