# Calcio Trends Impact Score

## Overview
Calcio Trends Impact Score is a metric that measures the impact of a player in a football match. It is calculated based on the player's performance in the match and the importance of the match. The score is calculated using a combination of various factors such as goals scored, assists, passes completed, tackles won, and other performance metrics. The score is then normalized to a scale of 0 to 100 to provide a standardized measure of a player's impact in a match.

In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import calculator as calc

In [25]:
app = pd.read_csv("Assignment_Data/appearances.csv")
players = pd.read_csv("Assignment_Data/players.csv")

In [26]:
def filter_last_season(app, filter_year):
    """
    Filter appearances for the filter_year dropping null values.
    """
    appearances_last_season_df = app[app['date'].str.contains(filter_year, na=False)]
    appearances_last_season_df = appearances_last_season_df.dropna(subset=['yellow_cards', 'red_cards', 'goals', 'assists'])
    return appearances_last_season_df

def merge_and_select_columns(players, appearances_last_season_df):
    """
    Merge player data with filtered appearances and select necessary columns.
    """
    merged_df = pd.merge(players, appearances_last_season_df, on='player_id', how='inner')
    selected_columns = ['player_id', 'market_value_in_eur', 'position', 'yellow_cards', 'red_cards', 'goals', 'assists', 'date',  'game_id']
    merged_df = merged_df[selected_columns]
    merged_df = merged_df.dropna(subset=['yellow_cards', 'red_cards', 'goals', 'assists'])
    return merged_df

def group_by_player(merged_df):
    """
    Group by player and aggregate game statistics.
    """
    grouped_df = merged_df.groupby('player_id').agg({
        'market_value_in_eur': 'first',
        'position': 'first',
        'yellow_cards': list,
        'red_cards': list,
        'goals': list,
        'assists': list,
        'date': list,
        'game_id': list
    }).reset_index()
    return grouped_df

def filter(filter_year):
    """
    Main function to load data, filter, merge, and group.
    """
    appearances_last_season_df = filter_last_season(app, filter_year)
    merged_df = merge_and_select_columns(players, appearances_last_season_df)
    grouped_df = group_by_player(merged_df)
    return grouped_df


In [27]:
# Example usage
grouped_df = filter('2023')
grouped_df.head()  # Display the first few rows of the resulting DataFrame

Unnamed: 0,player_id,market_value_in_eur,position,yellow_cards,red_cards,goals,assists,date,game_id
0,3333,1500000.0,Midfield,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[2023-01-17, 2023-01-21, 2023-01-29, 2023-02-0...","[3999705, 3838786, 4013658, 3838777, 3838764, ..."
1,3455,,Attack,"[0, 0, 0, 0]","[0, 0, 0, 0]","[0, 0, 0, 1]","[0, 0, 0, 0]","[2023-02-26, 2023-03-04, 2023-03-13, 2023-03-18]","[3844973, 3844977, 3844992, 3845005]"
2,4742,50000.0,Midfield,"[1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]","[2023-01-07, 2023-01-14, 2023-01-18, 2023-01-2...","[3851118, 3851275, 3851321, 3851247, 3851157, ..."
3,5336,450000.0,Midfield,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[2023-02-12, 2023-02-18, 2023-02-24, 2023-03-0...","[3889137, 3889144, 3889150, 3889160, 3889165, ..."
4,7161,13000000.0,Attack,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, ...","[0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...","[2023-01-22, 2023-01-28, 2023-02-04, 2023-02-1...","[3839280, 3839317, 3839333, 3839354, 3839369, ..."


In [28]:
def calculate_score(player_data):
    """
    Calculate the score for a player based on the given formula.
    
    Parameters:
    player_data (tuple): A tuple containing position, a list of yellow cards, red cards, goals, and game ids.
    
    Returns:
    float: The calculated score for the player.
    """
    position, yellow_cards, red_cards, goals, assists, game_ids = player_data[1:7]
    score = 1
    
    for i in range(len(game_ids)):
        position_coefficient = 1+((position in ['attack', 'midfield']) * 0.2)
        assist_score = int(assists[i]) * 0.45
        goal_score = goals[i] * position_coefficient
        yellow_card_score = yellow_cards[i] * -0.2
        if position == 'defense':
            yellow_card_score *= -1
        red_card_score = red_cards[i] * -0.5
        
        game_score = goal_score + yellow_card_score + red_card_score + assist_score
        score += game_score
        
    return score

In [29]:
# Example usage
player_data_example = (
    'player1',             # player_id
    'attack',              # position
    [1, 0, 2],             # yellow_cards list
    [1, 0, 0],             # red_cards list
    [0, 2, 0],             # goals list
    [3, 1, 4],             # assists list
    ['game1', 'game2', 'game3'],  # game_ids list
    80000000               # market_value_in_eur
)

# Calculate score for the example player data
score = calculate_score(player_data_example)

print(f"The calculated score is: {score}")

The calculated score is: 5.9


In [30]:
def normalize_scores(scores):
    """
    Normalize the scores to be between 1 and 10.
    
    Parameters:
    scores (list): A list of scores for all players.
    
    Returns:
    list: A list of normalized scores between 1 and 100.
    """
    min_score = min(scores)
    max_score = max(scores)
    normalized_scores = [1 + (score - min_score) * (100 - 1) / (max_score - min_score) for score in scores]
    return normalized_scores

In [31]:
# Example usage
players_data = [
    ('player1', 'attack', [1, 0, 0], [0, 1, 0], [2, 1, 3], [1, 1, 1], ['game1', 'game2', 'game3']),
    ('player2', 'midfield', [0, 0, 1], [0, 0, 0], [1, 2, 1], [2, 2, 2], ['game1', 'game2', 'game3']),
    ('player3', 'defense', [0, 1, 0], [1, 0, 0], [0, 0, 0], [0, 0, 0], ['game1', 'game2', 'game3']),
    # Add more player data as needed
]

# Calculate scores for all players
scores = [calculate_score(player_data) for player_data in players_data]

# Normalize the scores
normalized_scores = normalize_scores(scores)

# Display the normalized scores
for player_data, score, normalized_score in zip(players_data, scores, normalized_scores):
    print(f"Player: {player_data[0]}, Original Score: {score}, Normalized Score: {normalized_score}")


Player: player1, Original Score: 8.85, Normalized Score: 100.0
Player: player2, Original Score: 8.3, Normalized Score: 93.31901840490798
Player: player3, Original Score: 0.7, Normalized Score: 1.0


In [32]:
data = filter('2023')

# Calculate scores for all players
scores = [calculate_score(player_data) for player_data in data]

# Normalize the scores
#normalized_scores = normalize_scores(scores)

# Display the normalized scores
#for player_data, score, normalized_score in zip(data, scores, normalized_scores):
#    print(f"Player: {player_data[0]}, Original Score: {score}, Normalized Score: {normalized_score}")

ValueError: invalid literal for int() with base 10: 'r'