In [None]:
'''Distribution of the number of games that players play in a season.

In this code, we first load `player_appearances.csv` and `matches.csv` from CLEANIZED_DIR
and count, for each player, the rows recorded for the 2021 season.
We then calculate the distribution of the number of games played by players using the value_counts() method.
This method returns a Series containing counts of unique values.
We set the normalize parameter to True so that it returns relative frequencies (fractions that sum to 1)
instead of raw counts.
We then calculate each player's share of games by dividing that player's number of games
played by the total number of games played (summed over all players) and multiplying by 100.'''

import pandas as pd
from transfermarkt_analysis.consts import CLEANIZED_DIR


def games_played(season=2021, data_dir=None):
    """Count, per player, the rows that belong to the given season.

    Reads ``player_appearances.csv`` and ``matches.csv`` from ``data_dir``,
    places the two tables side by side, keeps the rows whose ``season_id``
    equals ``season`` and counts the remaining rows for each ``player_id``.

    Parameters
    ----------
    season : int, default 2021
        Value of ``season_id`` to keep.
    data_dir : path-like supporting ``/``, optional
        Directory holding the two CSV files. Defaults to ``CLEANIZED_DIR``.

    Returns
    -------
    pandas.Series
        One count per player, named ``season_id``, ordered by ``player_id``
        and indexed 0..n-1 (the player ids themselves are not returned).
    """
    if data_dir is None:
        data_dir = CLEANIZED_DIR

    # read_csv already yields a fresh 0..n-1 index, so no reset is needed here.
    appearances = pd.read_csv(data_dir / "player_appearances.csv")
    matches = pd.read_csv(data_dir / "matches.csv")

    # NOTE(review): axis=1 pairs row i of the appearances table with row i of
    # the matches table purely by position. If the two files are related
    # through a match key, this should presumably be a merge on that key
    # instead - confirm against the CSV schemas before trusting the counts.
    combined = pd.concat([appearances, matches], axis=1)

    in_season = combined.loc[combined["season_id"] == season]

    # Every kept row has a non-null season_id, so counting that column gives
    # the number of kept rows per player.
    per_player = in_season.groupby("player_id")["season_id"].count()
    return per_player.reset_index(drop=True)



# Per-player counts, as returned by games_played()
match_count = games_played()

# Relative frequency of each distinct count value
# (normalize=True yields fractions that sum to 1, not raw tallies)
games_played_distribution = match_count.value_counts(normalize=True)


# Express every player's count as a percentage of the summed counts
total_games = match_count.sum()
player_games_percentage = match_count.div(total_games).mul(100)

print(games_played_distribution, player_games_percentage, sep="\n")

In [None]:
'''Investigate whether there is a relationship between the number of goals scored and the estimated price of a player on the site,
using linear regression.

In this code, we first load the data from a CSV file called 'data.csv'. We then plot the relationship between goals scored and
estimated price using the `regplot()` function from Seaborn.
This function draws a scatter plot of the data together with a fitted linear regression line. We then fit a linear regression
model to the same data using scikit-learn's `LinearRegression()` class.
We reshape both columns into single-column 2-D arrays using NumPy's `reshape()` method, because scikit-learn expects a 2-D feature matrix.
Finally, we print the slope and intercept of the fitted regression line.'''

import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from transfermarkt_analysis.consts import CLEANIZED_DIR

# Read the input table
data = pd.read_csv('data.csv')

# Scatter of estimated price against goals scored, with seaborn's fitted line
sns.regplot(data=data, x='goals_scored', y='estimated_price')

# Turn both columns into (n, 1) arrays before handing them to scikit-learn
X = data['goals_scored'].to_numpy().reshape(-1, 1)
y = data['estimated_price'].to_numpy().reshape(-1, 1)
model = LinearRegression()
model.fit(X, y)

# The target is 2-D, so the coefficient is read at [0, 0] and the intercept at [0]
print('Slope:', model.coef_[0, 0])
print('Intercept:', model.intercept_[0])

In [None]:
'''Investigate whether there is a relationship between the number of goals scored and the estimated price of a striker on the site,
using linear regression.

In this code, we first load the data from a CSV file called 'data.csv'. We then keep only the rows whose position is 'striker', using boolean indexing.
We plot the relationship between goals scored and estimated price for strikers using the `regplot()` function from Seaborn.
We then fit a linear regression model to the same striker data using scikit-learn's `LinearRegression()` class.
We reshape both columns into single-column 2-D arrays using NumPy's `reshape()` method, because scikit-learn expects a 2-D feature matrix.
Finally, we print the slope and intercept of the fitted regression line.'''

import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from transfermarkt_analysis.consts import CLEANIZED_DIR


# Read the input table
data = pd.read_csv('data.csv')

# Keep only the rows whose position is exactly 'striker'
strikers = data.loc[data['position'].eq('striker')]

# Scatter of estimated price against goals scored for strikers, with fitted line
sns.regplot(data=strikers, x='goals_scored', y='estimated_price')

# Turn both columns into (n, 1) arrays before handing them to scikit-learn
X = strikers['goals_scored'].to_numpy().reshape(-1, 1)
y = strikers['estimated_price'].to_numpy().reshape(-1, 1)
model = LinearRegression()
model.fit(X, y)

# The target is 2-D, so the coefficient is read at [0, 0] and the intercept at [0]
print('Slope:', model.coef_[0, 0])
print('Intercept:', model.intercept_[0])


In [None]:
'''Summary of estimated player prices by player position (mean price per position) and the total number of goals scored in each league.'''

import pandas as pd
from transfermarkt_analysis.consts import CLEANIZED_DIR


# Read the input table
data = pd.read_csv('data.csv')

# Mean estimated price for each position (one summary value per position)
price_distribution = data['estimated_price'].groupby(data['position']).mean()

# Total goals scored within each league
goals_scored = data['goals_scored'].groupby(data['league']).sum()

print(price_distribution, goals_scored, sep='\n')


In [None]:
import pandas as pd

# Read the input table
data = pd.read_csv('data.csv')

# Mean estimated price for each position (one summary value per position)
by_position = data.groupby('position')
price_distribution = by_position['estimated_price'].agg('mean')

print(price_distribution)
