In [None]:
import pandas as pd
from io import StringIO
# Show every column when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

In [None]:
# Load the merged dataset into a DataFrame.
# low_memory=False makes pandas parse the file in one pass so that column
# dtypes are inferred from the whole column instead of chunk by chunk.
df_100pos = pd.read_csv(
    'DataFiles/merged_df_100pos.csv',
    low_memory=False,
)

In [None]:
def feet_to_meters(height):
    """Convert a height written as ``'feet-inches'`` into meters.

    Parameters
    ----------
    height : object
        Expected to be a string such as ``'6-10'`` (6 feet, 10 inches).
        Anything else (``None``, ``NaN``, numbers) is treated as missing.

    Returns
    -------
    float or None
        The height in meters rounded to two decimals, or ``None`` when
        ``height`` is not a string or is not exactly two integers separated
        by ``'-'``.
    """
    if not isinstance(height, str):
        return None  # Missing value (None / NaN) or unexpected type

    try:
        # Raises ValueError for non-numeric parts ('N/A', '') as well as for
        # the wrong number of parts ('6', '6-1-2').
        feet, inches = map(int, height.split('-'))
    except ValueError:
        # A single malformed cell must not abort the whole column conversion.
        return None

    total_feet = feet + inches / 12  # Convert inches to feet
    total_meters = total_feet * 0.3048  # Convert feet to meters
    return round(total_meters, 2)  # Round to 2 decimals

In [None]:
# Convert every HEIGHT entry element-wise and store the result in a new column.
df_100pos['PLAYER_HEIGHT_METERS'] = df_100pos['HEIGHT'].map(feet_to_meters)

In [None]:
# Adding additional advanced metric columns
#
# Every metric below is a ratio. Plain division yields +/-inf when the
# denominator is 0 (and NaN for 0/0); a single inf then poisons any mean or
# other aggregate computed on the column later. Zero denominators are
# therefore masked to NaN before dividing, so undefined ratios are NaN.
def _safe_ratio(numerator, denominator):
    """Element-wise ``numerator / denominator``; NaN where the denominator is 0."""
    return numerator / denominator.where(denominator != 0)


# Shooting efficiency
df_100pos['FG3A%'] = _safe_ratio(df_100pos['FG3A'], df_100pos['FGA'])
df_100pos['PTS/FGA'] = _safe_ratio(df_100pos['PTS'], df_100pos['FGA'])
df_100pos['FG3M/FGM'] = _safe_ratio(df_100pos['FG3M'], df_100pos['FGM'])
df_100pos['FTA/FGA'] = _safe_ratio(df_100pos['FTA'], df_100pos['FGA'])

# True Shooting Percentage
# NOTE(review): the commonly published TS% formula weights FTA by 0.44;
# the 0.475 weight is kept unchanged here - confirm it is intentional.
df_100pos['TRU%'] = _safe_ratio(
    0.5 * df_100pos['PTS'],
    df_100pos['FGA'] + 0.475 * df_100pos['FTA'],
)

# Assists to Turnover Ratio
df_100pos['AST_TOV'] = _safe_ratio(df_100pos['AST'], df_100pos['TOV'])

# Spot-check the new columns on a single player
df_100pos[(df_100pos['PLAYER_NAME'] == 'Vlade Divac')]

In [None]:
# Collapse hybrid positions onto their first-listed position so that only
# three positions remain: C, F and G.
# One exact-match mapping is used for all four hybrids, so every value is
# handled the same way (whole-value match, no regex interpretation) and
# values that are not listed are left untouched.
df_100pos['POSITION'] = df_100pos['POSITION'].replace(
    {'F-C': 'F', 'C-F': 'C', 'F-G': 'F', 'G-F': 'G'}
)

In [None]:
# Countries treated as European for this analysis ("basketball sense": the
# list also contains e.g. Turkey, Israel, Georgia, Armenia and Azerbaijan).
europe = [
    "Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus",
    "Czech Republic", "Denmark", "Estonia", "Finland", "France",
    "Germany", "Greece", "Hungary", "Ireland", "Italy",
    "Latvia", "Lithuania", "Luxembourg", "Malta", "Netherlands",
    "Poland", "Portugal", "Romania", "Slovakia", "Slovenia",
    "Spain", "Sweden", "United Kingdom", "Norway", "Switzerland",
    "Belarus", "Russia", "Ukraine", "Moldova", "Georgia",
    "Armenia", "Azerbaijan", "Turkey", "Israel", "Albania",
    "North Macedonia", "Serbia", "Montenegro", "Bosnia and Herzegovina", "Iceland",
]

In [None]:
# Row filter: keep only the European countries
def filter_europe(df, column_name, europe):
    """Return the rows of ``df`` whose ``column_name`` value is listed in ``europe``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.
    column_name : str
        Column holding the country of each row.
    europe : list-like
        Country names that count as European.
    """
    is_european = df[column_name].isin(europe)
    return df.loc[is_european]

In [None]:
# Define function for non-European countries
def filter_non_europe(df, column_name='COUNTRY', country_list=europe):
    """Return the rows of ``df`` whose ``column_name`` value is NOT in ``country_list``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.
    column_name : str, default 'COUNTRY'
        Column holding the country of each row.
    country_list : list-like, default ``europe``
        Countries to exclude. The argument is honoured, so callers can pass
        any exclusion list instead of always filtering on the global
        ``europe``.

    Notes
    -----
    Rows whose country is missing (NaN) are not members of ``country_list``
    and are therefore kept in the result.
    """
    return df[~df[column_name].isin(country_list)]

In [None]:
# Number of distinct players (PLAYER_ID) per position.
# NOTE(review): this counts over the whole df_100pos, not only over European
# players - apply a Europe filter first if that was the intent.
(
    df_100pos
    .groupby('POSITION')['PLAYER_ID']
    .nunique()
)

In [None]:
# Inspect the dataset after the transformations above.
df_100pos


In [None]:
# Number of distinct players (PLAYER_ID) per position among non-European players.
# NOTE(review): `non_europe_C` is not defined in any earlier cell visible in
# this notebook; it has to be created before this cell can run on a fresh
# kernel (presumably with filter_non_europe - confirm the intended definition).
(
    non_europe_C
    .groupby('POSITION')['PLAYER_ID']
    .nunique()
)

In [None]:
import altair as alt
import pandas as pd
import altair_saver


# Assumes two DataFrames already exist in the kernel: europe_C and non_europe_C.

# Add a column marking whether each row belongs to the European or the
# non-European group, so the groups can be told apart after concatenation.
europe_C['Europe'] = 'European'
non_europe_C['Europe'] = 'Non-European'

# Combine the two DataFrames
combined_df = pd.concat([europe_C, non_europe_C])

# Metrics to plot
metrics = ['FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 
           'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 
           'PF', 'PTS', 'FG3A%', 'PTS/FGA', 'FG3M/FGM', 'FTA/FGA', 
           'TRU%', 'AST_TOV', 'PLAYER_HEIGHT_METERS']

# Collects one chart per metric
charts = []

# Plot only a fraction of the combined DataFrame (adjust `frac` if needed).
# A fixed random_state makes the sample - and therefore every chart below -
# identical on each run instead of changing with every execution.
sampled_df = combined_df.sample(frac=0.5, random_state=42)

# Build one bar chart per metric (mean of the metric per group) and store it
for metric in metrics:
    chart = alt.Chart(sampled_df).mark_bar().encode(
        x=alt.X('Europe:N', title='Europe'),
        y=alt.Y(f'mean({metric}):Q', title=f'Average {metric}'),
        color=alt.Color('Europe:N', legend=alt.Legend(title='Europe'))
    ).properties(
        title=f'Average {metric} by Europe',
        width=400,
        height=300
    )
    charts.append(chart)

# Lay the charts out four per row (22 metrics -> six rows)
alt_row = alt.vconcat(*[alt.hconcat(*charts[i:i+4]) for i in range(0, len(charts), 4)])

# Display the charts
alt_row


In [None]:
## Bar chart of centers per country since 1996

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
import numpy as np

# Number of distinct players per country in europe_C.
# NOTE: the name `above_avg_C` is kept as in the original cell, although no
# above-average filter is applied here.
above_avg_C = europe_C.groupby('COUNTRY')['PLAYER_NAME'].nunique().reset_index()

# Sort countries by their unique-player count, largest first
above_avg_C = above_avg_C.sort_values(by='PLAYER_NAME', ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))

# Bar per country, drawn on the axes created above
sns.barplot(x='COUNTRY', y="PLAYER_NAME", data=above_avg_C, palette='rocket', linewidth=2, ax=ax)

# Rotate x-axis labels for better readability with many countries
ax.tick_params(axis='x', labelrotation=80)

# The data is not restricted to a single season, so the title says
# "since 1996" (matching this cell's heading) rather than "in 1996".
ax.set_title("European Centers in NBA since 1996", weight='bold', fontsize=16)
ax.set_xlabel("Country", rotation=0, labelpad=20, weight='bold', fontsize=12)
ax.set_ylabel("Count", rotation=0, labelpad=20, weight='bold', fontsize=12)
ax.set_ylim(bottom=0)  # Start y-axis at zero

# Counts are whole players: show only integer ticks on the y-axis
ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))

ax.grid(axis='y', linestyle='--', alpha=0.7)  # Add grid lines with style

fig.tight_layout()  # Adjust spacing for better readability

plt.show()  # Display the plot

In [None]:
## Bar chart of centers with above average stats

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
import numpy as np


# Step 1: keep only the europe_C rows that reach all three thresholds
# (REB >= 13.0, BLK >= 2.0, AST >= 3.0), which this notebook treats as
# above-average numbers for centers.
meets_thresholds = (
    europe_C['REB'].ge(13.0)
    & europe_C['BLK'].ge(2.0)
    & europe_C['AST'].ge(3.0)
)
filtered_df = europe_C[meets_thresholds]

# Step 2: number of distinct players per country, largest count first
above_avg_C = (
    filtered_df
    .groupby('COUNTRY')['PLAYER_NAME']
    .nunique()
    .reset_index()
)
above_avg_C = above_avg_C.sort_values(by='PLAYER_NAME', ascending=False)

# Step 3: bar chart, one bar per country
fig, ax = plt.subplots(figsize=(10, 6))

sns.barplot(
    data=above_avg_C,
    x='COUNTRY',
    y="PLAYER_NAME",
    palette='rocket',
    linewidth=2,
    ax=ax,
)

# Rotate the country labels so that many countries stay readable
plt.xticks(rotation=80)

ax.set_title("European Centers in NBA with > AVG Stats since 1996", weight='bold', fontsize=16)
ax.set_xlabel("Country", rotation=0, labelpad=20, weight='bold', fontsize=12)
ax.set_ylabel("Count", rotation=0, labelpad=20, weight='bold', fontsize=12)

# y-axis runs from zero up to a fixed upper limit of 4
ax.set_ylim(bottom=0, top=4)

# Only integer ticks on the y-axis
ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))

# Dashed horizontal grid lines
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Tighten the layout, then render the figure
plt.tight_layout()
plt.show()