In [29]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
import numpy as np

# Step 1: Load the dataset
file_path = 'school_data.csv'
df = pd.read_csv(file_path)

# Step 2.1: Explode Positions
# Split the 'Position' column into multiple rows for players with multiple positions
df['Position'] = df['Position'].str.split(', ')
df_exploded = df.explode('Position').reset_index(drop=True)

# Step 2.2: Normalize the numerical columns
numeric_cols = df_exploded.select_dtypes(include=['float64', 'int64']).columns
scaler = StandardScaler()
df_exploded[numeric_cols] = scaler.fit_transform(df_exploded[numeric_cols])

# Step 2.3: One-hot encode the 'Position' column
df_encoded = pd.get_dummies(df_exploded, columns=['Position'])

# Step 2.4: Handle missing values by dropping rows with NaNs
df_cleaned = df_encoded.dropna().reset_index(drop=True)
df_exploded = df_exploded.loc[df_cleaned.index].reset_index(drop=True)

# Step 3: Unsupervised Learning using Autoencoder
# Set features for the autoencoder
X = df_cleaned.drop(columns=['player_name', 'team']).values.astype(np.float32)

# Step 3.1: Define the autoencoder model
input_dim = X.shape[1]
encoding_dim = 32  # Size of the encoded representation

input_layer = Input(shape=(input_dim,))
encoded = Dense(encoding_dim, activation='relu')(input_layer)
decoded = Dense(input_dim, activation='linear')(encoded)

autoencoder = Model(inputs=input_layer, outputs=decoded)
encoder = Model(inputs=input_layer, outputs=encoded)

# Step 3.2: Compile and train the autoencoder
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(X, X, epochs=50, batch_size=32, shuffle=True, validation_split=0.2, verbose=0)

# Step 4: Use the encoded representation to rank players
encoded_features = encoder.predict(X)

# Step 4.1: Calculate ranking score based on the encoded features
df_cleaned['ranking'] = encoded_features[:, 0]  # Using the first encoded feature for ranking

# Step 5: Create separate dataframes for each position with rankings
# Re-merge position information with player names and teams
df_cleaned['player_name'] = df_exploded['player_name']
df_cleaned['team'] = df_exploded['team']
df_cleaned['Position'] = df_exploded['Position']

# Step 5.1: Group by position and create separate dataframes for each position
position_groups = df_cleaned.groupby('Position')
position_dataframes = {}

for position, group in position_groups:
    position_df_name = f"{position.lower()}_position_df"
    position_df = group[['player_name', 'team', 'ranking']].sort_values(by='ranking', ascending=False)
    position_dataframes[position] = position_df
    globals()[position_df_name] = position_df  # Create a dataframe variable with the name xx_position_df
    print(f"Displaying Player Rankings for Position: {position}")
print(position_df.head())


Displaying Player Rankings for Position: 0
Displaying Player Rankings for Position: AMF
Displaying Player Rankings for Position: CB
Displaying Player Rankings for Position: CF
Displaying Player Rankings for Position: DMF
Displaying Player Rankings for Position: GK
Displaying Player Rankings for Position: LAMF
Displaying Player Rankings for Position: LB
Displaying Player Rankings for Position: LB5
Displaying Player Rankings for Position: LCB
Displaying Player Rankings for Position: LCB3
Displaying Player Rankings for Position: LCMF
Displaying Player Rankings for Position: LCMF3
Displaying Player Rankings for Position: LDMF
Displaying Player Rankings for Position: LW
Displaying Player Rankings for Position: LWF
Displaying Player Rankings for Position: RAMF
Displaying Player Rankings for Position: RB
Displaying Player Rankings for Position: RB5
Displaying Player Rankings for Position: RCB
Displaying Player Rankings for Position: RCB3
Displaying Player Rankings for Position: RCMF
Displayin

In [30]:
position_dataframes

{'0':          player_name                             team   ranking
 57     C. Fitzgerald  Maryland College Park Terrapins  2.223408
 132        J. Demian                 Indiana Hoosiers  2.191338
 142       J. Maguire               Washington Huskies  1.806891
 143    J. Maguire(1)               Washington Huskies  1.806891
 86           D. Hass             Detroit Mercy Titans  1.751134
 55      C. Desroches              Ohio State Buckeyes  1.350482
 199         M. Nesci                 Indiana Hoosiers  0.570634
 31        B. Keating          Rutgers Scarlet Knights  0.567461
 41   B. Saint Martin  Maryland College Park Terrapins  0.292319
 39         B. Minzey                 Indiana Hoosiers  0.234468
 145         J. Mason          Michigan State Spartans  0.000000
 171       L. Bezerra                 Indiana Hoosiers  0.000000
 216      N. Cassiday              Michigan Wolverines  0.000000
 259         R. Keefe                Wisconsin Badgers  0.000000,
 'AMF':       playe