## Import Required Libraries

In [1]:
import sys
from pathlib import Path

# pandas is the only third-party dependency; stop early with a readable
# message if it is not installed.
try:
    import pandas as pd
except ImportError as e:
    print(f"Error: Required libraries not found: {e}")
    sys.exit(1)
else:
    # Reached only when the import above succeeded.
    print("✓ All libraries imported successfully")

✓ All libraries imported successfully


## Load the Data

In [2]:
def load_dataset(filepath: str) -> pd.DataFrame:
    """
    Load a CSV dataset into a DataFrame.

    Args:
        filepath: Path to the CSV file to read.

    Returns:
        DataFrame produced by ``pd.read_csv`` for that file.

    Raises:
        FileNotFoundError: If ``filepath`` does not point to an existing
            regular file (the path is missing, or it is a directory).
    """
    path = Path(filepath)

    # is_file() rather than exists(): a directory "exists" but cannot be read
    # as CSV, and would otherwise surface as an OS-specific error
    # (IsADirectoryError / PermissionError) that callers catching
    # FileNotFoundError do not handle.
    if not path.is_file():
        raise FileNotFoundError(f"Dataset not found: {filepath}")

    return pd.read_csv(path)

# Load the training set; only the load itself is guarded, the success
# message is printed once the frame is available.
try:
    train_df = load_dataset('../resources/Train_knight.csv')
except FileNotFoundError as e:
    print(f"✗ Error: {e}")
    sys.exit(1)
else:
    print(f"✓ Dataset loaded: {train_df.shape[0]} rows, {train_df.shape[1]} columns")

✓ Dataset loaded: 398 rows, 31 columns


In [3]:
# Derive an encoded frame (Jedi -> 0, Sith -> 1) without touching train_df,
# so the target can take part in the correlation.
df_temp = train_df.assign(knight=train_df['knight'].map({'Jedi': 0, 'Sith': 1}))

# Pairwise correlation between every pair of columns, encoded target included.
full_matrix = df_temp.corr()
print(f"Matrix shape: {full_matrix.shape}", "\nFull correlation matrix:", sep="\n")
full_matrix

Matrix shape: (31, 31)

Full correlation matrix:


Unnamed: 0,Sensitivity,Hability,Strength,Power,Agility,Dexterity,Awareness,Prescience,Reactivity,Midi-chlorien,...,Evade,Stims,Sprint,Combo,Delay,Attunement,Empowered,Burst,Grasping,knight
Sensitivity,1.0,0.337556,0.997842,0.98612,0.171059,0.506158,0.658587,0.816778,0.168508,-0.340942,...,0.279326,0.959444,0.941086,0.081617,0.396373,0.495775,0.726129,0.16937,0.002709,-0.721566
Hability,0.337556,1.0,0.348377,0.331831,0.057664,0.315342,0.355403,0.35098,0.108389,-0.018188,...,0.903324,0.374902,0.35216,0.133681,0.341901,0.358846,0.359781,0.139373,0.192315,-0.446632
Strength,0.997842,0.348377,1.0,0.984738,0.207609,0.55739,0.698516,0.845548,0.202454,-0.290692,...,0.291535,0.96641,0.943184,0.11411,0.440129,0.534855,0.754976,0.194277,0.048324,-0.737403
Power,0.98612,0.331831,0.984738,1.0,0.173705,0.486935,0.665253,0.811245,0.167166,-0.319333,...,0.264068,0.949234,0.957344,0.082672,0.366704,0.481386,0.699648,0.145284,-0.005786,-0.700709
Agility,0.171059,0.057664,0.207609,0.173705,1.0,0.64907,0.529327,0.56771,0.54753,0.567478,...,0.12226,0.258251,0.228527,0.789491,0.465127,0.446691,0.512712,0.383458,0.482839,-0.397458
Dexterity,0.506158,0.315342,0.55739,0.486935,0.64907,1.0,0.881299,0.833358,0.594439,0.537572,...,0.329285,0.611145,0.528624,0.55812,0.868827,0.824416,0.830514,0.510994,0.689885,-0.631987
Awareness,0.658587,0.355403,0.698516,0.665253,0.529327,0.881299,1.0,0.909704,0.504167,0.336899,...,0.341086,0.724532,0.67805,0.444792,0.744397,0.886168,0.853861,0.406751,0.52289,-0.699662
Prescience,0.816778,0.35098,0.845548,0.811245,0.56771,0.833358,0.909704,1.0,0.477038,0.146091,...,0.334433,0.861213,0.81862,0.438275,0.6575,0.739748,0.90692,0.380567,0.367852,-0.790066
Reactivity,0.168508,0.108389,0.202454,0.167166,0.54753,0.594439,0.504167,0.477038,1.0,0.45325,...,0.135197,0.259597,0.220605,0.426958,0.476949,0.448815,0.45047,0.703485,0.43994,-0.375103
Midi-chlorien,-0.340942,-0.018188,-0.290692,-0.319333,0.567478,0.537572,0.336899,0.146091,0.45325,1.0,...,0.039532,-0.210846,-0.241955,0.517969,0.464328,0.376574,0.178145,0.342502,0.765183,-0.008132


## Calculate Correlation with Target

In [4]:
def calculate_correlation(df: pd.DataFrame, target_col: str = 'knight') -> pd.Series:
    """
    Calculate how strongly each numeric column correlates with the target.

    A non-numeric target is encoded with ``{'Jedi': 0, 'Sith': 1}`` (any other
    label becomes NaN); a target that is already numeric is used as-is.
    Other non-numeric columns are left out of the computation. The input
    DataFrame is not modified.

    Args:
        df: DataFrame containing the features and the target column
        target_col: Name of the target column

    Returns:
        Series indexed by column name holding the ABSOLUTE value of each
        column's correlation with the target (the sign is dropped), sorted
        in descending order. The target itself is included in the result.

    Raises:
        ValueError: If ``target_col`` is not a column of ``df``.
    """
    if target_col not in df.columns:
        raise ValueError(f"Target column '{target_col}' not found in DataFrame")

    target = df[target_col]
    if not pd.api.types.is_numeric_dtype(target):
        # Only string-like labels need encoding; mapping an already-numeric
        # target through the label dict would turn every value into NaN.
        # astype(float) guarantees a numeric dtype whatever map() returns.
        target = target.map({'Jedi': 0, 'Sith': 1}).astype(float)

    df_encoded = df.copy()
    df_encoded[target_col] = target

    # Restrict to numeric/bool columns explicitly: DataFrame.corr() raises on
    # leftover string columns in pandas >= 2.0 instead of skipping them.
    numeric_only = df_encoded.select_dtypes(include=['number', 'bool'])
    correlations = numeric_only.corr()[target_col]

    return correlations.abs().sort_values(ascending=False)

In [5]:
try:
    correlations = calculate_correlation(train_df, 'knight')

    # Title and ruler are emitted by a single call, one per line.
    print("Correlation between features and target (knight):", "=" * 40, sep="\n")

    # One row per column: name left-aligned in 20 characters, value to 6 decimals.
    for feature, corr in correlations.items():
        print(f"{feature:<20} {corr:.6f}")

except ValueError as e:
    # calculate_correlation raises ValueError when the target column is absent.
    print(f"✗ Error: {e}")
    sys.exit(1)

Correlation between features and target (knight):
knight               1.000000
Empowered            0.793652
Prescience           0.790066
Stims                0.786797
Recovery             0.777633
Sprint               0.739672
Strength             0.737403
Sensitivity          0.721566
Power                0.700709
Awareness            0.699662
Attunement           0.648893
Dexterity            0.631987
Delay                0.598072
Slash                0.550663
Pull                 0.537800
Lightsaber           0.515340
Evade                0.465605
Hability             0.446632
Burst                0.445847
Combo                0.445223
Blocking             0.421950
Agility              0.397458
Reactivity           0.375103
Grasping             0.350105
Repulse              0.324399
Friendship           0.236633
Mass                 0.113185
Survival             0.043099
Deflection           0.026489
Push                 0.019446
Midi-chlorien        0.008132


## Summary

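- **Strongest correlations**: Features whose absolute correlation is closest to 1.0 (e.g. `Empowered`, `Prescience`, `Stims`) have the strongest linear relationship with the target. The `knight` row is the target correlated with itself, so its 1.0 is trivial.
- **Weak correlations**: Features near 0 (e.g. `Midi-chlorien`, `Push`) have little linear relationship with the target.
- The values listed above are absolute values: the sign is dropped, so they show the strength of the relationship, not its direction.
- High-correlation features are good candidates for classification models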