In [21]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

Data Loading

In [22]:
data = pd.read_csv('data.csv')
data

Unnamed: 0,soldier_id,Greed Score (0-100),Respect Level (0-100),Loyalty History (0/1),Psychological Stress (0-100),Recent Battle Experience (0/1),Age (18-50),Likelihood of Betrayal (0/1),Social Integration Score,Financial Stress Indicator,Recent Performance Evaluation Scores (1-10),Performance Trend
0,1,51,56,0,75,0,43,0,0.72,2.04,9,Decline
1,2,92,16,0,6,0,21,1,0.76,4.16,1,Stable
2,3,14,85,0,28,0,35,0,0.40,3.04,10,Increase
3,4,71,89,0,10,0,46,1,0.96,4.04,6,Decline
4,5,60,43,1,32,0,46,1,0.68,5.15,6,Stable
...,...,...,...,...,...,...,...,...,...,...,...,...
495,496,95,5,1,47,0,41,1,0.48,5.36,8,Decline
496,497,47,90,0,86,1,49,1,0.36,2.00,2,Increase
497,498,88,39,0,69,1,18,1,0.76,4.01,4,Decline
498,499,0,97,1,45,0,25,0,0.16,4.38,10,Stable


Selecting Features

In [23]:
features = [
    'Greed Score (0-100)',
    'Respect Level (0-100)',
    'Loyalty History (0/1)',
    'Psychological Stress (0-100)',
    'Recent Battle Experience (0/1)',
    'Age (18-50)',
    'Social Integration Score',
    'Financial Stress Indicator',
    'Recent Performance Evaluation Scores (1-10)'
]

X = data[features]

In [24]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

Training Using Isolation Forest

In [25]:
isolation_forest = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
isolation_forest.fit_predict(X_scaled)

array([ 1,  1,  1, -1,  1,  1,  1, -1,  1, -1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
       -1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,
        1,  1,  1,  1,  1,  1,  1, -1, -1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1

Getting Anomaly Score

In [26]:
data['Betrayal Score'] = -isolation_forest.decision_function(X_scaled)

Normalizing Anomaly Score to 0-100

In [27]:
data['Betrayal Score'] = (data['Betrayal Score'] - data['Betrayal Score'].min()) / (data['Betrayal Score'].max() - data['Betrayal Score'].min()) * 100

Ranking Soldiers

In [28]:
data['Rank'] = data['Betrayal Score'].rank(ascending=True)

In [29]:
print(data[['soldier_id', 'Betrayal Score', 'Rank']])

     soldier_id  Betrayal Score   Rank
0             1       50.716784  314.0
1             2       77.328222  472.0
2             3       33.825535  160.0
3             4       83.596655  487.0
4             5       42.769643  236.0
..          ...             ...    ...
495         496       62.503436  396.0
496         497       54.985582  348.0
497         498       51.609432  318.0
498         499       66.995837  433.0
499         500       24.790904   73.0

[500 rows x 3 columns]


In [30]:
data.to_csv('result.csv', index=False)