In [118]:
import pandas as pd
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler


In [119]:
# Load the source dataset that every later cell builds on.
df = pd.read_csv("Wildlife_Dataset_Top6.csv")

Creating the Feature Matrix

In [120]:
# Build one row per (Taxon, Exporter) pair by aggregating that pair's rows in `df`.
# Each keyword names an output column; NamedAgg states which input column it
# is computed from and how.
feature_df = df.groupby(['Taxon', 'Exporter']).agg(
    # Averages of the encoded risk-related columns.
    appendix_risk=pd.NamedAgg(column='App.', aggfunc='mean'),
    # Totals of the (already log-transformed) quantities.
    import_qty_log=pd.NamedAgg(column='import_qty_log', aggfunc='sum'),
    export_qty_log=pd.NamedAgg(column='export_qty_log', aggfunc='sum'),
    # Mean of the is_live column for the pair.
    live_trade_ratio=pd.NamedAgg(column='is_live', aggfunc='mean'),
    purpose_risk=pd.NamedAgg(column='Purpose', aggfunc='mean'),
    source_risk=pd.NamedAgg(column='Source', aggfunc='mean'),
    # Number of rows contributing to the pair.
    num_trade_events=pd.NamedAgg(column='Exporter', aggfunc='count'),
).reset_index()

In [121]:
# Preview the first rows of the aggregated feature matrix.
feature_df.head()

Unnamed: 0,Taxon,Exporter,appendix_risk,import_qty_log,export_qty_log,live_trade_ratio,purpose_risk,source_risk,num_trade_events
0,Alligator Mississippiensis,Albania,2.0,0.693147,0.0,0.0,3.0,3.0,1
1,Alligator Mississippiensis,Andorra,2.0,1.609438,0.0,0.0,3.0,3.0,1
2,Alligator Mississippiensis,Antigua and Barbuda,2.0,0.693147,0.0,0.0,3.0,3.0,1
3,Alligator Mississippiensis,Argentina,2.0,0.0,4.442651,0.0,3.0,3.0,1
4,Alligator Mississippiensis,Australia,2.0,21.817762,0.0,0.0,3.0,2.636364,11


In [122]:
# Persist the feature matrix so it can be inspected outside the notebook.
feature_df.to_csv("Feature_Matrix.csv", index=False)

# The browser download helper only exists on Google Colab. Elsewhere the CSV
# written above is already on local disk, so skip the download instead of
# aborting the notebook with an ImportError.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available; Feature_Matrix.csv was saved locally.")
else:
    files.download("Feature_Matrix.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [123]:
# Drop the categorical identifier columns so only numeric features remain.
id_cols = ['Taxon', 'Exporter']

# feature_df gets a 'poaching_risk_score' column added in place further down.
# Excluding it here (errors='ignore' covers the first run, when it does not
# exist yet) keeps this cell safe to re-run: the model's own output can never
# leak back into its input features.
X = (
    feature_df
    .drop(columns=id_cols)
    .drop(columns=['poaching_risk_score'], errors='ignore')
)

In [124]:
# Create the taxonomy lookup table: exactly one row per Taxon.
# De-duplicating on 'Taxon' alone (keeping the first row seen) guarantees the
# key is unique. De-duplicating on all four columns would keep several rows
# for a Taxon whose Order/Family/Genus values are inconsistent or partly
# missing, and a later left merge on 'Taxon' would then multiply result rows.
taxonomy_lookup = (
    df[['Taxon', 'Order', 'Family', 'Genus']]
    .drop_duplicates(subset='Taxon')
    .reset_index(drop=True)
)

In [125]:
# Summary statistics of the numeric features before scaling.
X.describe()

Unnamed: 0,appendix_risk,import_qty_log,export_qty_log,live_trade_ratio,purpose_risk,source_risk,num_trade_events
count,357.0,357.0,357.0,357.0,357.0,357.0,357.0
mean,2.026048,36.788232,58.593105,0.047081,2.83521,1.97036,28.235294
std,0.122931,84.582167,173.187719,0.187991,0.328915,0.799381,71.464225
min,2.0,0.0,0.0,0.0,1.0,1.0,1.0
25%,2.0,1.386294,0.0,0.0,2.875,1.073171,2.0
50%,2.0,5.894403,3.663562,0.0,3.0,2.0,5.0
75%,2.0,32.583766,30.121181,0.0,3.0,2.9,24.0
max,3.0,762.085341,1553.938299,1.0,3.0,3.0,702.0


Scaling the Feature Matrix

In [126]:
# Fit the scaler on the feature matrix first, then apply it to the same data.
# `scaler` stays available as the fitted object.
scaler = RobustScaler().fit(X)
X_scaled = scaler.transform(X)

In [127]:
# Sanity check: (rows, features) of the scaled matrix.
X_scaled.shape

(357, 7)

Training Isolation Forest Model

In [128]:
# Isolation Forest configuration:
#   - 300 trees
#   - contamination 0.08, i.e. inside the assumed ~5–10% share of risky cases
#   - fixed random_state so repeated runs give the same model
iso_model = IsolationForest(n_estimators=300, contamination=0.08, random_state=42)

# Kept as the cell's last expression so the fitted estimator is displayed.
iso_model.fit(X_scaled)

Generate Risk Scores for each Species–Exporter Pair

In [129]:
# Raw anomaly scores (higher = riskier): decision_function is negated so that
# a larger value means a more anomalous row.
risk_scores_raw = -iso_model.decision_function(X_scaled)

In [130]:
# Rescale the raw anomaly scores with a min-max scaler.
# The scaler expects 2-D input, so the 1-D scores are passed as a single
# column and the scaled column is taken back out as a 1-D array.
risk_scaler = MinMaxScaler()
poaching_risk_score = risk_scaler.fit_transform(risk_scores_raw[:, None])[:, 0]

In [131]:
# Attach the scores to the feature table. poaching_risk_score is a plain
# array, so values are matched to rows by position.
feature_df['poaching_risk_score'] = poaching_risk_score

Generate Risk Score and highest-risk country per species

In [132]:
# For every Taxon keep the single (Taxon, Exporter) row with the highest score.
#
# idxmax returns the index label of each group's maximum, and .loc then pulls
# those rows out whole. This avoids GroupBy.first(), which takes the first
# non-null value of each column independently and can therefore combine values
# from different rows when the top row has a missing value. It also makes tie
# handling deterministic (first occurrence wins) instead of depending on an
# unstable sort.
#
# groupby sorts its keys, so the result is ordered by Taxon; the index is
# reset to 0..n-1.
species_poaching_risk = (
    feature_df
    .loc[feature_df.groupby('Taxon')['poaching_risk_score'].idxmax()]
    .reset_index(drop=True)
)

In [133]:
# Reduce the per-species table to the reporting columns and give the exporter
# column its output name.
final_result = (
    species_poaching_risk
    .loc[:, ['Taxon', 'Exporter', 'poaching_risk_score']]
    .rename(columns={'Exporter': 'likely_poaching_country'})
)


In [134]:
# Add the Order / Family / Genus columns for each Taxon. A left join keeps
# every species row even if it has no entry in the lookup table.
final_result = pd.merge(final_result, taxonomy_lookup, how='left', on='Taxon')

In [135]:
# Preview the final per-species result table.
final_result.head()


Unnamed: 0,Taxon,likely_poaching_country,poaching_risk_score,Order,Family,Genus
0,Alligator Mississippiensis,United States,0.803649,Crocodylia,Alligatoridae,Alligator
1,Crocodylus Niloticus,Italy,1.0,Crocodylia,Crocodylidae,Crocodylus
2,Crocodylus Porosus,France,0.858284,Crocodylia,Crocodylidae,Crocodylus
3,Python Bivittatus,Italy,0.724669,Serpentes,Pythonidae,Python
4,Python Reticulatus,Italy,0.982608,Serpentes,Pythonidae,Python


In [138]:
# Persist the final per-species result table.
final_result.to_csv("Final_Output.csv", index=False)

# The browser download helper only exists on Google Colab. Elsewhere the CSV
# written above is already on local disk, so skip the download instead of
# aborting the notebook with an ImportError.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available; Final_Output.csv was saved locally.")
else:
    files.download("Final_Output.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Validation

In [137]:
# `appendix_max` is not created in any visible cell of this notebook, so a
# fresh "Restart & Run All" would stop here with a NameError. It is rebuilt
# as the highest 'App.' value per (Taxon, Exporter) pair.
# NOTE(review): reconstructed from how the merge and groupby below use it
# (keys Taxon/Exporter, value column 'appendix_max') — confirm this matches
# the original definition.
appendix_max = (
    df
    .groupby(['Taxon', 'Exporter'])
    .agg(appendix_max=('App.', 'max'))
    .reset_index()
)

# Look up the appendix value for each species' highest-scoring exporter.
validation_df = final_result.merge(
    appendix_max,
    left_on=['Taxon', 'likely_poaching_country'],
    right_on=['Taxon', 'Exporter'],
    how='left'
)

# Mean risk score per appendix level, as a check on the scores.
validation_df.groupby('appendix_max')['poaching_risk_score'].mean()

Unnamed: 0_level_0,poaching_risk_score
appendix_max,Unnamed: 1_level_1
2.0,0.788296
3.0,0.929142
