---
# **SWEFI: Applications to Climate Data**

---

In [3]:
# Install Libraries (if needed)
# !pip3 install scikit-optimize pycaret[tuners] RiskLabAI==0.0.87 joblib_progress memory_profiler pycaret arch 
# !pip3 install torch
# !pip3 install yfinance

# Import Libraries
from stability_weighted_ensemble_feature_importance import *
from synthetic_dataset_generation import *
import plotly.graph_objects as go
import pandas as pd
import numpy as np

# Load the dataset
data = pd.read_csv('standardized_data_co2.csv')
data = data.dropna()

# Ensure the 'Year' column is numeric and sort the data by Country and Year
data['Year'] = pd.to_numeric(data['Year'], errors='coerce')
data = data.sort_values(by=['Country', 'Year'])

# Create the binary response variable by computing the year-to-year difference in CO2_emissions for each country.
# For each country, if the difference is positive, label as 1 (emissions increased); otherwise 0.
data['CO2_diff'] = data.groupby('Country')['CO2_emissions'].diff()
# Drop rows where the difference is NaN (first year for each country)
data = data.dropna(subset=['CO2_diff'])
y = pd.Series(np.where(data['CO2_diff'] > 0, 1, 0), name='Direction', index=data.index)

# Define the feature set.
# Drop the identifiers ('Country' and 'Year'), the raw CO2_emissions (since we used it for the target),
# and the computed difference column.
X = data.drop(columns=['Country', 'Year', 'CO2_emissions', 'CO2_diff'])

# SWEFI Parameters (using the same parameter choices as before)
select_n_model = 5
bootstrap_method = SWEFI.stationary_bootstrap

hpo_n_fold = 4
hpo_n_iter = 25
hpo_metric = 'AUC'
hpo_search_library = 'scikit-optimize'
hpo_search_algorithm = 'bayesian'

n_iteration = 5
percentage = 0.6

# Initialize SWEFI with the new feature set and response.
swefi = SWEFI(X, y, n_fold=10)

# Select Models
swefi.select_models(select_n_model=select_n_model)

# Fine-Tune Selected Models
swefi.fine_tune_selected_models(
    hpo_n_fold=hpo_n_fold,
    hpo_n_iter=hpo_n_iter,
    hpo_metric=hpo_metric,
    hpo_search_algorithm=hpo_search_algorithm,
    hpo_search_library=hpo_search_library
)

# Select Univariate-Analysis Methods
swefi.select_univariate_analysis_measurements(measurements=[
    UAMeasure.MUTUAL_INFORMATION.value,
    UAMeasure.ANOVA_F.value,
])

# Compute Feature Importance using the specified bootstrap method and iterations.
swefi.compute_feature_importance_data(bootstrap_method=bootstrap_method, n_iteration=n_iteration, n_repeats=10)

# Compute SWEFI Scores using the given percentage threshold.
swefi.compute_swefi_scores(percentage=percentage)
swefi_scores = swefi.get_swefi_scores()

Unnamed: 0,Description,Value
0,Session id,123
1,Target,Direction
2,Target type,Binary
3,Original data shape,"(969, 45)"
4,Transformed data shape,"(969, 45)"
5,Transformed train set shape,"(959, 45)"
6,Transformed test set shape,"(10, 45)"
7,Numeric features,44
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
6,Random Forest Classifier,0.6559,0.6898,0.7724,0.6878,0.7271,0.265,0.2694,0.028
5,Gradient Boosting Classifier,0.6549,0.6972,0.7934,0.6799,0.7318,0.2549,0.2629,0.095
1,Extra Trees Classifier,0.6507,0.6808,0.7654,0.6859,0.7224,0.2543,0.2584,0.158
4,Logistic Regression,0.6329,0.6472,0.7706,0.6666,0.7138,0.2086,0.2147,0.006
9,SVM - Radial Kernel,0.6288,0.6441,0.7897,0.6567,0.7165,0.1908,0.1987,0.033
3,Ridge Classifier,0.6287,0.6454,0.7776,0.66,0.7132,0.1958,0.2029,0.111
8,Linear Discriminant Analysis,0.6277,0.6456,0.7688,0.6613,0.7102,0.1967,0.2028,0.006
7,Quadratic Discriminant Analysis,0.6195,0.6397,0.6272,0.7023,0.6613,0.2302,0.2327,0.006
0,MLP Classifier,0.6173,0.645,0.7041,0.6702,0.6861,0.1965,0.1977,0.281
2,Decision Tree Classifier,0.5714,0.5577,0.6287,0.6442,0.6354,0.1145,0.1151,0.144


--------------------------------------------------------------------------------
RandomForestClassifier


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6167,0.6455,0.6993,0.6711,0.6849,0.1961,0.1964
1,0.7083,0.7392,0.8531,0.7135,0.7771,0.3649,0.3773
2,0.625,0.6832,0.7552,0.6626,0.7059,0.1947,0.1979
3,0.6736,0.7061,0.7465,0.7162,0.731,0.3166,0.317
Mean,0.6559,0.6935,0.7635,0.6908,0.7247,0.2681,0.2721
Std,0.0373,0.0341,0.0559,0.0242,0.0343,0.0747,0.078


Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fi

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6125,0.6443,0.6713,0.6761,0.6737,0.1968,0.1968
1,0.7292,0.7384,0.8042,0.7566,0.7797,0.4292,0.4305
2,0.6583,0.7015,0.6923,0.7226,0.7071,0.2976,0.298
3,0.6653,0.7046,0.662,0.746,0.7015,0.3236,0.3266
Mean,0.6663,0.6972,0.7075,0.7253,0.7155,0.3118,0.313
Std,0.0416,0.0338,0.0569,0.031,0.0392,0.0827,0.0833


Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fi

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5792,0.5762,0.7063,0.6312,0.6667,0.1009,0.1021
1,0.625,0.6617,0.8392,0.6417,0.7273,0.1601,0.1756
2,0.6333,0.632,0.7413,0.6752,0.7067,0.2206,0.2223
3,0.6444,0.6692,0.7746,0.6748,0.7213,0.2364,0.2407
Mean,0.6205,0.6348,0.7653,0.6557,0.7055,0.1795,0.1852
Std,0.0248,0.0365,0.049,0.0196,0.0236,0.0536,0.0535


Fitting 4 folds for each of 1 candidates, totalling 4 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
--------------------------------------------------------------------------------
SVC
Custom config ...


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5792,0.5532,0.7133,0.6296,0.6689,0.0978,0.0993
1,0.6375,0.6491,0.8392,0.6522,0.7339,0.1925,0.2081
2,0.6083,0.6319,0.7203,0.6561,0.6867,0.1675,0.1688
3,0.6569,0.6698,0.8169,0.6744,0.7389,0.2519,0.2619
Mean,0.6205,0.626,0.7724,0.6531,0.7071,0.1774,0.1845
Std,0.0295,0.0441,0.0562,0.0159,0.03,0.0553,0.0593


Fitting 4 folds for each of 1 candidates, totalling 4 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


  0%|          | 0/5 [00:00<?, ?it/s]

OSError: Cannot save file into a non-existent directory: 'results\climate'

In [4]:
# Save the scores to a CSV file.
swefi_scores.to_csv('results_climate/result.csv')

In [6]:
# Plot the SWEFI scores using Plotly.
swefi_scores = pd.read_csv('results_climate/result.csv', index_col=0)
index = swefi_scores.index
value = swefi_scores['mean(SWEFI)']
error = swefi_scores['std(SWEFI)']

fig = go.Figure(go.Bar(
    y=value,
    x=index,
    error_y=dict(type='data', array=error),
))

fig.update_layout(
    xaxis=dict(
        title='Feature',
        zeroline=True,
        showline=True,
        zerolinecolor='black',
        zerolinewidth=3,
        linecolor='black',
        linewidth=3,
        mirror=True,
        tickangle=45,
        tickfont=dict(
            family='Arial',
            size=18,
            color='black',
        )
    ),
    yaxis=dict(
        title='Importance',
        showgrid=True,
        zeroline=True,
        showline=True,
        gridcolor='black',
        gridwidth=1,
        zerolinecolor='black',
        zerolinewidth=3,
        linecolor='black',
        linewidth=3,
        mirror=True,
        tickfont=dict(
            family='Arial',
            size=18,
            color='black',
        )
    ),
    margin=dict(l=10, r=10, b=10, t=10),
    paper_bgcolor='white',
    plot_bgcolor='lightgrey',
    width=2000,
    height=1500,
    bargap=0.1,
    bargroupgap=0.1,
)

# To save the figure as an image, uncomment the next line:
# fig.write_image('swefi_success_time_series_vertical.png')
fig.show()
