In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_classif

In [49]:
k = 3
X, y = make_classification(
    n_features=20,
    n_informative=3,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=2,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

anova_filter = SelectKBest(f_classif, k=k)
clf = LinearSVC()
anova_svm = make_pipeline(anova_filter, clf)
anova_svm.fit(X_train, y_train)

y_pred = anova_svm.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()
report_df = report_df.reset_index().rename(columns={"index": "class"}).round(2)

In [50]:
report_df

Unnamed: 0,class,precision,recall,f1-score,support
0,0,0.92,0.8,0.86,15.0
1,1,0.75,0.9,0.82,10.0
2,accuracy,0.84,0.84,0.84,0.84
3,macro avg,0.84,0.85,0.84,25.0
4,weighted avg,0.85,0.84,0.84,25.0


In [53]:
report_df["accuracy"] = report_df.loc[report_df["class"]=="accuracy"].values.flatten()[-1]

In [55]:
report_df.loc[report_df["class"]!="accuracy"]

Unnamed: 0,class,precision,recall,f1-score,support,accuracy
0,0,0.92,0.8,0.86,15.0,0.84
1,1,0.75,0.9,0.82,10.0,0.84
3,macro avg,0.84,0.85,0.84,25.0,0.84
4,weighted avg,0.85,0.84,0.84,25.0,0.84


In [23]:
# Take the row where class is accuracy and make it a column
accuracy = report_df[report_df["class"] == "accuracy"].transpose().reset_index()
accuracy = accuracy.rename(columns={0: "accuracy"})
accuracy = accuracy.drop("index", axis=1)
# Put accuracy as header
report_df.columns = report_df.iloc[0]
report_df = report_df.drop(0)
# Add accuracy column
report_df = pd.concat([report_df, accuracy], axis=1)
report_df


Unnamed: 0,0,0.92,0.8,0.86,15.0,2
1,1,0.75,0.9,0.82,10.0,0.84
2,accuracy,0.84,0.84,0.84,0.84,0.84
3,macro avg,0.84,0.85,0.84,25.0,0.84
4,weighted avg,0.85,0.84,0.84,25.0,0.84
0,,,,,,accuracy


In [24]:
def get_feature_idx() -> np.array:
    return anova_svm[:-1].inverse_transform(anova_svm[-1].coef_).flatten()  > 0

In [32]:
import plotly.express as px

In [33]:
feature_idx = get_feature_idx()

In [37]:
px.bar(y=feature_idx.astype(int))