In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
import joblib

# Read creditcard.csv and discard every row that contains a missing value.
dataset = pd.read_csv('creditcard.csv').dropna()
# Cell output: the column labels of the cleaned frame.
dataset.columns

In [None]:
# Number of rows for each distinct value of the 'Class' column.
class_counts = dataset['Class'].value_counts()
class_counts

In [None]:
#equilibrage
from imblearn.over_sampling import SMOTE

X = dataset.drop(columns=['Class'])
y = dataset['Class']
smote = SMOTE(random_state=42)
X_resampled_smote, y_resampled_smote = smote.fit_resample(X, y)

df_resampled = pd.concat([X_resampled_smote, y_resampled_smote], axis=1)
dataset= df_resampled
dataset['Class'].value_counts()

In [None]:
#again ; features (X) and target variable (y)
X = dataset.drop(columns=['Class']).values
y = dataset['Class'].values

In [None]:
#Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=73)

In [None]:
#Scale normalization: fit the min-max scaler on the training split only,
#then rescale both splits with that same fitted scaler
ms = MinMaxScaler().fit(X_train)
X_train, X_test = ms.transform(X_train), ms.transform(X_test)

In [None]:
#Baseline random forest trained on every (scaled) feature column
rf_params = dict(n_estimators=10, criterion='gini', max_depth=6, random_state=73)
classifier = RandomForestClassifier(**rf_params)
#fit() is left as the last expression so the fitted estimator is shown as cell output
classifier.fit(X_train, y_train)

In [None]:
#Predicted class labels of the baseline forest for the test split
y_pred = classifier.predict(X=X_test)

In [None]:
#Score the predictions in y_pred against y_test: confusion matrix, then accuracy
test_confusion = confusion_matrix(y_test, y_pred)
test_accuracy = accuracy_score(y_test, y_pred)
print(test_confusion)
print("Accuracy:", test_accuracy)

In [None]:
#Read the importance scores exposed by the fitted baseline classifier and show
#them as the cell output. Later cells sort these scores and use the resulting
#positions as column indices into X_train / X_test.
feature_importances = classifier.feature_importances_
feature_importances

In [None]:
#Positions of the 10 largest importance scores.
#argsort orders ascending, so the top 10 are the last 10 entries (least important of them first).
top_feature_indices = feature_importances.argsort()[-10:]

In [None]:
#Keep only the selected columns, in the same order, for both splits
X_top_features_train, X_top_features_test = (
    split[:, top_feature_indices] for split in (X_train, X_test)
)

In [None]:
#Second random forest, trained only on the selected feature columns.
#Note it also uses more trees and the entropy criterion than the baseline forest.
top_rf_params = dict(n_estimators=100, criterion='entropy', max_depth=6, random_state=73)
classifier_top_features = RandomForestClassifier(**top_rf_params)
#fit() is left as the last expression so the fitted estimator is shown as cell output
classifier_top_features.fit(X_top_features_train, y_train)

In [None]:
#Predicted class labels of the reduced-feature forest for the test split
y_pred_top_features = classifier_top_features.predict(X=X_top_features_test)

In [None]:
#Score the reduced-feature predictions against y_test: confusion matrix, then accuracy
top_confusion = confusion_matrix(y_test, y_pred_top_features)
top_accuracy = accuracy_score(y_test, y_pred_top_features)
print(top_confusion)
print("Accuracy:", top_accuracy)

In [None]:
#Persist everything needed to reuse the reduced-feature model.
#The model was fitted on inputs that went through the MinMaxScaler `ms`
#(fitted on ALL feature columns) before the top columns were selected, so the
#fitted scaler is saved as well; without it new data cannot be preprocessed the
#same way. At inference: ms.transform(X_new)[:, top_feature_indices] -> predict.
joblib.dump(ms, 'min_max_scaler.joblib')
#Kept as the last expression so the cell output is unchanged
joblib.dump(classifier_top_features, 'classifier_top_features.joblib')

In [None]:
#Write the selected column positions to disk, then echo them
indices_path = 'top_feature_indices.npy'
np.save(indices_path, top_feature_indices)
print(top_feature_indices)

In [None]:
from lightgbm import LGBMClassifier

#Gradient-boosted model trained on every (scaled) feature column.
#All settings are spelled out explicitly, grouped by what they control.
lgb_params = dict(
    #boosting setup
    boosting_type='gbdt',
    n_estimators=10,
    learning_rate=0.1,
    #tree shape
    num_leaves=8,
    max_depth=-1,
    min_split_gain=0.0,
    min_child_weight=0.001,
    min_child_samples=5,
    #row / column sampling
    subsample_for_bin=200000,
    subsample=1.0,
    subsample_freq=0,
    colsample_bytree=1.0,
    #regularization
    reg_alpha=0.0,
    reg_lambda=0.0,
    #misc
    importance_type='split',
    force_col_wise=True,
)
lgb = LGBMClassifier(**lgb_params)
#fit() is left as the last expression so the fitted estimator is shown as cell output
lgb.fit(X_train, y_train)

In [None]:
#NOTE: y_pred is rebound here; from this point on it holds the LightGBM
#predictions, not the baseline random-forest ones
y_pred = lgb.predict(X_test)
lgb_confusion = confusion_matrix(y_test, y_pred)
lgb_accuracy = accuracy_score(y_test, y_pred)
print(lgb_confusion)
print("Accuracy:", lgb_accuracy)