In [None]:
import os

# Pin the process environment to a UTF-8 locale. The values are inherited by
# every subprocess started from this kernel (for example `!` shell commands).
os.environ.update(
    {
        "LC_ALL": "C.UTF-8",
        "LANG": "C.UTF-8",
        "PYTHONIOENCODING": "utf-8",
    }
)

In [None]:
import cupy as cp

# Sanity-check the GPU runtime before doing any work on it.
print("CuPy Version:", cp.__version__)

cuda_available = cp.cuda.is_available()
print("CUDA Available:", cuda_available)

# Only query device 0 when a CUDA device is actually present; asking for its
# compute capability on a CPU-only runtime raises instead of reporting.
if cuda_available:
    print("GPU Compute Capability:", cp.cuda.Device(0).compute_capability)
else:
    print("GPU Compute Capability: unavailable (no CUDA device detected)")

In [None]:
# System-level locale setup (runs in the shell, needs root):
#   1. refresh the apt package index and install the `locales` package,
#   2. generate the en_US.UTF-8 locale,
#   3. record en_US.UTF-8 as the default LANG.
!apt-get update && apt-get install -y locales
!locale-gen en_US.UTF-8
!update-locale LANG=en_US.UTF-8

In [None]:
# Show the GPU(s), driver and CUDA version visible to this runtime.
!nvidia-smi

In [None]:
!pip install -q "cuda-python<13.0a0,>=12.0" "numba>=0.59.1,<0.60"

In [None]:
!pip install -q "cupy-cuda12x" "cudf-cu12" "cuml-cu12" "rmm-cu12" "pylibraft-cu12" "pylibcudf-cu12" -f https://pypi.nvidia.com


In [None]:
!pip install -q pandas numpy scipy scikit-learn


In [None]:
import cupy as cp
import cudf
import cuml

# Post-install check: report the versions of the GPU libraries that were
# actually picked up by the kernel, plus whether CUDA is usable.
for label, value in (
    ("CuPy Version:", cp.__version__),
    ("CUDA Available:", cp.cuda.is_available()),
    ("cuDF Version:", cudf.__version__),
    ("cuML Version:", cuml.__version__),
):
    print(label, value)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as sk
from cuml.svm import SVC
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from cuml.model_selection import StratifiedKFold
from cuml.preprocessing import LabelEncoder

In [None]:
import locale


def _utf8_preferred_encoding(do_setlocale=True):
    """Report UTF-8 as the preferred encoding regardless of the process locale.

    The signature mirrors ``locale.getpreferredencoding(do_setlocale=True)`` so
    that callers passing the argument (e.g. ``getpreferredencoding(False)``)
    keep working after the override instead of raising ``TypeError``.
    """
    return "UTF-8"


# Override the stdlib lookup so libraries that consult the preferred encoding
# always see UTF-8 in this kernel.
locale.getpreferredencoding = _utf8_preferred_encoding
print("Preferred Encoding:", locale.getpreferredencoding())

In [None]:
# Path of the raw loan dataset, relative to the working directory.
# NOTE(review): the doubled ".csv.csv" extension looks accidental — confirm it
# matches the real file name on disk.
file_path = "loan_data.csv.csv"
# Read the CSV into a cuDF DataFrame.
loan_dataset_gpu = cudf.read_csv(file_path)

In [None]:
# `loan_dataset_gpu` was already loaded from `file_path` by the previous cell,
# so re-reading the CSV here only repeated the I/O. Preview the frame instead.
loan_dataset_gpu.head()

In [None]:
# Distribution of the raw `grade` labels (the prediction target) before encoding.
loan_dataset_gpu['grade'].value_counts()

In [None]:
# Ordinal encoding of the loan grade: 'A' -> 0, 'B' -> 1, ..., 'G' -> 6.
grade_mapping = {letter: rank for rank, letter in enumerate("ABCDEFG")}

# Replace the letter grades by their integer codes in place.
loan_dataset_gpu['grade'] = loan_dataset_gpu['grade'].map(grade_mapping)

In [None]:
# Distribution of `grade` after the integer encoding; compare with the counts
# shown before the mapping to spot labels that were not covered by it.
loan_dataset_gpu['grade'].value_counts()

In [None]:
# Remove the `sub_grade` and `id` columns from the frame.
# NOTE(review): presumably `sub_grade` is dropped because it reveals the target
# `grade` and `id` carries no signal — confirm against the dataset description.
# This cell is not re-runnable: a second run raises because the columns are gone.
loan_dataset_gpu = loan_dataset_gpu.drop(columns=['sub_grade', 'id'])

In [None]:
# Take a copy of the cleaned frame to use as the modelling dataset.
loan_dataset_gpu_processed = loan_dataset_gpu.copy()

In [None]:
# Target: the integer-encoded loan grade.
y = loan_dataset_gpu_processed["grade"]
# Features: every remaining column.
X = loan_dataset_gpu_processed.drop(columns=['grade'])

In [None]:
# Column groups used by the preprocessing step: int64/float64 columns are
# scaled, object (string) columns are one-hot encoded.
# NOTE(review): columns of any other dtype (e.g. int32, float32, bool) land in
# neither group and pass through untouched — confirm the dataset has none.
numerical_columns = X.select_dtypes(include=['int64', 'float64']).columns
categorical_columns = X.select_dtypes(include=['object']).columns


In [None]:
from cuml.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

# One-hot encode the categorical features on the full frame so that the train
# and test sets end up with exactly the same columns, then cast the boolean
# indicator columns to integers.
X = cudf.get_dummies(X, columns=categorical_columns)
bool_columns = X.select_dtypes(include=['bool']).columns

X[bool_columns] = X[bool_columns].astype(int)

# Hold out 20% of the rows for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The split shuffles the rows. Reset every piece to a clean 0..n-1 index so the
# column assignments below line up row-by-row whether or not the scaler
# preserves the input index.
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

# Fit the scaler on the training rows ONLY and reuse its statistics for the
# test rows. Fitting on the full dataset before splitting (as this cell used
# to) leaks test-set means/variances into training.
scaler = StandardScaler()
X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])

# SMOTE (imbalanced-learn) runs on the CPU, so move the training data to
# pandas first. Only the training set is oversampled; the test set keeps its
# original class distribution.
X_train_pd = X_train.to_pandas()
y_train_pd = y_train.to_pandas()
smote = SMOTE(random_state=42)
X_train_bal, y_train_bal = smote.fit_resample(X_train_pd, y_train_pd)

In [None]:
# Training features: SMOTE output -> cuDF frame -> float32 CuPy matrix.
X_train_bal_cudf = cudf.DataFrame(X_train_bal)
X_train_bal_cp = X_train_bal_cudf.to_cupy().astype(cp.float32)

# Test features: same conversion chain, no resampling involved.
X_test_cudf = cudf.DataFrame(X_test)
X_test_cp = X_test_cudf.to_cupy().astype(cp.float32)

In [None]:
# Training labels (after SMOTE): cuDF series and its CuPy view.
y_train_bal_cudf = cudf.Series(y_train_bal)
y_train_bal_cp = y_train_bal_cudf.to_cupy()

# Test labels: taken from the untouched hold-out split (the "bal" in the name
# is historical — these labels are not resampled).
y_test_bal_cudf = cudf.Series(y_test)
y_test_cp = y_test_bal_cudf.to_cupy()

In [None]:
import cupy as cp

# Confirm that SMOTE produced every class with equal support.
# The labels name the variable that is actually inspected; the previous
# "y_train_gpu" referred to a variable that does not exist in this notebook.
print("Unique classes in y_train_bal_cudf:", cp.unique(y_train_bal_cudf))
print("Class counts:\n", cudf.DataFrame(y_train_bal_cudf).value_counts())

In [None]:
# Shapes of the balanced training set. The labels name the variables that are
# actually printed; the previous "X_train_pca_cudf" / "y_train_gpu" referred to
# variables that do not exist in this (non-PCA) notebook.
print(f"X_train_bal_cudf shape: {X_train_bal_cudf.shape}")
print(f"y_train_bal_cudf shape: {y_train_bal_cudf.shape}")

In [None]:
# 10-fold stratified cross-validation on the balanced training set.
num_folds_bal = 10
# Seed the shuffle so the fold assignment (and therefore every fold score) is
# reproducible across kernel restarts.
kf_bal = StratifiedKFold(n_splits=num_folds_bal, shuffle=True, random_state=42)
# Per-fold validation accuracies, filled in by the training loop.
scores_bal = []

In [None]:
from cuml.multiclass import OneVsRestClassifier

# RBF-kernel SVM wrapped in a one-vs-rest scheme for the multi-class grade target.
non_pca_bal_svm_model_2 = SVC(kernel="rbf",  C=1.0, class_weight="balanced", gamma='scale')
non_pca_bal_svm_gpu_2 = OneVsRestClassifier(non_pca_bal_svm_model_2)

# Start from an empty list so that re-running this cell does not mix scores
# from an earlier run into the mean.
scores_bal = []

# NOTE: the folds are drawn from the SMOTE-resampled training data, so the
# validation folds contain synthetic samples; treat the fold accuracies as
# optimistic compared with the held-out test set.
for train_index_bal, val_index_bal in kf_bal.split(X_train_bal_cp, y_train_bal_cp):
    X_train_fold_bal, X_val_fold_bal = X_train_bal_cp[train_index_bal], X_train_bal_cp[val_index_bal]
    y_train_fold_bal, y_val_fold_bal = y_train_bal_cp[train_index_bal], y_train_bal_cp[val_index_bal]

    non_pca_bal_svm_gpu_2.fit(X_train_fold_bal, y_train_fold_bal)

    # Coerce to a plain Python float so formatting and np.mean work no matter
    # which scalar type `score` returns.
    accuracy_bal = float(non_pca_bal_svm_gpu_2.score(X_val_fold_bal, y_val_fold_bal))
    scores_bal.append(accuracy_bal)

    print(f"Fold Accuracy: {accuracy_bal:.4f}")

print(f"\nMean CV Accuracy: {np.mean(scores_bal):.4f}")

# Cross-validation only estimates performance. Refit on the complete balanced
# training set so the model used for test-set prediction is not just whatever
# the last fold happened to leave behind (trained on 9/10 of the data).
non_pca_bal_svm_gpu_2.fit(X_train_bal_cp, y_train_bal_cp)

In [None]:
# Predict the grade of every held-out loan and wrap the result in a cuDF series.
y_pred_bal = cudf.Series(non_pca_bal_svm_gpu_2.predict(X_test_cp))

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# scikit-learn metrics work on host arrays, so copy labels and predictions
# from the GPU to NumPy first.
y_test_np = y_test_cp.get()
y_pred_np = y_pred_bal.to_pandas().to_numpy()

# Rows are true grades, columns are predicted grades.
cm_bal = confusion_matrix(y_true=y_test_np, y_pred=y_pred_np)

In [None]:
# Raw confusion-matrix counts for the held-out test set.
print(cm_bal)

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the test set, to four decimals.
report_bal = classification_report(y_true=y_test_np, y_pred=y_pred_np, digits=4)
print(report_bal)

In [None]:
from sklearn.metrics import accuracy_score

# Overall accuracy on the held-out test set.
accuracy_bal = accuracy_score(y_true=y_test_np, y_pred=y_pred_np)
print(f"Accuracy: {accuracy_bal:.4f}")

In [None]:
from sklearn.metrics import precision_recall_curve
from sklearn.preprocessing import label_binarize

y_test_np = y_test_np.astype(int)

# Score the test set with the classifier trained in this notebook. The cell
# previously referenced `pca_bal_svm_gpu`, which is never defined here and
# raised NameError on a fresh kernel.
y_pred_prob = non_pca_bal_svm_gpu_2.decision_function(X_test_cp)

# Copy the score matrix to the host once instead of once per class.
y_scores_np = y_pred_prob.get()

# One score column per class the model was trained on. Taking the count from
# the score matrix keeps the columns aligned with the grade codes 0..n-1 even
# if some grade happens to be missing from the test labels.
n_classes = y_scores_np.shape[1]

# One-vs-rest indicator matrix of the true labels (column i <-> grade code i).
y_test_bin = label_binarize(y_test_np, classes=np.arange(n_classes))

fig, ax = plt.subplots(figsize=(8, 6))
for i in range(n_classes):
    precision, recall, _ = precision_recall_curve(y_test_bin[:, i], y_scores_np[:, i])
    ax.plot(recall, precision, label=f'Class {i}')

ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title('Precision-Recall Curve')
ax.legend()
plt.show()