In [329]:
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import accuracy_score

In [330]:
# Read the glioma CSV straight from its raw GitHub URL into a DataFrame.
# Requires network access when this cell is executed.
url_data = (
    "https://raw.githubusercontent.com/Robiahraaw/Data-PSD-Robi/main/glioma.csv"
)
df = pd.read_csv(filepath_or_buffer=url_data)

In [331]:
# Names of the columns selected as model inputs. The order here becomes the
# column order of the feature matrix built from this list.
numerical_columns = [
    'Gender', 'Age_at_diagnosis', 'Race',
    'IDH1', 'TP53', 'ATRX', 'PTEN', 'EGFR',
    'CIC', 'MUC16', 'PIK3CA', 'NF1', 'PIK3R1',
    'FUBP1', 'RB1', 'NOTCH1', 'BCOR', 'CSMD3',
    'SMARCA4', 'GRIN2A', 'IDH2', 'FAT4', 'PDGFRA',
]

In [332]:
# Separate the loaded frame into the feature matrix (X, one column per entry
# of numerical_columns) and the label series (y, the 'Grade' column).
X = df.loc[:, numerical_columns]
y = df.loc[:, 'Grade']

In [333]:
# Normalisasi data menggunakan StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [334]:
# Simpan Standard Scaler ke dalam file pickle
with open('scaler_model.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [335]:
# Reduksi dimensi menggunakan PCA
pca = PCA(n_components=6)  # Anda bisa mengubah jumlah komponen sesuai kebutuhan
X = pca.fit_transform(X)

In [336]:
# Simpan PCA model ke dalam file pickle
with open('pca_model.pkl', 'wb') as pca_file:
    pickle.dump(pca, pca_file)

In [337]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=8756)

In [338]:
# Build the ridge classifier with every hyper-parameter spelled out, then fit
# it on the training split. random_state is fixed for reproducibility.
ridge_classifier = RidgeClassifier(
    alpha=1.0,
    class_weight=None,
    copy_X=True,
    fit_intercept=True,
    max_iter=None,
    positive=False,
    random_state=8756,
    solver='auto',
    tol=0.0001,
)
ridge_classifier.fit(X=X_train, y=y_train)

In [339]:
# Predict a class label for every row of the held-out test features.
y_pred = ridge_classifier.predict(X=X_test)

In [340]:
# Compare the predictions with the true test labels and report the fraction
# that match, rounded to two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.83


In [341]:
# Save the trained Ridge Classifier to a pickle file so it can be reloaded
# later without retraining.
with open('rc_model.pkl', mode='wb') as rc_file:
    pickle.dump(obj=ridge_classifier, file=rc_file)