In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, roc_auc_score
from sklearn.neighbors import KNeighborsRegressor
import numpy as np

# Define lists to store results
roc_auc_scores = []
mse_scores = []

# Define filenames for datasets
filenames = ['drug_embedding.csv']

# Iterate over datasets
for filename in filenames:
    # Load the dataset
    dataset = pd.read_csv(filename)

    # Assuming the target variable is in column 8 and features are from column 10 onwards
    X = dataset.iloc[:, 10:].values
    y = dataset.iloc[:, 8].values

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train a KNN regression model
    regressor = KNeighborsRegressor(n_neighbors=15, p=2, metric='euclidean')
    regressor.fit(X_train_scaled, y_train)

    # Predict on the test set
    y_pred = regressor.predict(X_test_scaled)

    # Determine threshold for binary classification
    threshold = np.median(y_train)

    # Convert regression predictions to binary classification labels
    y_pred_binary = (y_pred > threshold).astype(int)
    y_test_binary = (y_test > threshold).astype(int)

    # Calculate ROC AUC score
    roc_auc = roc_auc_score(y_test_binary, y_pred_binary)
    roc_auc_scores.append(roc_auc)

    # Calculate Mean Squared Error (MSE)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

# Calculate mean and standard deviation of ROC AUC scores
mean_roc_auc = np.mean(roc_auc_scores)
std_roc_auc = np.std(roc_auc_scores)

# Calculate mean and standard deviation of MSE scores
mean_mse = np.mean(mse_scores)
std_mse = np.std(mse_scores)

print("ROC AUC Score:", mean_roc_auc)
print("Standard Deviation of ROC AUC Scores:", std_roc_auc)
print("Mean Squared Error:", mean_mse)
print("Standard Deviation of MSE Scores:", std_mse)


ROC AUC Score: 0.5971428571428572
Standard Deviation of ROC AUC Scores: 0.0
Mean Squared Error: 0.0048693219486102
Standard Deviation of MSE Scores: 0.0


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import roc_auc_score
import numpy as np

# Define lists to store results
roc_auc_scores = []
mse_scores = []

# Define filenames for datasets
filenames = ['drug_embedding2.csv', 'ccl_feature_original2.csv', 'ccl_feature2.csv']

# Iterate over datasets
for filename in filenames:
    # Load the dataset
    dataset = pd.read_csv(filename)

    # Assuming the target variable is in column 8 and features are from column 10 onwards
    X = dataset.iloc[:, 10:].values
    y = dataset.iloc[:, 8].values

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train a KNN regression model
    regressor = KNeighborsRegressor(n_neighbors=15, p=2, metric='euclidean')
    regressor.fit(X_train_scaled, y_train)

    # Predict on the test set
    y_pred = regressor.predict(X_test_scaled)

    threshold = np.median(y_train)

    # Convert regression predictions to binary classification labels
    y_pred_binary = (y_pred > threshold).astype(int)
    y_test_binary = (y_test > threshold).astype(int)

    # Calculate ROC AUC score
    roc_auc = roc_auc_score(y_test_binary, y_pred_binary)
    roc_auc_scores.append(roc_auc)

    # Calculate Mean Squared Error (MSE)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

# Calculate mean and standard deviation of ROC AUC scores
mean_roc_auc = np.mean(roc_auc_scores)
std_roc_auc = np.std(roc_auc_scores)

# Calculate mean and standard deviation of MSE scores
mean_mse = np.mean(mse_scores)
std_mse = np.std(mse_scores)

print("ROC AUC Score:", mean_roc_auc)
print("Standard Deviation of ROC AUC Scores:", std_roc_auc)
print("Mean Squared Error:", mean_mse)
print("Standard Deviation of MSE Scores:", std_mse)

ROC AUC Score: 0.6548533137527798
Standard Deviation of ROC AUC Scores: 0.18872314976258073
Mean Squared Error: 0.7104561771668702
Standard Deviation of MSE Scores: 0.5469186509849133


In [3]:
roc_auc_scores_1 = [0.5971428571428572]  # Example ROC AUC scores


# Second set of code
roc_auc_scores_2 = [0.6548533137527798]  # Example ROC AUC score

# Calculate KNN gain for ROC AUC score
mean_roc_auc_1 = np.mean(roc_auc_scores_1)
mean_roc_auc_2 = np.mean(roc_auc_scores_2)
knn_gain_roc_auc = mean_roc_auc_2 - mean_roc_auc_1


print("KNN Gain:", knn_gain_roc_auc)

KNN Gain: 0.05771045660992258
