In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import KernelDensity
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import IsolationForest
from sklearn.neural_network import MLPRegressor

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, median_absolute_error

In [None]:
# --- Load data -------------------------------------------------------------
# Every column except "target" is used as a feature.
data = pd.read_excel('data.xlsx')
x_raw = data.drop(columns="target")
feature_columns = list(x_raw.columns)
y = data[["target"]].values

# --- Split first, then scale -----------------------------------------------
# The scaler is fitted on the training rows only so that no statistic of the
# held-out rows leaks into preprocessing.
x_train_raw, x_test_raw, y_train1, y_test = train_test_split(
    x_raw, y, test_size=0.2, random_state=42
)
scaler = RobustScaler().fit(x_train_raw)
x_train1 = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)
# Full feature matrix in the same scaled space, kept under the name `x`
# for later cells that work on the whole dataset.
x = scaler.transform(x_raw)

# --- KDE-based augmentation of the training set ----------------------------
N_SYNTHETIC = 100
kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(x_train1)

# kde.sample returns feature columns only, so the frame is built with the
# feature names (not data.columns, which also contains "target").
data2 = pd.DataFrame(kde.sample(N_SYNTHETIC, random_state=42), columns=feature_columns)

# NOTE(review): synthetic labels are drawn at random from the training targets,
# independently of the sampled features, so these rows carry label noise.
# Confirm this is the intended augmentation scheme.
rng = np.random.default_rng(42)
data2['target'] = rng.choice(y_train1.ravel(), size=N_SYNTHETIC, replace=True)

# Only the (scaled) training rows are combined with the synthetic rows; the
# held-out rows must not enter the training table, and both parts must live
# in the same scaled feature space as x_test.
train_scaled = pd.DataFrame(x_train1, columns=feature_columns)
train_scaled['target'] = y_train1.ravel()
data3 = pd.concat([train_scaled, data2], ignore_index=True)
x2 = data3.drop('target', axis=1).values
y2 = data3['target'].values

# Hyper-parameter grid explored for the gradient-boosting regressor.
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10]
}

gb = GradientBoostingRegressor(random_state=42)

# x2 is assembled by concatenation (original rows followed by synthetic rows),
# so contiguous unshuffled folds would not be representative of the mix.
# A shuffled, seeded 5-fold splitter keeps the folds comparable and reproducible.
cv_splitter = KFold(n_splits=5, shuffle=True, random_state=42)

grid_search = GridSearchCV(
    estimator=gb,
    param_grid=param_grid,
    cv=cv_splitter,
    scoring='r2',
    n_jobs=-1
)
grid_search.fit(x2, y2)

# best_estimator_ is the model refitted on all of (x2, y2) with the best grid point.
best_model = grid_search.best_estimator_
y_pred_test_1 = best_model.predict(x_test)

# Held-out performance of the tuned model.
print('Best Parameters:', grid_search.best_params_)
print('R2 :', r2_score(y_test, y_pred_test_1))
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred_test_1)))

In [None]:
# GTM for regression.
# NOTE(review): eGTR is not imported in the visible cells (presumably
# `from ugtm import eGTR`) — confirm and add it to the import cell.
gtm = eGTR()
# The estimator must be fitted before predicting; use the training split
# produced by train_test_split (targets flattened to 1-D).
gtm.fit(x_train1, y_train1.ravel())
y_pred_test_2 = gtm.predict(x_test)

# The grid-search "Best Parameters" line was dropped here: it reported the
# gradient-boosting search result, which does not describe this GTM model.
print('R2 :', r2_score(y_test, y_pred_test_2))
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred_test_2)))


# GTM projection of the test set.
# NOTE(review): eGTM and alt are not imported in the visible cells (presumably
# `from ugtm import eGTM` and `import altair as alt`) — confirm and add them
# to the import cell.
# The map is fitted on the training split (`x_train1`) and the held-out rows
# are projected onto it.
transformed = eGTM().fit(x_train1).transform(x_test)
df = pd.DataFrame(transformed, columns=["x1", "x2"])
test_projection_chart = alt.Chart(df).mark_point().encode(
    x='x1',
    y='x2',
    tooltip=["x1", "x2"]
).properties(title="GTM projection of X_test").interactive()
# A chart expression in the middle of a cell is not rendered automatically,
# so it is displayed explicitly.
display(test_projection_chart)

# GTM projection of all data, coloured by the target value.
# NOTE(review): eGTM and alt are not imported in the visible cells (presumably
# `from ugtm import eGTM` and `import altair as alt`) — confirm and add them
# to the import cell.
# The map is fitted on the training split and then every row of the scaled
# feature matrix `x` is projected, so the projection has one row per entry
# of `y`.
transformed = eGTM().fit(x_train1).transform(x)
df = pd.DataFrame(transformed, columns=["x1", "x2"])
# y is a single-column 2-D array; flatten it to a plain column.
df["IE"] = y.ravel()
chart = alt.Chart(df).mark_point().encode(
    x='x1',
    y='x2',
    color=alt.Color('IE', scale=alt.Scale(scheme='viridis')),
    tooltip=["x1", "x2", "IE"]
).properties(title="GTM Projection of Full Dataset").interactive()
chart.save("gtm_full_visualization.html")


# AD calculation (AD presumably stands for "applicability domain").
# The three query points are the last three rows of the scaled feature
# matrix `x`, so they live in the same feature space as the reference set.
# NOTE(review): the original referenced `X` / `X_scaled`, which are not defined
# in the visible cells; `x` (scaled features of the whole dataset) is assumed
# to be the intended matrix — confirm.
IL_A_features = x[-3]
IL_B_features = x[-2]
IL_C_features = x[-1]

X_test = np.array([IL_A_features, IL_B_features, IL_C_features])

# Euclidean distance of each query point to the centroid of the reference set.
centroid = np.mean(x, axis=0)
distances = cdist(X_test, [centroid], metric="euclidean")

# Isolation forest fitted on the reference set; predict() returns -1 for
# points flagged as outliers and 1 otherwise.
# NOTE(review): the reference set includes the three query rows themselves;
# consider fitting on x[:-3] if they should be treated as unseen.
iso_forest = IsolationForest(contamination=0.05, random_state=42)
iso_forest.fit(x)
outliers = iso_forest.predict(X_test)

for i, inhibitor in enumerate(["IL1", "IL2", "IL3"]):
    status = "Outside AD" if outliers[i] == -1 else "Inside AD"
    print(f"{inhibitor} - Distance from centroid: {distances[i][0]:.2f}, Status: {status}")
