In [102]:
#simulate nni tuner to generate next parameter
import json
import random

# Read the search_space.json file. The encoding is pinned to UTF-8 (the JSON
# interchange encoding) so decoding does not depend on the platform's
# locale-dependent default.
with open('search_space.json', 'r', encoding='utf-8') as f:
    search_space = json.load(f)

print(search_space)


def get_next_parameter(search_space):
    """Draw one random configuration from an NNI-style search space.

    Local stand-in for ``nni.get_next_parameter()`` so the notebook can be run
    without an NNI experiment.

    Args:
        search_space: Mapping of parameter name to a spec dict holding a
            ``"_type"`` key (``"randint"``, ``"uniform"`` or ``"choice"``)
            and a ``"_value"`` key. For ``randint`` and ``uniform`` the value
            is ``[lower, upper]``; for ``choice`` it is the list of options.

    Returns:
        dict mapping every parameter name to one sampled value.

    Raises:
        ValueError: If a spec uses an unsupported ``_type``, or if a
            ``randint`` range is empty (``lower >= upper``).
    """
    params = {}
    for key, value in search_space.items():
        param_type = value["_type"]
        param_values = value["_value"]

        # Generate a value according to _type.
        if param_type == "randint":
            # NNI defines randint over [lower, upper): the upper bound is
            # excluded. random.randint() would include it, so use randrange().
            params[key] = random.randrange(param_values[0], param_values[1])
        elif param_type == "uniform":
            params[key] = random.uniform(param_values[0], param_values[1])
        elif param_type == "choice":
            params[key] = random.choice(param_values)
        else:
            raise ValueError(f"Unknown parameter type: {param_type}")

    return params

# Call the function to sample the next parameter set from search_space,
# then print it.
params = get_next_parameter(search_space)
print("Next params:", params)


{'n_estimators': {'_type': 'randint', '_value': [50, 200]}, 'max_depth': {'_type': 'randint', '_value': [10, 30]}, 'min_samples_split': {'_type': 'choice', '_value': [10, 20]}, 'min_samples_leaf': {'_type': 'choice', '_value': [30, 40]}, 'n_components': {'_type': 'choice', '_value': [2, 3]}, 'perplexity': {'_type': 'uniform', '_value': [5, 50]}, 'learning_rate': {'_type': 'choice', '_value': ['auto', 10, 1000]}, 'metric': {'_type': 'choice', '_value': ['euclidean', 'l1', 'l2', 'cosine']}}
Next params: {'n_estimators': 131, 'max_depth': 23, 'min_samples_split': 10, 'min_samples_leaf': 40, 'n_components': 3, 'perplexity': 18.570487610862727, 'learning_rate': 1000, 'metric': 'euclidean'}


In [68]:
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score
import nni
import matplotlib.pyplot as plt
# from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor

# Directory containing merged.csv. Written as a raw string so the backslashes
# are taken literally instead of being parsed as (invalid) escape sequences
# such as "\A" and "\s"; the resulting path value is unchanged.
data_path = r"G:\ABCD\script/trail/trail_tsne_RF"
# Load the data and drop the "Unnamed: 0" column and the subject id.
data = pd.read_csv(data_path + "/merged.csv").drop(columns=["Unnamed: 0", "src_subject_id"])

# Hyperparameters for this run come from the local simulator; the NNI call is
# kept below, commented out, as the alternative source.
# params = nni.get_next_parameter()
params = get_next_parameter(search_space)


In [86]:
# Label columns are the ones whose name starts with "cbcl".
is_label = data.columns.str.startswith("cbcl")
label_columns = data.columns[is_label].tolist()

# y keeps only the label columns ...
y = data[label_columns]

# ... and X is everything that remains once they are dropped.
X = data.drop(columns=label_columns)


In [103]:
# Pull the t-SNE settings out of the sampled configuration.
tsne_options = {
    name: params[name]
    for name in ('n_components', 'perplexity', 'learning_rate', 'metric')
}
tsne = TSNE(random_state=42, **tsne_options)

# Embed the label matrix, then average across the embedding dimensions so
# each row is reduced to a single value.
y_embedded = tsne.fit_transform(y)
y_tsne = y_embedded.mean(axis=1)

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np


# Number of cross-validation folds. The same constant drives both the `cv=`
# argument and the printed messages, so the report always matches the run.
# NOTE(review): earlier text said "10-fold" while the code ran 5 folds; the
# executed value (5) is kept. Set this to 10 if that was the intent.
N_FOLDS = 5

# Standardize the features, then fit a random forest configured from the
# sampled hyperparameters.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('rf', RandomForestRegressor(
        n_estimators=params['n_estimators'],
        max_depth=params['max_depth'],
        min_samples_split=params['min_samples_split'],
        min_samples_leaf=params['min_samples_leaf'],
        random_state=42
    ))
])

# Perform N_FOLDS-fold cross-validation, scoring each fold with R2.
cv_scores = cross_val_score(pipeline, X, y_tsne, cv=N_FOLDS, scoring='r2')

# Report the mean R2 score from cross-validation to NNI.
mean_cv_r2 = cv_scores.mean()
nni.report_final_result(mean_cv_r2)

# Print the per-fold scores and their mean.
print(f'{N_FOLDS}-Fold Cross-Validation R2 Scores: {cv_scores}')
print(f'Mean R2 Score from {N_FOLDS}-Fold Cross-Validation: {mean_cv_r2}')