# In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Seed NumPy's global RNG so the synthetic cohort below is reproducible.
np.random.seed(42)
n_samples = 500

# Build a synthetic patient table. The columns are drawn from the global RNG
# in the order written here, so reordering them changes the generated values.
# NOTE: 'responded' is sampled independently of every feature column, so any
# model trained on this table is fitting noise.
data = pd.DataFrame(
    data={
        # Expression level of gene A.
        'gene_A_expr': np.random.normal(loc=5, scale=2, size=n_samples),
        # Expression level of gene B.
        'gene_B_expr': np.random.normal(loc=3, scale=1.5, size=n_samples),
        # Expression level of gene C.
        'gene_C_expr': np.random.normal(loc=6, scale=2.5, size=n_samples),
        # Immune index, uniform on [0, 1).
        'immune_score': np.random.uniform(low=0, high=1, size=n_samples),
        # MRI-derived score.
        'MRI_score': np.random.normal(loc=50, scale=10, size=n_samples),
        # Target variable: 1 with probability 0.6, 0 with probability 0.4.
        'responded': np.random.choice(a=[0, 1], size=n_samples, p=[0.4, 0.6]),
    }
)

# Separate the binary outcome column from the predictor columns.
y = data['responded']
X = data.drop(columns=['responded'])

# Hold out 30% of the rows for evaluation. Stratifying on y and fixing
# random_state are both requested explicitly so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Fit a 100-tree random forest on the training partition; fit() returns the
# estimator itself, so the fitted model is bound directly.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predict the held-out rows and print the classification report for them.
y_pred = model.predict(X_test)
print("Классификационный отчёт:\n")
print(classification_report(y_true=y_test, y_pred=y_pred))

# Pair each predictor column name with the importance the fitted forest
# reports for it.
features = X.columns
importances = model.feature_importances_

# Horizontal bar chart: one bar per feature, bar length = importance.
plt.figure(figsize=(8, 5))
sns.barplot(y=features, x=importances)
plt.xlabel("Importance")
plt.ylabel("Feature")
plt.title("Feature Importance: Predicting Therapy Response")
plt.tight_layout()
plt.show()
