In [3]:
pip install sklearn-genetic-opt

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [4]:


from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, make_scorer
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Categorical, Integer
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error


In [5]:
# Load the financial data. Only the first 10,000 rows are used, so let the
# parser stop there instead of reading the whole file and truncating it.
finance_df = pd.read_csv("finance_data.csv", nrows=10000)

# Choose the target and the inputs: 'close' is predicted, and the date,
# symbol and candle-type columns are left out of the feature matrix.
target_column = 'close'
features = finance_df.drop(columns=['transactionDate', 'Symbol', 'CandleType', target_column])
target = finance_df[target_column]


In [6]:

# Keep only rows where every feature AND the target are present. Filtering on
# the features alone would let a missing 'close' value through to the
# estimator, and scikit-learn regressors reject NaN targets.
complete_rows = features.notna().all(axis=1) & target.notna()
features = features.loc[complete_rows]
target = target.loc[complete_rows]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [7]:

# The cleaning and the train/test split are performed in the previous cell.
# Repeating them here would reproduce exactly the same objects (same rows,
# same seed), so this cell only checks the invariants later cells rely on.
assert not features.isna().any().any(), "features still contain missing values"
assert features.index.equals(target.index), "features and target are not row-aligned"
assert len(X_train) + len(X_test) == len(features), "split does not cover all rows"
assert X_train.index.equals(y_train.index) and X_test.index.equals(y_test.index)

In [8]:
# Scaling: the scaler is fitted on the training split only, then the same
# fitted transform is applied to both the training and the test inputs.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Base estimator whose hyperparameters are tuned by the genetic search.
tree = ExtraTreesRegressor(random_state=42)

# Search space: integer ranges for the size/depth/split settings and a
# categorical choice for how many features each split may consider.
param_grid = dict(
    n_estimators=Integer(50, 300),
    max_depth=Integer(2, 20),
    min_samples_split=Integer(2, 20),
    min_samples_leaf=Integer(1, 10),
    max_features=Categorical(['sqrt', 'log2', None]),
)

In [10]:
# Candidates are ranked by their R2 score.
scorer = make_scorer(r2_score)

# Genetic hyperparameter search over `param_grid`: 3 cross-validation folds,
# a population of 10 candidates, evolved for 5 generations.
ga_search = GASearchCV(
    estimator=tree,
    param_grid=param_grid,
    scoring=scorer,
    cv=3,
    population_size=10,
    generations=5,
    n_jobs=-1,
    verbose=True,
)


In [11]:
# Training: run the genetic search on the scaled training inputs and their
# targets. With verbose=True a per-generation fitness table is printed.
ga_search.fit(X_train_scaled, y_train)

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	10    	0.993438	0.00325613 	0.998273   	0.989783   
1  	20    	0.996431	0.00269552 	0.998645   	0.990391   
2  	20    	0.998399	0.000147122	0.998654   	0.998196   
3  	20    	0.99861 	0.000163144	0.999053   	0.998398   
4  	20    	0.998599	0.000198113	0.999053   	0.998192   
5  	20    	0.998686	0.000194709	0.999065   	0.998474   


In [12]:

# Best result: predict on the scaled held-out inputs with the fitted search
# object, then report the test-set R2 and the selected hyperparameters.
y_pred = ga_search.predict(X_test_scaled)
print("Test R2 Score:", r2_score(y_true=y_test, y_pred=y_pred))
print("Best Params:", ga_search.best_params_)

Test R2 Score: 0.9988324938534642
Best Params: {'n_estimators': 210, 'max_depth': 17, 'min_samples_split': 3, 'min_samples_leaf': 3, 'max_features': None}


In [13]:
# Compute the mean absolute percentage error on the held-out rows and print
# it rounded to four decimal places.
mape = mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred)
print(f"Test MAPE: {mape:.4f}")


Test MAPE: 0.0075


In [15]:
# 1. Start from exactly the rows that went through modelling. Re-reading the
#    CSV and calling dropna() over every column (including the ones excluded
#    from `features`) can yield a different row set, and a test label missing
#    from that frame would make a label-based assignment fail.
df = finance_df.loc[features.index].copy()

# 2. Labels of the held-out rows; X_test carries the original row index.
test_indices = X_test.index

# 3. Attach the predictions by index alignment: test rows receive their
#    prediction, every other row is left as NaN.
df['Predicted_Close'] = pd.Series(y_pred, index=test_indices)

# 4. Save to a new Excel file (the row index is not written).
df.to_excel("finance_data_with_predictions.xlsx", index=False)