In [8]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import yfinance as yf

# Step 1: Data Collection
data = yf.download('AAPL', start='2020-01-01', end='2023-01-01')
data['MA_10'] = data['Close'].rolling(window=10).mean()
data['MA_30'] = data['Close'].rolling(window=30).mean()
data['Signal'] = 0
data['Signal'][10:] = [1 if data['MA_10'][i] > data['MA_30'][i] else 0 for i in range(10, len(data))]

# Step 2: Feature Engineering
data['Returns'] = data['Close'].pct_change()
data['Future_Return'] = data['Returns'].shift(-1)

# Define features and target
X = data[['MA_10', 'MA_30', 'Returns']][:-1]  # Exclude last row due to shifting
y = (data['Future_Return'] > 0).astype(int)[:-1]  # Buy if future return is positive

# Step 3: Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Step 4: Train the Model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Step 5: Evaluate the Model
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


[*********************100%***********************]  1 of 1 completed
  data['Signal'][10:] = [1 if data['MA_10'][i] > data['MA_30'][i] else 0 for i in range(10, len(data))]
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  data['Signal'][10:] = [1 if data['MA_10'][i] > data['MA_30'][i] else 0 for i in range(10, len(data

              precision    recall  f1-score   support

           0       0.52      0.43      0.47        79
           1       0.48      0.57      0.52        72

    accuracy                           0.50       151
   macro avg       0.50      0.50      0.50       151
weighted avg       0.50      0.50      0.49       151

