In [1]:
# Step 1: Install Libraries
!pip install yfinance ta joblib scikit-learn


Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29412 sha256=e9580d2f53a199e7bd4e1a8eb61bb6ded3bb0840395d1bd8fd6a65e92c058306
  Stored in directory: /root/.cache/pip/wheels/a1/d7/29/7781cc5eb9a3659d032d7d15bdd0f49d07d2b24fec29f44bc4
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0


In [2]:
# 📚 Imports
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
import numpy as np # Import numpy to use np.inf

# 📈 Download NIFTY50 stock data (example: RELIANCE)
df = yf.download('RELIANCE.NS', period='1y', interval='1d')
df.dropna(inplace=True)

# 🔍 Compute custom features (no ta library used)

# 1. Moving Averages
df['SMA_5'] = df['Close'].rolling(window=5).mean()
df['SMA_10'] = df['Close'].rolling(window=10).mean()

# 2. Daily Return
df['Daily_Return'] = df['Close'].pct_change()

# 3. Volatility
df['Volatility'] = df['Close'].rolling(window=5).std()

# 4. Volume Change
df['Volume_Change'] = df['Volume'].pct_change()

# 🎯 Create target (1 = next day rise, 0 = fall)
df['Target'] = (df['Close'].shift(-1) > df['Close']).astype(int)

# ❌ Remove NaNs
df.dropna(inplace=True)

# ✅ Features
features = ['SMA_5', 'SMA_10', 'Daily_Return', 'Volatility', 'Volume_Change']
X = df[features]
y = df['Target']

# Check for and handle infinite values in features
# Replace infinite values with NaN and then drop rows containing NaNs
X.replace([np.inf, -np.inf], np.nan, inplace=True)
# Align y with X after dropping rows
original_index = X.index
X.dropna(inplace=True)
y = y.loc[X.index]


# 🔀 Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# 🤖 Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 🎯 Evaluate
preds = model.predict(X_test)
acc = accuracy_score(y_test, preds)
print(f"✅ Accuracy: {acc:.2f}")

# 💾 Save model
joblib.dump(model, 'simple_stock_model.pkl')
print("🎉 Model saved as 'simple_stock_model.pkl'")

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.dropna(inplace=True)


✅ Accuracy: 0.52
🎉 Model saved as 'simple_stock_model.pkl'


In [4]:
from google.colab import files
files.download('simple_stock_model.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>