In [10]:
import yfinance as yf

def fetch_data(ticker):
    stock = yf.Ticker(ticker)
    data = stock.history(period="5y")  # Fetch 5 years of data
    data.reset_index(inplace=True)
    return data

ticker = "AAPL"  # Replace with your ticker
data = fetch_data(ticker)
print(data.head())

                       Date       Open       High        Low      Close  \
0 2019-12-30 00:00:00-05:00  70.156601  70.939461  69.128952  70.655884   
1 2019-12-31 00:00:00-05:00  70.270538  71.179428  70.171165  71.172157   
2 2020-01-02 00:00:00-05:00  71.799881  72.856621  71.545395  72.796028   
3 2020-01-03 00:00:00-05:00  72.020447  72.851776  71.862907  72.088310   
4 2020-01-06 00:00:00-05:00  71.206077  72.701500  70.954010  72.662720   

      Volume  Dividends  Stock Splits  
0  144114400        0.0           0.0  
1  100805600        0.0           0.0  
2  135480400        0.0           0.0  
3  146322800        0.0           0.0  
4  118387200        0.0           0.0  


In [11]:
import pandas as pd
import numpy as np

def preprocess_data(data):
    # Feature engineering: Create relevant columns
    data['open-close'] = data['Open'] - data['Close']
    data['high-low'] = data['High'] - data['Low']
    data['price-change'] = data['Close'].pct_change()
    data['is_quarter_end'] = np.where(data['Date'].dt.month % 3 == 0, 1, 0)
    data['SMA_10'] = data['Close'].rolling(window=10).mean()
    data['SMA_20'] = data['Close'].rolling(window=20).mean()
    data['SMA_30'] = data['Close'].rolling(window=30).mean()
    data['EMA_10'] = data['Close'].ewm(span=10, adjust=False).mean()
    data['EMA_20'] = data['Close'].ewm(span=20, adjust=False).mean()
    data['EMA_30'] = data['Close'].ewm(span=30, adjust=False).mean()
    # Drop unnecessary columns
    data = data.drop(['Dividends', 'Stock Splits'], axis=1, errors='ignore')
    data.dropna(inplace=True)  # Handle missing values
    
    return data

data = preprocess_data(data)
print(data.head())



                        Date       Open       High        Low      Close  \
29 2020-02-11 00:00:00-05:00  78.617285  78.690166  77.429277  77.647926   
30 2020-02-12 00:00:00-05:00  78.099807  79.496746  78.099807  79.491890   
31 2020-02-13 00:00:00-05:00  78.760610  79.253790  78.556537  78.925812   
32 2020-02-14 00:00:00-05:00  78.894235  79.195493  78.435071  78.945259   
33 2020-02-18 00:00:00-05:00  76.615384  77.681919  76.433174  77.499710   

       Volume  open-close  high-low  price-change  is_quarter_end     SMA_10  \
29   94323200    0.969359  1.260890     -0.006033               0  77.446171   
30  113730400   -1.392082  1.396939      0.023748               0  77.534311   
31   94747600   -0.165201  0.697253     -0.007121               0  77.577235   
32   80113600   -0.051024  0.760422      0.000246               0  77.970147   
33  152531200   -0.884326  1.248745     -0.018311               0  78.239105   

       SMA_20     SMA_30  
29  76.974504  75.725079  
30  77.1

In [12]:
# Define features (X) and target (y)
features = data[['open-close', 'high-low', 'Volume', 'is_quarter_end','SMA_10','SMA_20',"SMA_30"]]
target = np.where(data['Close'].shift(-1) > data['Close'], 1, 0)  # 1 foSr price increase, 0 otherwise


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [14]:
from sklearn.linear_model import LogisticRegression

# Initialize and train the model
model = LogisticRegression()
model.fit(X_train, y_train)

# Save the scaler and model for reuse
from joblib import dump
dump(scaler, 'scaler.joblib')
dump(model, 'stock_price_predictor.joblib')


['stock_price_predictor.joblib']

In [15]:
from sklearn.metrics import accuracy_score, classification_report

# Make predictions
y_pred = model.predict(X_test)

# Evaluate performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.524390243902439
Classification Report:
               precision    recall  f1-score   support

           0       0.48      0.09      0.15       116
           1       0.53      0.92      0.67       130

    accuracy                           0.52       246
   macro avg       0.50      0.50      0.41       246
weighted avg       0.50      0.52      0.42       246



In [17]:
from joblib import load

# Load saved model and scaler
scaler1 = load('scaler.joblib')
model1 = load('stock_price_predictor.joblib')

# Example new data
new_data = [[5.0, -2.0, 1e7, 0,77.53,77.1,76.1]]  # Replace with actual feature values
new_data_scaled = scaler1.transform(new_data)
prediction = model1.predict(new_data_scaled)
probability = model1.predict_proba(new_data_scaled)

print("Prediction:", "Up" if prediction[0] == 1 else "Down")
print("Probability of Price Increase:", probability[0][1])


Prediction: Down
Probability of Price Increase: 0.49853982328759544


