In [None]:

# Imports and configuration
import warnings
warnings.filterwarnings("ignore")

import os
import datetime as dt
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import joblib

print("Libraries imported successfully.")
    

In [None]:
'''
# Load the dataset (replace the filename if different)
# Expecting a CSV with OHLCV columns for Reliance stock, e.g., Date, Open, High, Low, Close, Adj Close, Volume
# If you already have a prepared dataset, update the path below
csv_path = "reliance.csv"  # change this if your file has a different name

if not os.path.exists(csv_path):
    print("File not found:", csv_path)
    print("Please upload reliance.csv or update csv_path to your file name.")
else:
    df = pd.read_csv(csv_path)
    print("Dataset loaded:", csv_path)
    print("Shape:", df.shape)
    print("Columns:", list(df.columns))
    print(df.head())
'''
# NOTE: the CSV loader above is disabled (it is only a string literal), so this
# cell is responsible for creating the global `df` that the later cells check
# for with `'df' in globals()`.

# Taking user input (surrounding whitespace is stripped so a stray space does
# not end up in the ticker symbol or the dates)
stock_name = input("Enter stock symbol (e.g., RELIANCE.NS): ").strip()
start_date = input("Enter start date (YYYY-MM-DD): ").strip()
end_date = input("Enter end date (YYYY-MM-DD): ").strip()

# Downloading dataset
data = yf.download(stock_name, start=start_date, end=end_date)
stock_df = pd.DataFrame(data)

# Newer yfinance releases can return two-level (price field, ticker) columns
# even for a single symbol; keep only the price-field level so columns are
# addressable as plain 'Close', 'Open', ... in the feature-engineering cell.
if isinstance(stock_df.columns, pd.MultiIndex):
    stock_df.columns = stock_df.columns.get_level_values(0)

if stock_df.empty:
    # Nothing to work with: do not define `df`, so downstream guards report it.
    print("No data downloaded for:", stock_name)
    print("Check the symbol and date range, then re-run this cell.")
else:
    # Move the date index into a regular column: later cells look for a 'Date'
    # column, not an index.
    df = stock_df.reset_index()

    # Display first few rows
    print(stock_df.head())

In [None]:

# Basic cleanup and feature engineering for classification (next-day Up/Down)
# - Parse Date
# - Sort by Date
# - Create returns, moving averages, rolling volatility, momentum, spreads,
#   and a target: next-day up (1) vs down/flat (0)

def add_price_features(frame):
    """Return a cleaned copy of ``frame`` with engineered features and ``Target``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Price data. A ``Date`` column, if present, is parsed and used for
        sorting. Every other column is coerced to numeric. Features are only
        built when a ``Close`` column exists; the High/Low, Open and Volume
        based features are added only when those columns are present.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) in which rows containing NaN
        or infinite values have been removed and the index reset. When
        ``Close`` is present it carries a 0/1 ``Target`` column that is 1 when
        the next row's close is strictly greater than the current close.
    """
    out = frame.copy()

    if 'Date' in out.columns:
        out['Date'] = pd.to_datetime(out['Date'], errors='coerce')
        out = out.sort_values('Date').reset_index(drop=True)

    # Ensure numeric columns (unparseable values become NaN and are dropped below)
    for col in out.columns:
        if col != 'Date':
            out[col] = pd.to_numeric(out[col], errors='coerce')

    if 'Close' in out.columns:
        close = out['Close']
        out['Return_1d'] = close.pct_change()
        out['MA_5'] = close.rolling(5).mean()
        out['MA_10'] = close.rolling(10).mean()
        out['MA_20'] = close.rolling(20).mean()
        out['STD_10'] = close.rolling(10).std()
        out['Momentum_3'] = close / close.shift(3) - 1

        # Optional features: use real `if` guards. np.where(flag, expr, nan)
        # evaluates `expr` eagerly, so it raises KeyError on a missing column,
        # and an all-NaN placeholder column would make dropna() discard every row.
        if 'High' in out.columns and 'Low' in out.columns:
            out['High_Low_Spread'] = (out['High'] - out['Low']) / close
        if 'Open' in out.columns:
            out['Open_Close_Change'] = (close - out['Open']) / out['Open']
        if 'Volume' in out.columns:
            out['Volume_Change'] = out['Volume'].pct_change()

        # Target: next-day up (1) if next day's close > today's close, else 0
        out['Close_next'] = close.shift(-1)
        out['Target'] = (out['Close_next'] > close).astype(int)

    # Divisions by zero (e.g. a zero-volume day) produce +/-inf, which dropna()
    # would keep; treat them as missing so they are removed with the NaNs
    # introduced by rolling/shift.
    out = out.replace([np.inf, -np.inf], np.nan).dropna().reset_index(drop=True)

    # Close_next was only needed to build Target (and to drop the final,
    # unlabelled row). Left in the frame it would be used as a feature and
    # leak the label.
    out = out.drop(columns=['Close_next'], errors='ignore')
    return out


if 'df' in globals():
    df = add_price_features(df)

    print("After feature engineering, shape:", df.shape)
    print(df.head())
else:
    print("Dataframe df not found. Please load dataset first.")
    

In [None]:

# Explore dataset: info and basic plots

def _plot_series(values, label, color, title):
    """Draw one series against its positional index as a single line chart."""
    plt.figure(figsize=(10,4))
    plt.plot(values, label=label, color=color)
    plt.xlabel('Index')
    plt.ylabel(label)
    plt.title(title)
    plt.legend()
    plt.show()


if 'df' in globals():
    print("Dataset Info:")
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would append a stray "None" line to the output.
    df.info()

    # Count plot for Target
    if 'Target' in df.columns:
        plt.figure(figsize=(5,4))
        sns.countplot(x='Target', data=df, palette='coolwarm')
        plt.title('Count of Next-day Up (1) vs Not Up (0)')
        plt.show()

    # Line graph for Close and Volume if present
    if 'Close' in df.columns:
        _plot_series(df['Close'].values, 'Close', 'blue', 'Close Price Over Time')

    if 'Volume' in df.columns:
        _plot_series(df['Volume'].values, 'Volume', 'orange', 'Volume Over Time')

    # Correlation heatmap for features (everything except Date and the label)
    feature_cols = [c for c in df.columns if c not in ['Date','Target']]
    corr = df[feature_cols].corr()
    plt.figure(figsize=(10,8))
    sns.heatmap(corr, annot=False, cmap='coolwarm')
    plt.title('Feature Correlation Heatmap')
    plt.show()
else:
    print("Dataframe df not found. Please load dataset first.")
    

In [None]:

# Train-test split
if 'df' in globals() and 'Target' in df.columns:
    # Columns that must never be fed to the model:
    # - Target is the label itself
    # - Date is not a numeric feature
    # - Close_next is tomorrow's close, from which Target is derived directly;
    #   using it as a feature leaks the answer
    non_feature_cols = [c for c in ('Target', 'Date', 'Close_next') if c in df.columns]
    X = df.drop(columns=non_feature_cols)
    y = df['Target']

    # Chronological hold-out: the last 20% of rows form the test set.
    # A shuffled split would mix future rows into training, and the rolling
    # features of neighbouring rows overlap, so the score would be look-ahead
    # biased. stratify is dropped because it requires shuffling.
    # NOTE(review): assumes df rows are in chronological order (the feature
    # engineering cell sorts by Date when that column exists) - confirm.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)
    print("y_train value counts:")
    print(y_train.value_counts())
else:
    print("Target not prepared or df missing. Please ensure previous steps succeeded.")
    

In [None]:

# Feature scaling
if 'X_train' not in globals():
    print("Training data not found. Run train-test split cell first.")
else:
    # The scaler's statistics come from the training split only; the test
    # split is transformed with those same statistics.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    print("Scaling complete. Shapes:")
    train_shape = X_train_scaled.shape
    test_shape = X_test_scaled.shape
    print("X_train_scaled:", train_shape)
    print("X_test_scaled:", test_shape)
    

In [None]:

# Train the model
if 'X_train_scaled' not in globals():
    print("Scaled features not found. Please run scaling step.")
else:
    # Logistic regression with balanced class weights, fitted on the scaled
    # training split.
    model = LogisticRegression(
        class_weight='balanced',
        max_iter=1000,
        n_jobs=None,
    )
    model.fit(X_train_scaled, y_train)
    print("Model trained: LogisticRegression with class_weight balanced and max_iter 1000")
    

In [None]:

# Make predictions
if 'model' not in globals():
    print("Model not trained yet.")
else:
    # Hard class labels for the scaled test split
    y_pred = model.predict(X_test_scaled)
    # Second column of predict_proba, kept as a 1-D array
    # (presumably the probability of class 1 = "Up" - verify via model.classes_)
    test_probabilities = model.predict_proba(X_test_scaled)
    y_proba = test_probabilities[:, 1]
    del test_probabilities
    print("Predictions completed.")
    

In [None]:

# Evaluate model performance
if 'y_pred' not in globals():
    print("No predictions found. Run the prediction cell first.")
else:
    # Headline accuracy followed by the per-class precision/recall/F1 report
    acc = accuracy_score(y_test, y_pred)
    print("Accuracy:", acc)
    print("Classification Report:")
    print(classification_report(y_test, y_pred, digits=4))

    # Confusion matrix rendered as an annotated heatmap
    # (rows = actual class, columns = predicted class)
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(4,4))
    sns.heatmap(cm, cmap='Blues', annot=True, fmt='d', cbar=False)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()
    

In [None]:

# Predict for a single sample, using the first row of the held-out test set
# as the example input (kept as a one-row DataFrame so column names survive).
# The guard checks every object this cell uses, not just X_test, so running
# it before the scaling/training cells prints the hint instead of raising
# NameError.
if all(name in globals() for name in ('X_test', 'scaler', 'model')):
    sample_features = X_test.iloc[[0]]
    # Apply the same scaling the model was trained with
    sample_scaled = scaler.transform(sample_features)
    sample_pred = model.predict(sample_scaled)[0]
    sample_prob = model.predict_proba(sample_scaled)[0,1]
    print("Sample prediction (0=Not Up, 1=Up):", int(sample_pred))
    print("Sample probability of Up:", float(sample_prob))
    print(sample_features.head())
else:
    print("X_test not found. Please run previous steps.")
    

In [None]:

# Save the trained model and scaler
if 'model' not in globals():
    print("Model not found. Train the model before saving.")
else:
    # One timestamp is shared by both files so the model and the scaler it
    # was trained with can be matched up later.
    timestamp_str = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
    model_path = f"reliance_model_{timestamp_str}.pkl"
    scaler_path = f"reliance_scaler_{timestamp_str}.pkl"

    joblib.dump(model, model_path)
    joblib.dump(scaler, scaler_path)

    print("Model saved to:", model_path)
    print("Scaler saved to:", scaler_path)
    

In [None]:

# Load model and scaler (sanity check)
if 'model_path' not in globals() or 'scaler_path' not in globals():
    print("Saved model paths not found. Run save cell first.")
else:
    # Read both artifacts back from the paths written by the save cell
    loaded_model = joblib.load(model_path)
    loaded_scaler = joblib.load(scaler_path)

    # Re-run the sample through the reloaded scaler and model
    check_pred = loaded_model.predict(
        loaded_scaler.transform(sample_features)
    )[0]
    print("Loaded model prediction on sample:", int(check_pred))
    print("Reload sanity check completed.")
    