In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input, Dropout
from datetime import timedelta
import io
import base64
from matplotlib.figure import Figure
import os
from django.conf import settings

# Tunable constants for the forecasting pipeline.
_DEFAULT_TIME_STEP = 60      # preferred look-back window (rows) when enough history exists
_TRAIN_FRACTION = 0.7        # chronological share of samples used for training
_RECENT_WINDOW_DAYS = 93     # roughly three months of history shown in the plots
_FORECAST_DAYS = 7           # number of future steps to predict


def _resolve_data_dir(data_dir=None):
    """Return the directory holding the CSV inputs (explicit path or the Django app data dir)."""
    if data_dir is not None:
        return os.fspath(data_dir)
    # Accessing settings requires Django to be configured; callers running
    # outside Django (e.g. a notebook) should pass ``data_dir`` instead.
    return os.path.join(settings.BASE_DIR, 'stockapp', 'data')


def _create_dataset(dataset, time_step=1):
    """Turn an (n, 1) array into (samples, time_step, 1) windows and their next-step targets."""
    X, y = [], []
    # Every index i with i + time_step < len(dataset) yields a valid sample,
    # so the range stops at len(dataset) - time_step (no extra "- 1").
    for i in range(len(dataset) - time_step):
        X.append(dataset[i:i + time_step, 0])
        y.append(dataset[i + time_step, 0])
    return np.array(X).reshape(-1, time_step, 1), np.array(y)


def predict_stock_prices(company_code, data_dir=None, time_step=None):
    """Train an LSTM on one company's price history and plot a 7-step forecast.

    Args:
        company_code: Value looked up in the ``Company_Code`` column of
            ``company_codes_and_names.csv``.
        data_dir: Optional directory containing ``company_codes_and_names.csv``
            and ``data_for_models.csv``. When omitted, the directory
            ``<settings.BASE_DIR>/stockapp/data`` is used, which requires
            configured Django settings.
        time_step: Optional look-back window length in rows. When omitted, the
            window is ``min(60, n_rows // 2)`` so that short histories still
            produce samples.

    Returns:
        A ``data:image/png;base64,...`` URI with the recent prices and the
        predicted future prices.

    Raises:
        ValueError: If the company code is unknown, or if the history is too
            short to build at least one training and one test sample.

    Side effects:
        Prints MSE/RMSE/MAE for the held-out samples and shows a diagnostic
        pyplot figure comparing actual and predicted prices.
    """
    base_dir = _resolve_data_dir(data_dir)
    company_codes_and_names_path = os.path.join(base_dir, 'company_codes_and_names.csv')
    daily_info_path = os.path.join(base_dir, 'data_for_models.csv')

    # Map the public company code to the numeric id used in the price file.
    company_codes = pd.read_csv(company_codes_and_names_path)
    company_info = company_codes[company_codes['Company_Code'] == company_code]
    if company_info.empty:
        raise ValueError(f"Company code {company_code} not found in company_codes_and_names.csv")
    company_id = company_info['Company_ID'].values[0]

    # Load this company's rows and order them chronologically BEFORE forward
    # filling, so gaps are filled from the previous trading day rather than
    # from whatever row happened to precede them in the file.
    data = pd.read_csv(daily_info_path)
    df = data.loc[data['company_id'] == company_id, ['date', 'average_price']].copy()
    df['date'] = pd.to_datetime(df['date'])
    df = df.dropna(subset=['date']).sort_values('date')
    df['average_price'] = df['average_price'].ffill()
    # Leading gaps have nothing to fill from; drop them.
    df = df.dropna(subset=['average_price']).reset_index(drop=True)

    # Choose the look-back window and verify a train/test split is possible
    # before any expensive work starts.
    n_obs = len(df)
    if time_step is None:
        time_step = max(1, min(_DEFAULT_TIME_STEP, n_obs // 2))
    time_step = int(time_step)
    n_samples = n_obs - time_step
    train_size = int(n_samples * _TRAIN_FRACTION)
    if time_step < 1 or train_size < 1 or n_samples - train_size < 1:
        raise ValueError(
            f"Not enough price history for {company_code}: {n_obs} rows cannot "
            f"provide training and test windows of length {time_step}"
        )

    # Fit the scaler only on rows that the training samples can see
    # (inputs and targets), then apply it to the full series.
    prices = df[['average_price']].to_numpy(dtype=float)
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(prices[:train_size + time_step])
    data_scaled = scaler.transform(prices)

    # Build [samples, time steps, features] windows and split chronologically.
    X, y = _create_dataset(data_scaled, time_step)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    model = Sequential([
        Input(shape=(time_step, 1)),
        LSTM(50, return_sequences=True),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(25),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=64, verbose=1)

    # Evaluate on the held-out samples in the original price units.
    test_predict = model.predict(X_test)
    test_predict_rescaled = scaler.inverse_transform(test_predict.reshape(-1, 1))
    y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))

    errors = y_test_rescaled - test_predict_rescaled
    mse = float(np.mean(errors ** 2))
    rmse = np.sqrt(mse)
    mae = float(np.mean(np.abs(errors)))

    print(f"Mean Squared Error (MSE): {mse}")
    print(f"Root Mean Squared Error (RMSE): {rmse}")
    print(f"Mean Absolute Error (MAE): {mae}")

    # Recent history used as the backdrop of both plots.
    window_start = df['date'].max() - timedelta(days=_RECENT_WINDOW_DAYS)
    recent = df[df['date'] >= window_start]

    # Sample k predicts row (time_step + k), so the test predictions belong to
    # the rows starting at time_step + train_size. Only the part that falls
    # inside the recent window is drawn.
    test_dates = df['date'].iloc[time_step + train_size:]
    in_window = (test_dates >= window_start).to_numpy()

    fig_eval, ax_eval = plt.subplots(figsize=(12, 6))
    ax_eval.plot(recent['date'], recent['average_price'], label='Actual Prices', color='blue')
    ax_eval.plot(test_dates[in_window], test_predict_rescaled[in_window, 0],
                 label='Predicted Prices', color='red')
    ax_eval.set_title(f'Price Prediction for {company_code}')
    ax_eval.set_xlabel('Date')
    ax_eval.set_ylabel('Price')
    ax_eval.legend()
    plt.show()
    plt.close(fig_eval)  # release the figure from pyplot's registry

    # Roll the model forward: each prediction is fed back as the newest input.
    last_sequence = data_scaled[-time_step:]
    # Predictions are clipped to the range of the most recent observed window;
    # the bounds depend only on observed data, so compute them once.
    min_value = np.min(last_sequence)
    max_value = np.max(last_sequence)

    future_predictions = []
    for _ in range(_FORECAST_DAYS):
        pred = model.predict(last_sequence.reshape(1, time_step, 1), verbose=0)
        pred_value = np.clip(pred[0, 0], min_value, max_value)
        future_predictions.append(pred_value)
        last_sequence = np.append(last_sequence[1:], [[pred_value]], axis=0)

    future_predictions_rescaled = scaler.inverse_transform(
        np.array(future_predictions).reshape(-1, 1)
    )

    # Future points are placed on consecutive calendar days after the last row.
    last_date = df['date'].iloc[-1]
    future_dates = [last_date + timedelta(days=i) for i in range(1, _FORECAST_DAYS + 1)]

    # Render with a standalone Figure (no pyplot state) so it is safe to call
    # from a web request.
    fig = Figure(figsize=(12, 6))
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(recent['date'], recent['average_price'], label='Historical Prices', color='blue')
    ax.plot(future_dates, future_predictions_rescaled, '--', label='Predicted Future Prices', color='green')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price')
    ax.set_title(f'Future Price Prediction for {company_code}')
    ax.legend()
    ax.grid()

    # Encode the PNG as a data URI.
    with io.BytesIO() as buf:
        fig.savefig(buf, format="png")
        encoded_image = base64.b64encode(buf.getvalue()).decode('utf-8')

    return f"data:image/png;base64,{encoded_image}"

# Run the full pipeline for company code 'STB'. The function reads
# settings.BASE_DIR, so Django settings must be configured first (set
# DJANGO_SETTINGS_MODULE or call settings.configure()); otherwise Django
# raises ImproperlyConfigured.
predict_stock_prices('STB')

ImproperlyConfigured: Requested setting BASE_DIR, but settings are not configured. You must either define the environment variable DJANGO_SETTINGS_MODULE or call settings.configure() before accessing settings.