In [1]:
import pandas as pd

# Load the dataset
data = pd.read_csv('reliance_stock_data.csv', parse_dates=['Date'], index_col='Date')

# Display the first few rows
print(data.head())

               Open    High      Low    Close  Adj_Close     Volume
Date                                                               
2025-04-07  1125.00  1171.0  1115.55  1166.20    1166.20  1433461.0
2025-04-04  1240.00  1245.2  1192.85  1204.70    1204.70   785059.0
2025-04-03  1238.95  1251.2  1234.55  1248.60    1248.60   532522.0
2025-04-02  1250.35  1256.0  1243.80  1251.10    1251.10   243303.0
2025-04-01  1274.60  1277.8  1250.00  1252.45    1252.45   722008.0


In [2]:
# Sort the dataset by Date in ascending order
data = data.sort_index()

# Verify that the index is sorted
print(data.index.is_monotonic_increasing)  # Should return True

True


In [3]:
# Split data into training and testing sets based on date
train = data.loc[:]
# test = data.loc['2025-01-01':]

# Display training and testing sets
print("Training Set:")
print(train.head())
# print("\nTesting Set:")
# print(test.head())

Training Set:
               Open     High      Low    Close  Adj_Close    Volume
Date                                                               
2023-01-09  1271.85  1300.90  1269.55  1298.28    1280.41  596984.0
2023-01-10  1302.03  1302.50  1273.03  1278.97    1261.38  305284.0
2023-01-11  1277.60  1279.10  1261.00  1262.75    1245.38  255590.0
2023-01-12  1262.50  1265.95  1232.82  1236.05    1219.04  370686.0
2023-01-13  1234.30  1236.60  1217.40  1233.68    1216.70  497964.0


In [4]:
# Print the lines around the problematic area
with open('new_articles_with_ewma.csv', 'r', encoding='utf-8', errors='replace') as f:
    for i, line in enumerate(f, 1):
        if 984 <= i <= 988:  # Print lines 984-988
            print(f"Line {i}: {line}")

Line 984: 2.0

Line 985: 28%

Line 986: Aditya Birla

Line 987: 57

Line 988: Mumbai



In [5]:
pip install --upgrade pandas

Collecting pandas
  Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m97.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 2.2.2
    Uninstalling pandas-2.2.2:
      Successfully uninstalled pandas-2.2.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.2.3 which is incompatible.[0m[31m
[0mSuccessfully installed pandas-2.2.3


In [7]:
import pandas as pd
import numpy as np
# !pip install statsmodels
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Load stock price data
stock_data = pd.read_csv('reliance_stock_data.csv')
stock_data['Date'] = pd.to_datetime(stock_data['Date'], format='%Y-%m-%d')
stock_data.set_index('Date', inplace=True)
stock_data.sort_index(inplace=True)

# Load sentiment data with error handling
# Try reading while skipping malformed lines
sentiment_data = pd.read_csv('new_articles_with_ewma.csv', on_bad_lines='skip')

# Convert date column to datetime
sentiment_data['date'] = pd.to_datetime(sentiment_data['date'])

# SOLUTION FOR ISSUE #1:
# Group by date and take the last EWMA-3 value for each date
ewma_data = sentiment_data.groupby('date')['ewma_3'].last().reset_index()
ewma_data.rename(columns={'date': 'Date'}, inplace=True)
ewma_data.set_index('Date', inplace=True)
ewma_data.sort_index(inplace=True)

# SOLUTION FOR ISSUE #2:
# Join with stock data and forward fill missing values
merged_data = stock_data.join(ewma_data, how='left')

# Forward fill missing EWMA values (uses last available EWMA)
merged_data['ewma_3'].fillna(method='ffill', inplace=True)
# For any missing values at the beginning, backward fill
merged_data['ewma_3'].fillna(method='bfill', inplace=True)

# Print the first few rows to verify the data
print("First few rows of merged data:")
print(merged_data.head())

First few rows of merged data:
               Open     High      Low    Close  Adj_Close    Volume    ewma_3
Date                                                                         
2023-01-09  1271.85  1300.90  1269.55  1298.28    1280.41  596984.0 -0.007061
2023-01-10  1302.03  1302.50  1273.03  1278.97    1261.38  305284.0 -0.007061
2023-01-11  1277.60  1279.10  1261.00  1262.75    1245.38  255590.0 -0.007061
2023-01-12  1262.50  1265.95  1232.82  1236.05    1219.04  370686.0 -0.007061
2023-01-13  1234.30  1236.60  1217.40  1233.68    1216.70  497964.0 -0.007061


  sentiment_data['date'] = pd.to_datetime(sentiment_data['date'])
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_data['ewma_3'].fillna(method='ffill', inplace=True)
  merged_data['ewma_3'].fillna(method='ffill', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_data['ewma_3'].fillna(method='bfill', i

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from datetime import datetime, timedelta
import pickle
import os


def get_models(force_refit=False):
    # File paths for saved models
    stock_model_path = 'stock_model.pkl'
    ewma_model_path = 'ewma_model.pkl'

    # Check if models already exist and we don't want to force refit
    if not force_refit and os.path.exists(stock_model_path) and os.path.exists(ewma_model_path):
        print("Loading existing models...")
        with open(stock_model_path, 'rb') as f:
            full_model_fit = pickle.load(f)
        with open(ewma_model_path, 'rb') as f:
            ewma_model_fit = pickle.load(f)
    else:
      # 1. Retrain the model on the full dataset
      full_data = merged_data  # All data from Jan 2023 to Mar 2025
      full_exog = full_data[['ewma_3']]

      # Define SARIMA parameters
      p, d, q = 4, 1, 4  # Non-seasonal parameters
      P, D, Q, s = 0,0,0,12  # Seasonal parameters (monthly seasonality)

      # Fit the model on the full dataset for stock price prediction
      print("Fitting SARIMAX model on full dataset for stock price forecasting...")
      full_model = SARIMAX(full_data['Close'],
                          exog=full_exog,
                          order=(p, d, q),
                          seasonal_order=(P, D, Q, s),
                          enforce_stationarity=False,
                          enforce_invertibility=False)
      full_model_fit = full_model.fit(disp=False)

      # Train a separate SARIMA model for EWMA prediction
      print("Fitting SARIMA model for EWMA forecasting...")
      ewma_model = SARIMAX(full_data['ewma_3'],
                          order=(1, 1, 1),  # Adjust these parameters as needed
                          seasonal_order=(1, 1, 1, 7),  # Using weekly seasonality for sentiment
                          enforce_stationarity=False,
                          enforce_invertibility=False)
      ewma_model_fit = ewma_model.fit(disp=False)

      # Save models
      with open(stock_model_path, 'wb') as f:
          pickle.dump(full_model_fit, f)
      with open(ewma_model_path, 'wb') as f:
          pickle.dump(ewma_model_fit, f)
    return full_model_fit, ewma_model_fit


# Function to get user input for prediction date
def get_prediction_date():
    while True:
        try:
            date_str = input("Enter a future date for prediction (YYYY-MM-DD): ")
            pred_date = datetime.strptime(date_str, "%Y-%m-%d").date()

            # Check if date is after the last date in our dataset
            last_date = merged_data.index[-1].date()
            if pred_date <= last_date:
                print(f"Please enter a date after {last_date}")
                continue

            return pred_date
        except ValueError:
            print("Invalid date format. Please use YYYY-MM-DD format.")

# Function to predict stock price for a specified date
def predict_stock_price(target_date,full_model_fit,ewma_model_fit):
    # Convert to datetime object if it's not already
    if isinstance(target_date, str):
        target_date = datetime.strptime(target_date, "%Y-%m-%d")

    # Calculate how many days ahead we need to predict
    last_date = merged_data.index[-1].to_pydatetime()
    days_ahead = (target_date - last_date.date()).days

    if days_ahead <= 0:
        print("Target date is not in the future.")
        return None

    # Create date range from last data point to target date
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1),
                               end=target_date, freq='D')

    # Predict future EWMA values using the EWMA model
    ewma_forecast = ewma_model_fit.get_forecast(steps=len(future_dates))
    predicted_ewma = ewma_forecast.predicted_mean
    print(predicted_ewma)

    # Reindex predicted_ewma to match future_dates
    predicted_ewma.index = future_dates

    # Create DataFrame for future EWMA values
    future_ewma = pd.DataFrame({'ewma_3': predicted_ewma}, index=future_dates)

    print(future_ewma )
    # Generate stock price predictions using the predicted EWMA values
    future_predictions = full_model_fit.get_forecast(steps=len(future_dates), exog=future_ewma)
    forecast_mean = future_predictions.predicted_mean

    print(forecast_mean)

    forecast_mean.index = future_dates

    # Create a DataFrame with just the predictions we need
    forecast_df = pd.DataFrame({
        'Predicted_Close': forecast_mean,
        'Predicted_EWMA': predicted_ewma
    }, index=future_dates)

    print(forecast_df)

    return forecast_df

# Main execution flow
def main():
    # Load or fit models (set force_refit=True to refit models)
    full_model_fit, ewma_model_fit = get_models(force_refit=False)

    # Get user input
    target_date = get_prediction_date()

    # Make prediction
    print(f"Predicting stock price for {target_date}...")
    forecast_df = predict_stock_price(target_date,full_model_fit, ewma_model_fit)

    if forecast_df is not None:
        # Print prediction for target date only
        target_prediction = forecast_df.loc[target_date] if target_date in forecast_df.index else forecast_df.iloc[-1]

        print("\n===== PREDICTION RESULTS =====")
        print(f"Date: {target_date}")
        print(f"Predicted Stock Price: ₹{target_prediction['Predicted_Close']:.2f}")
        print(f"Predicted EWMA Sentiment: {target_prediction['Predicted_EWMA']:.4f}")
        print("=============================")

# Run the program
if __name__ == "__main__":
    main()

Fitting SARIMAX model on full dataset for stock price forecasting...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting SARIMA model for EWMA forecasting...
Enter a future date for prediction (YYYY-MM-DD): 2025-04-13
Predicting stock price for 2025-04-13...
552   -0.005265
553   -0.020950
554   -0.018335
555   -0.026509
556   -0.029074
557   -0.016263
Name: predicted_mean, dtype: float64
              ewma_3
2025-04-08 -0.005265
2025-04-09 -0.020950
2025-04-10 -0.018335
2025-04-11 -0.026509
2025-04-12 -0.029074
2025-04-13 -0.016263
552    1166.034599
553    1168.589236
554    1171.720668
555    1173.323469
556    1175.334988
557    1176.640972
Name: predicted_mean, dtype: float64
            Predicted_Close  Predicted_EWMA
2025-04-08      1166.034599       -0.005265
2025-04-09      1168.589236       -0.020950
2025-04-10      1171.720668       -0.018335
2025-04-11      1173.323469       -0.026509
2025-04-12      1175.334988       -0.029074
2025-04-13      1176.640972       -0.016263

===== PREDICTION RESULTS =====
Date: 2025-04-13
Predicted Stock Price: ₹1176.64
Predicted EWMA Sentiment: -0.0163


  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
