In [None]:
import pandas as pd

# Read the Reliance price history, parsing the 'Date' column as datetimes
# and using it as the row index.
data = pd.read_csv(
    'reliance_stock_data.csv',
    index_col='Date',
    parse_dates=['Date'],
)

# Preview the first rows as a quick sanity check of the load.
print(data.head())

In [None]:
# Order the rows chronologically (oldest date first) by their Date index.
data = data.sort_index(axis=0, ascending=True)

# Sanity check: prints True when the index never decreases.
print(data.index.is_monotonic_increasing)

In [None]:
# Training set: currently the complete date range of `data` (no rows held out).
train = data.loc[:]
# Date-based hold-out, disabled for now; uncomment to split off a test set:
# test = data.loc['2025-01-01':]

# Preview the training set (and the test set, once the hold-out is enabled).
print("Training Set:", train.head(), sep="\n")
# print("\nTesting Set:")
# print(test.head())

In [None]:
pip install --upgrade pandas

In [None]:
import pandas as pd
import numpy as np
# !pip install statsmodels
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Read the stock prices, parse 'Date' (expected as YYYY-MM-DD) and use it as a
# chronologically sorted index.
stock_data = pd.read_csv('reliance_stock_data.csv')
stock_data['Date'] = pd.to_datetime(stock_data['Date'], format='%Y-%m-%d')
stock_data = stock_data.set_index('Date').sort_index()

# Load sentiment data, skipping malformed lines instead of failing on them.
# pandas >= 1.3 spells this `on_bad_lines='skip'`. Older releases do not know
# that keyword and reject it with a TypeError (not a ParserError), so that is
# the exception that selects the legacy `error_bad_lines=False` spelling.
# A genuine ParserError is allowed to propagate: retrying with the legacy
# keyword cannot help, and on pandas >= 2.0 (where `error_bad_lines` was
# removed) it would only replace the real error with an unrelated TypeError.
try:
    sentiment_data = pd.read_csv('new_articles_with_ewma.csv', on_bad_lines='skip')
except TypeError:
    sentiment_data = pd.read_csv('new_articles_with_ewma.csv', error_bad_lines=False)



# Parse the sentiment 'date' column into datetimes.
sentiment_data['date'] = pd.to_datetime(sentiment_data['date'])

# SOLUTION FOR ISSUE #1:
# Several rows can share a date, so collapse them to one row per date by
# keeping the last 'ewma_3' value of each group. The result is re-keyed as a
# sorted 'Date' index so it lines up with the stock data index.
ewma_data = (
    sentiment_data
    .groupby('date')['ewma_3']
    .last()
    .reset_index()
    .rename(columns={'date': 'Date'})
    .set_index('Date')
    .sort_index()
)

# SOLUTION FOR ISSUE #2:
# Left-join the per-date EWMA onto the stock rows so every stock date is kept;
# stock dates without a sentiment row get NaN in 'ewma_3'.
merged_data = stock_data.join(ewma_data, how='left')

# Fill the gaps and assign the result back to the column. Calling
# `fillna(..., inplace=True)` on `merged_data['ewma_3']` is chained assignment:
# under pandas Copy-on-Write it fills a temporary Series and leaves merged_data
# unchanged, and `fillna(method=...)` is deprecated since pandas 2.1.
#   1. ffill: carry the last available EWMA forward.
#   2. bfill: fill any gap at the beginning from the first available EWMA.
# NOTE(review): the bfill step assigns a later sentiment value to earlier
# dates; confirm this look-ahead is acceptable for the downstream model.
merged_data['ewma_3'] = merged_data['ewma_3'].ffill().bfill()

# Print the first few rows to verify the data
print("First few rows of merged data:")
print(merged_data.head())