In [77]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from datetime import datetime
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from PIL import Image
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA

In [78]:
# Switch to the 'Agg' backend
plt.switch_backend('Agg')

# Step 1: Read the CSV file and parse the dates
file_path = 'C:\\Users\\User\\OneDrive\\Desktop\\python project\\final_new_adj_data.csv'
df = pd.read_csv(file_path)

In [79]:
# Step 2: Convert 'Date' column to datetime, handling multiple formats
def parse_date(date):
    for fmt in ('%m/%d/%Y', '%m-%d-%Y', '%d/%m/%Y', '%d-%m-%Y'):
        try:
            return pd.to_datetime(date, format=fmt)
        except ValueError:
            continue
    return pd.NaT

df['Date'] = df['Date'].apply(parse_date)

# Drop rows with invalid dates
df = df.dropna(subset=['Date'])

# Set the 'Date' column as the index and ensure it's a DatetimeIndex
df.set_index('Date', inplace=True)

In [80]:
# Step 3: Print info and head to check the data
print(df.info())
print("Columns in the DataFrame:", df.columns)
print(df.head())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 100120 entries, 2014-04-01 to 2024-05-30
Data columns (total 7 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   Stock      100120 non-null  object 
 1   Open       100120 non-null  float64
 2   High       100120 non-null  float64
 3   Low        100120 non-null  float64
 4   Close      100120 non-null  float64
 5   Adj Close  100120 non-null  float64
 6   Volume     100120 non-null  int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 6.1+ MB
None
Columns in the DataFrame: Index(['Stock', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')
                 Stock        Open        High         Low       Close  \
Date                                                                     
2014-04-01  1.RELIANCE  427.907715  432.022217  424.844696  430.925018   
2014-04-02  1.RELIANCE  432.022217  439.794037  428.867767  438.056793   
2014-04-03  1.RELIANCE  437.

In [82]:
# Step 4: Filter for the specific stock and transpose the DataFrame for plotting
options1 = ["1.RELIANCE"]
df1 = df[df['Stock'].isin(options1)].transpose()
print(df1.info())

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Stock to Volume
Columns: 2503 entries, 2014-04-01 to 2024-05-30
dtypes: object(2503)
memory usage: 137.2+ KB
None


In [83]:
# Step 5: Prepare data for plotting the adjusted close price
dates = df.index[df['Stock'] == '1.RELIANCE']
y = df[df['Stock'] == '1.RELIANCE']['Adj Close']
plt.plot(dates, y)
plt.xlabel('Date')
plt.ylabel('Adj Close')
plt.title('ADJUSTED CLOSING PRICE: RELIANCE')
plt.savefig('adj_close_price_reliance.png')
plt.close()
# Display the saved image
image_path = 'adj_close_price_reliance.png'
image = Image.open(image_path)
image.show()

In [84]:
# Step 6: Filter the DataFrame for the specific stock and resample by month-end
df_REL = df[df['Stock'] == '1.RELIANCE']
print("Step 6: DataFrame for specific stock (1.RELIANCE):\n", df_REL.head())
print("Index type before resampling:", type(df_REL.index))
dfm = df_REL.resample(rule='ME').sum()  # Use 'M' for month-end resampling
print("Step 6: Resampled DataFrame by month-end:\n", dfm.head())
dfm_1 = dfm.drop('Stock', axis=1)
print("Final DataFrame after dropping Stock column:\n", dfm_1.head())

Step 6: DataFrame for specific stock (1.RELIANCE):
                  Stock        Open        High         Low       Close  \
Date                                                                     
2014-04-01  1.RELIANCE  427.907715  432.022217  424.844696  430.925018   
2014-04-02  1.RELIANCE  432.022217  439.794037  428.867767  438.056793   
2014-04-03  1.RELIANCE  437.416779  440.228333  429.919250  435.519531   
2014-04-04  1.RELIANCE  435.222382  435.793823  430.444977  431.679321   
2014-04-07  1.RELIANCE  431.565033  436.571014  427.153381  431.633606   

             Adj Close    Volume  
Date                              
2014-04-01  402.424896   7820304  
2014-04-02  409.085022  13390090  
2014-04-03  406.715546  13275447  
2014-04-04  403.129364   6256938  
2014-04-07  403.086670   6452624  
Index type before resampling: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
Step 6: Resampled DataFrame by month-end:
                                                         S

In [86]:
# Step 7: Plot the resampled adjusted close prices
dfm_1['Adj Close'].plot()
plt.xlabel('Date')
plt.ylabel('Adj Close Price Totals Monthly')
plt.title('ADJUSTED CLOSING PRICE MONTHLY: RELIANCE')
plt.savefig('adj_close_price_monthly_reliance.png')
plt.close()
# Display the saved image
image_path1 = 'adj_close_price_monthly_reliance.png'
image1 = Image.open(image_path1)
image1.show()

In [87]:
# Step 8: Perform seasonal decomposition
res = sm.tsa.seasonal_decompose(dfm_1['Adj Close'], model="multiplicative")
res.plot()
plt.savefig('seasonal_decomposition_reliance.png')
plt.close()

# Display the saved image
image_path2 = 'seasonal_decomposition_reliance.png'
image2 = Image.open(image_path2)
image2.show()

In [88]:
dfm_3=dfm_1[['Adj Close']]
dfm_3.tail()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2024-01-31,56615.899657
2024-02-29,61423.399902
2024-03-31,52680.599853
2024-04-30,58723.249755
2024-05-31,57385.800049


In [89]:
fig=plt.figure(figsize=(12,8))
ax1=fig.add_subplot(211)
fig=sm.graphics.tsa.plot_acf(dfm_3['Adj Close'].iloc[13:],lags=40,ax=ax1)
ax2=fig.add_subplot(212)
fig=sm.graphics.tsa.plot_pacf(dfm_3['Adj Close'].iloc[13:],lags=40,ax=ax2)
# Saving the plots
plt.savefig('Adj_Close_acf_pacf.png')
plt.close()

# Display the saved image
image_path_Adj_Close_acf_pacf = 'Adj_Close_acf_pacf.png'
image_Adj_Close_acf_pacf = Image.open(image_path_Adj_Close_acf_pacf)
image_Adj_Close_acf_pacf.show()

In [91]:
### Testing for stationarity
 # Correct indentation
from statsmodels.tsa.stattools import adfuller
test_result=adfuller(dfm_3['Adj Close'])

#Ho: It is non stationary
#H1: It is stationary

def adfuller_test(dfm_3):
    result=adfuller(dfm_3)
    labels=['ADF test statistic','p-value','#lags used','no. of observations used']
    for value,label in zip(result,labels):
        print(label+':'+str(value))
    if result[1]<=0.05:
        print("strong evidence against Ho,reject Ho,data is stationary")
    else:
        print("weak evidence against Ho,accept Ho,data is non stationary")

In [93]:
adfuller_test(dfm_3['Adj Close'])

ADF test statistic:0.9574584852288655
p-value:0.9937961753183001
#lags used:10
no. of observations used:111
weak evidence against Ho,accept Ho,data is non stationary


In [94]:
from pmdarima import auto_arima
# Ignore harmless warnings
import warnings
warnings.filterwarnings("ignore")

In [95]:
## Differencing

dfm_3['Adj_Close First Difference']=dfm_3['Adj Close']-dfm_3['Adj Close'].shift(1)
dfm_3['Seasonal First Difference']=dfm_3['Adj Close']-dfm_3['Adj Close'].shift(12)
adfuller_test(dfm_3['Adj_Close First Difference'].dropna())
adfuller_test(dfm_3['Seasonal First Difference'].dropna())

ADF test statistic:-5.0473389047515
p-value:1.7871952590653333e-05
#lags used:9
no. of observations used:111
strong evidence against Ho,reject Ho,data is stationary
ADF test statistic:-2.2314947182272635
p-value:0.19501386571632084
#lags used:12
no. of observations used:97
weak evidence against Ho,accept Ho,data is non stationary


In [96]:
fig=plt.figure(figsize=(12,8))
ax1=fig.add_subplot(211)
fig=sm.graphics.tsa.plot_acf(dfm_3['Adj_Close First Difference'].iloc[13:],lags=40,ax=ax1)
ax2=fig.add_subplot(212)
fig=sm.graphics.tsa.plot_pacf(dfm_3['Adj_Close First Difference'].iloc[13:],lags=40,ax=ax2)
# Saving the plots
plt.savefig('Adj_Close_First_Difference_acf_pacf.png')
plt.close()

# Display the saved image
image_path_Adj_Close_First_Difference_acf_pacf = 'Adj_Close_First_Difference_acf_pacf.png'
image_Adj_Close_First_Difference_acf_pacf = Image.open(image_path_Adj_Close_First_Difference_acf_pacf)
image_Adj_Close_First_Difference_acf_pacf.show()

In [97]:

dfm_3['Seasonal Second Difference'] = dfm_3['Adj_Close First Difference'] - dfm_3['Adj_Close First Difference'].shift(12)
adfuller_test(dfm_3['Seasonal Second Difference'].dropna())

ADF test statistic:-3.7803685255509123
p-value:0.0031146794634063267
#lags used:11
no. of observations used:97
strong evidence against Ho,reject Ho,data is stationary


In [98]:
fig=plt.figure(figsize=(12,8))
ax1=fig.add_subplot(211)
fig=sm.graphics.tsa.plot_acf(dfm_3['Seasonal Second Difference'].iloc[13:],lags=40,ax=ax1)
ax2=fig.add_subplot(212)
fig=sm.graphics.tsa.plot_pacf(dfm_3['Seasonal Second Difference'].iloc[13:],lags=40,ax=ax2)
# Saving the plots
plt.savefig('Seasonal_Second_Difference_acf_pacf.png')
plt.close()

# Display the saved image
image_path_Seasonal_Second_Difference_acf_pacf = 'Seasonal_Second_Difference_acf_pacf.png'
image_Seasonal_Second_Difference_acf_pacf = Image.open(image_path_Seasonal_Second_Difference_acf_pacf)
image_Seasonal_Second_Difference_acf_pacf.show()

In [99]:

stepwise_fit= auto_arima(
    y=dfm_3['Adj Close'], 
    seasonal=True, 
    m=12,  # Assuming monthly seasonality; adjust as needed
    stepwise=True, 
    trace=True, 
    error_action='ignore', 
    suppress_warnings=True
)

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(1,0,1)[12] intercept   : AIC=2297.325, Time=0.65 sec
 ARIMA(0,1,0)(0,0,0)[12] intercept   : AIC=2319.409, Time=0.01 sec
 ARIMA(1,1,0)(1,0,0)[12] intercept   : AIC=2316.163, Time=0.13 sec
 ARIMA(0,1,1)(0,0,1)[12] intercept   : AIC=2314.863, Time=0.05 sec
 ARIMA(0,1,0)(0,0,0)[12]             : AIC=2319.104, Time=0.01 sec
 ARIMA(2,1,2)(0,0,1)[12] intercept   : AIC=2296.418, Time=0.48 sec
 ARIMA(2,1,2)(0,0,0)[12] intercept   : AIC=2294.457, Time=0.18 sec
 ARIMA(2,1,2)(1,0,0)[12] intercept   : AIC=2296.434, Time=0.63 sec
 ARIMA(1,1,2)(0,0,0)[12] intercept   : AIC=2307.919, Time=0.26 sec
 ARIMA(2,1,1)(0,0,0)[12] intercept   : AIC=2315.367, Time=0.06 sec
 ARIMA(3,1,2)(0,0,0)[12] intercept   : AIC=2296.411, Time=0.22 sec
 ARIMA(2,1,3)(0,0,0)[12] intercept   : AIC=2297.585, Time=0.45 sec
 ARIMA(1,1,1)(0,0,0)[12] intercept   : AIC=2306.337, Time=0.22 sec
 ARIMA(1,1,3)(0,0,0)[12] intercept   : AIC=2314.118, Time=0.30 sec
 ARIMA(3,1,1)(0,0,0

In [100]:
stepwise_fit.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,122.0
Model:,"SARIMAX(5, 1, 1)",Log Likelihood,-1129.776
Date:,"Tue, 27 Aug 2024",AIC,2275.552
Time:,18:43:58,BIC,2297.919
Sample:,04-30-2014,HQIC,2284.636
,- 05-31-2024,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,1035.1494,404.056,2.562,0.010,243.214,1827.085
ar.L1,-0.6734,0.160,-4.210,0.000,-0.987,-0.360
ar.L2,-0.2260,0.079,-2.865,0.004,-0.381,-0.071
ar.L3,-0.1109,0.071,-1.560,0.119,-0.250,0.028
ar.L4,-0.4842,0.071,-6.844,0.000,-0.623,-0.346
ar.L5,-0.5243,0.079,-6.607,0.000,-0.680,-0.369
ma.L1,0.3899,0.196,1.989,0.047,0.006,0.774
sigma2,7.944e+06,1.03e+06,7.723,0.000,5.93e+06,9.96e+06

0,1,2,3
Ljung-Box (L1) (Q):,0.01,Jarque-Bera (JB):,4.2
Prob(Q):,0.94,Prob(JB):,0.12
Heteroskedasticity (H):,6.98,Skew:,0.08
Prob(H) (two-sided):,0.0,Kurtosis:,3.9


In [101]:
print(dfm_3.shape)

(122, 4)


In [102]:
train_date = '2022-4-1'
test_date = '2022-3-31'
train_Rel=dfm_3[:train_date]
test_Rel=dfm_3[test_date:]

print(train_Rel.shape,test_Rel.shape)

(96, 4) (27, 4)


In [103]:
train_Rel['Adj Close'].plot(legend=True,label='Train')
test_Rel['Adj Close'].plot(legend=True,label='Test')
plt.savefig('futureDate.png')
plt.close()

# Display the saved image
image_futureDate = 'futureDate.png'
image_futureDate = Image.open(image_futureDate)
image_futureDate.show()

In [104]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
model=SARIMAX(train_Rel['Adj Close'],order=(1,1,6),seasonal_order=(1,2,1,12))
                                          
model_fit=model.fit(disp=False)

# Making predictions
start_index = len(train_Rel)
end_index = start_index + len(test_Rel) - 1
predictions = model_fit.predict(start=start_index, end=end_index)

# Ensure the predictions have the same index as the test data
predictions.index = test_Rel.index
# Combine the training data and the predicted values for a seamless transition
combined = pd.concat([train_Rel['Adj Close'], predictions])
# Display the first 5 predictions
print(predictions[:5])

Date
2022-03-31    38674.983115
2022-04-30    42055.895891
2022-05-31    53823.447655
2022-06-30    50761.938373
2022-07-31    49312.750079
Freq: ME, Name: predicted_mean, dtype: float64


In [105]:
# Model Evaluation
from sklearn.metrics import mean_squared_error

In [110]:
# Assuming test_REL_adjc_m is a DataFrame and predictions is a Series or array
# Check the shapes or lengths
print("Shape of test_Rel:", test_Rel.shape)
print("Length of predictions:", len(predictions))

# Ensure they have the same length or shape
if test_Rel.shape[0] != len(predictions):
    raise ValueError("Lengths of test_Rel and predictions must be the same.")

# Select the column of interest from the DataFrame if necessary
y_true = test_Rel['Adj Close']  # Assuming 'Adj_Close' is the column of interest

# Calculate RMSE
error = np.sqrt(mean_squared_error(y_true, predictions))
print("RMSE:", error)

#Calculate MAPE
mape=np.mean(np.abs(predictions - y_true )/np.abs(y_true))
print('MAPE:',mape)

# Alternatively, calculate mean and standard deviation
# Calculate mean and standard deviation for 'Adj_Close' column
mean_adj_close = test_Rel['Adj Close'].mean()
std_dev_adj_close = np.sqrt(test_Rel['Adj Close'].var())

print("Mean of Adj_Close:", mean_adj_close)
print("Standard Deviation of Adj_Close:", std_dev_adj_close)


Shape of test_Rel: (27, 4)
Length of predictions: 27
RMSE: 6627.177637483066
MAPE: 0.11692149491155945
Mean of Adj_Close: 49766.75400133333
Standard Deviation of Adj_Close: 5302.986980057101


In [111]:
futureDate=pd.DataFrame(pd.date_range(start='2024-06-30',end='2026-05-31',freq='M'),columns=['Dates'])
futureDate.set_index('Dates',inplace=True)
futureDate.head()

2024-06-30
2024-07-31
2024-08-31
2024-09-30
2024-10-31


In [112]:
forecast=model_fit.predict(start=futureDate.index[0],end=futureDate.index[-1])

combined = pd.concat([predictions,forecast])

In [113]:
#Visualizations
# Plotting the training and test data
dfm_3.dropna()
plt.plot(train_Rel['Adj Close'], label='Training Data')
plt.plot(test_Rel['Adj Close'], label='Test Data')
plt.plot(predictions, label='Predictions')
plt.plot(forecast, label='forecast')
plt.legend()




# Adding labels and title
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price')
plt.title('Training Data, Test Data, and Predictions')

plt.savefig('train_test_reliance.png')
plt.close()

# Display the saved image
image_path5 = 'train_test_reliance.png'
image5 = Image.open(image_path5)
image5.show()