In [2]:
import pandas as pd

# Path of the price CSV that the rest of the notebook analyses
file_path = r"/content/BSBE.csv"

# Parse the file into a DataFrame using pandas' default options
data = pd.read_csv(file_path)

# Preview the top five rows so the available column names are visible
data.head(5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2018-10-02,25.0,25.0,25.0,25.0,23.705992,0
1,2018-10-03,24.889999,24.889999,24.889999,24.889999,23.601679,0
2,2018-10-04,24.844999,24.844999,24.844999,24.844999,23.559011,0
3,2018-10-05,24.83,24.83,24.83,24.83,23.544788,0
4,2018-10-08,24.865,24.865,24.865,24.865,23.577976,0


In [3]:
from statsmodels.stats.weightstats import ztest

# One-sample z-test on the 'Close' column.
# Null hypothesis: the mean of 'Close' equals 11.8.
close_prices = data['Close']
z_statistic, p_value = ztest(close_prices, value=11.8)
print(f"Z-Test One Sample: Z={z_statistic}, p-value={p_value}")


Z-Test One Sample: Z=567.3403656847046, p-value=0.0


In [4]:
# Two-sample z-test: is the mean of 'Open' different from the mean of 'Close'?
# NOTE(review): both columns come from the same rows of `data`, so the two
# samples are paired rather than independent, while ztest treats two samples
# as independent — confirm this is the intended comparison.
open_prices = data['Open']
close_prices = data['Close']
z_statistic, p_value = ztest(open_prices, close_prices)
print(f"Z-Test Two Sample: Z={z_statistic}, p-value={p_value}")


Z-Test Two Sample: Z=0.26221431147367136, p-value=0.7931562203571536


In [5]:
from scipy.stats import ttest_1samp

# One-sample t-test on the 'Close' column.
# Null hypothesis: the mean of 'Close' equals 11.8.
close_prices = data['Close']
t_statistic, p_value = ttest_1samp(close_prices, 11.8)
print(f"T-Test One Sample: T={t_statistic}, p-value={p_value}")


T-Test One Sample: T=567.3403656847046, p-value=0.0


In [6]:
from scipy.stats import ttest_ind

# Two-sample t-test: is the mean of 'Open' different from the mean of 'Close'?
# NOTE(review): both columns come from the same rows of `data`, so the two
# samples are paired rather than independent, while ttest_ind is the
# independent-samples test — confirm this is the intended comparison.
open_prices = data['Open']
close_prices = data['Close']
t_statistic, p_value = ttest_ind(open_prices, close_prices)
print(f"T-Test Two Sample: T={t_statistic}, p-value={p_value}")


T-Test Two Sample: T=0.26221431147367136, p-value=0.7932280313526325


In [7]:
from scipy.stats import chi2_contingency
import pandas as pd

# Split 'Volume' into four quantile-based bins, labelled from lowest to highest.
# The result is stored on `data` as a new 'Volume_Category' column.
quartile_labels = ['Low', 'Medium', 'High', 'Very High']
data['Volume_Category'] = pd.qcut(data['Volume'], q=4, labels=quartile_labels)

# Boolean flag per row: is 'Close' above the overall mean of 'Close'?
close_above_mean = data['Close'] > data['Close'].mean()

# Count rows for every (volume bin, above-mean flag) combination
contingency_table = pd.crosstab(data['Volume_Category'], close_above_mean)

# Chi-square test of independence on the contingency table
chi2_stat, p_value, dof, expected = chi2_contingency(contingency_table)
print(f"Chi-Square Test: Chi2={chi2_stat}, p-value={p_value}, Degrees of Freedom={dof}")


Chi-Square Test: Chi2=16.565591054779468, p-value=0.0008680489048294102, Degrees of Freedom=3


In [8]:
from scipy.stats import f_oneway

# One-way ANOVA: do 'Open', 'High' and 'Close' share the same mean?
# NOTE(review): the three groups are columns of the same rows of `data`,
# so they are not independent samples — confirm this comparison is intended.
price_groups = [data[column] for column in ('Open', 'High', 'Close')]
f_statistic, p_value = f_oneway(*price_groups)
print(f"ANOVA F-Test: F={f_statistic}, p-value={p_value}")


ANOVA F-Test: F=0.21756361995722637, p-value=0.8045101725702333


In [9]:
import numpy as np

# 'Close' plays the role of the reference series and 'Adj Close' the series
# that is compared against it.
actual = data['Close']
predicted = data['Adj Close']

# Element-wise difference and its absolute value, shared by the metrics below
difference = actual - predicted
abs_difference = np.abs(difference)

# MAD and MAE: both are the mean of the absolute differences.
# NOTE(review): the two metrics use the same expression, so they always match.
mad = np.mean(abs_difference)
mae = np.mean(abs_difference)

# MAPE: mean absolute difference relative to `actual`, as a percentage
mape = np.mean(np.abs(difference / actual)) * 100

# MSE: mean of the squared differences; RMSE: its square root
mse = np.mean(difference ** 2)
rmse = np.sqrt(mse)

# Report the metrics in the order MAD, MAPE, MAE, MSE, RMSE
print(f"MAD: {mad}")
print(f"MAPE: {mape}%")
print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")


MAD: 0.5786059149380388
MAPE: 2.285807346116324%
MAE: 0.5786059149380388
MSE: 0.5168167472848386
RMSE: 0.7188996781782828


In [10]:
# Exact element-wise comparison of 'Close' and 'Adj Close';
# True only if every row holds the same value in both columns.
rows_match = data['Close'].eq(data['Adj Close'])
identical = rows_match.all()
print(f"Are 'Close' and 'Adj Close' identical? {identical}")


Are 'Close' and 'Adj Close' identical? False
