In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import io
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
import warnings

# Suppress only warnings attributed to statsmodels modules (e.g. frequency
# inference and optimizer convergence notices). Warnings from pandas, numpy
# and this notebook's own code stay visible, so deprecations and data
# problems are not silently hidden.
warnings.filterwarnings("ignore", module=r"statsmodels")

# --- Synthetic stand-in for Stock_Prices.csv ---
# One seeded Generator drives every random draw below, so
# "Restart Kernel -> Run All" reproduces the same prices, volumes and
# missing-value positions (and therefore the same plots and model fit).
SEED = 42
rng = np.random.default_rng(SEED)

dates = pd.date_range('2022-01-01', periods=500, freq='B') # 'B' = Business Day
n = len(dates)

# Create a random walk with a slight upward trend
trend = np.linspace(0, 20, n)
price_noise = rng.standard_normal(n).cumsum()
price = 100 + price_noise + trend
price = np.round(price, 2)

# Create volume data
volume = rng.integers(1_000_000, 5_000_000, n)
# Make volume slightly correlated with price changes
volume[1:] = volume[1:] + (np.abs(np.diff(price)) * 100_000).astype(np.int64)

# Create DataFrame.
# Volume is stored as float up front: NaN cannot live in an integer column,
# and relying on pandas to silently upcast on assignment is deprecated.
df_mock = pd.DataFrame({'Date': dates, 'Close': price, 'Volume': volume.astype(float)})
# Add some missing values (2% of Close, 1% of Volume), using the seeded rng
df_mock.loc[df_mock.sample(frac=0.02, random_state=rng).index, 'Close'] = np.nan
df_mock.loc[df_mock.sample(frac=0.01, random_state=rng).index, 'Volume'] = np.nan

# Serialise to CSV text so the rest of the notebook can load it like a real file
csv_data = df_mock.to_csv(index=False)
# ----------------------------------------------------

# In a real case, you would use this line instead of the code above:
# file_path = "Stock_Prices.csv"
# df = pd.read_csv(file_path)

# Task 1: Import the dataset
# The CSV text is wrapped in an in-memory buffer so read_csv can treat it
# exactly like a file on disk.
csv_file = io.StringIO(csv_data)
df = pd.read_csv(csv_file)

# Task 2: Explore the dataset (dimensions, first rows, dtypes / null counts)
print("--- 2. Initial Data Exploration ---")
print(f"Shape: {df.shape}")
print(f"\nHead:\n {df.head()}")
print("\nInfo:")
df.info()

# --- Data Preprocessing ---
print("\n--- 3. Preprocessing Data ---")
# Task 3: Ensure date column is in datetime format and use it as the index.
# The guard keeps this cell re-runnable: once 'Date' has become the index,
# a second run would otherwise fail with KeyError: 'Date'.
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')

# Handle missing values (common in time-series).
# - sort_index(): forward-fill is only meaningful in chronological order.
# - ffill(): assume a missing value equals the previous day's value
#   (replaces the deprecated fillna(method='ffill')).
# - bfill(): a gap in the very first row(s) has no previous value to copy,
#   so fill it from the next observation; a leftover NaN would make the
#   later decomposition step fail.
df = df.sort_index().ffill().bfill()
print("\nMissing values filled.")
print(df.describe())

# Task 4: Plot historical stock price trends
print("\n--- 4. Plotting Historical Trend ---")
# Explicit figure/axes handles instead of the implicit pyplot "current figure"
trend_fig, trend_ax = plt.subplots(figsize=(12, 6))
trend_ax.plot(df.index, df['Close'], label='Close Price')
trend_ax.set_title('Historical Stock Close Price Over Time', fontsize=16)
trend_ax.set_xlabel('Date', fontsize=12)
trend_ax.set_ylabel('Close Price ($)', fontsize=12)
trend_ax.legend()
trend_ax.grid(True, linestyle='--', alpha=0.6)
trend_fig.tight_layout()
trend_fig.savefig('stock_price_trend.png')
# Release the figure once it has been written to disk
plt.close(trend_fig)
print("Saved 'stock_price_trend.png'")

# Task 5: Calculate and plot moving averages
print("\n--- 5. Plotting Moving Averages ---")
# Rolling window length in trading days -> line colour.
# 20 days is the short-term average, 50 days the long-term one.
ma_styles = {20: 'orange', 50: 'red'}
for window in ma_styles:
    # The first (window - 1) rows are NaN because the window is not yet full
    df[f'MA_{window}'] = df['Close'].rolling(window=window).mean()

ma_fig, ma_ax = plt.subplots(figsize=(12, 6))
ma_ax.plot(df['Close'], label='Close Price', color='blue', alpha=0.5)
for window, colour in ma_styles.items():
    ma_ax.plot(df[f'MA_{window}'], label=f'{window}-Day Moving Average', color=colour, linewidth=2)
ma_ax.set_title('Stock Price with 20-Day and 50-Day Moving Averages', fontsize=16)
ma_ax.set_xlabel('Date', fontsize=12)
ma_ax.set_ylabel('Close Price ($)', fontsize=12)
ma_ax.legend()
ma_ax.grid(True, linestyle='--', alpha=0.6)
ma_fig.tight_layout()
ma_fig.savefig('stock_price_moving_averages.png')
plt.close(ma_fig)
print("Saved 'stock_price_moving_averages.png'")

# Task 6: Perform seasonality analysis
print("\n--- 6. Performing Seasonality Analysis ---")
# We use seasonal_decompose. A 'period' of 21 is common for
# monthly seasonality in daily business data (approx. 21 trading days/month).
# The additive model splits the series into trend + seasonal + residual.
decomposition = seasonal_decompose(df['Close'], model='additive', period=21)

fig = decomposition.plot()
fig.set_size_inches(12, 10)
# y=1.02 places the title just above the top edge of the figure canvas so it
# does not overlap the first panel.
fig.suptitle('Time Series Decomposition (Trend, Seasonality, Residuals)', y=1.02)
fig.tight_layout()
# bbox_inches='tight' grows the saved area to include the title; with the
# default bounding box everything above the canvas (the title) is cropped.
fig.savefig('stock_price_decomposition.png', bbox_inches='tight')
# Close this specific figure rather than whichever one is "current"
plt.close(fig)
print("Saved 'stock_price_decomposition.png'")

# Task 7: Analyze and plot correlation
print("\n--- 7. Analyzing Correlation ---")
# Pairwise correlation between the Close price level and traded Volume
price_volume = df.loc[:, ['Close', 'Volume']]
correlation_matrix = price_volume.corr()
print(f"Correlation Matrix:\n {correlation_matrix}")

# Render the 2x2 matrix as an annotated Seaborn heatmap on explicit axes
corr_fig, corr_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=corr_ax)
corr_ax.set_title('Correlation Heatmap: Close Price vs. Volume', fontsize=16)
corr_fig.tight_layout()
corr_fig.savefig('correlation_heatmap.png')
plt.close(corr_fig)
print("Saved 'correlation_heatmap.png'")

# Task 8: Use ARIMA model to forecast
print("\n--- 8. Forecasting with ARIMA ---")
# We'll use a simple ARIMA(p,d,q) model
# (1,1,1) is a common baseline:
# p=1: Autoregressive part, uses 1 previous observation
# d=1: Integrated part, data is differenced 1 time (to make it stationary)
# q=1: Moving Average part, uses 1 past forecast error
model = ARIMA(df['Close'], order=(1, 1, 1))
model_fit = model.fit()

# Forecast the next 30 business days
forecast_steps = 30
forecast = model_fit.forecast(steps=forecast_steps)

# Create a date range for the forecast: the 30 business days that follow the
# last observation. Adding one BDay first means a last observation that falls
# on a non-business day does not cause the first trading day to be skipped.
forecast_dates = pd.bdate_range(start=df.index[-1] + pd.offsets.BDay(1), periods=forecast_steps)
# Pass the raw values, not the forecast object itself: pd.Series(series, index=...)
# re-aligns on labels, so if the forecast carries a different index (e.g. an
# integer one when no date frequency could be attached) every value would
# become NaN and the plotted forecast line would be empty.
forecast_series = pd.Series(np.asarray(forecast), index=forecast_dates)

print("\nARIMA Model Summary:")
print(model_fit.summary())

# Plot the forecast
forecast_fig, forecast_ax = plt.subplots(figsize=(12, 6))
forecast_ax.plot(df['Close'].tail(100), label='Historical Data (Last 100 Days)') # Plot last 100 days
forecast_ax.plot(forecast_series, label='Forecast (Next 30 Days)', color='red', linestyle='--')
forecast_ax.set_title('Stock Price Forecast using ARIMA(1,1,1)', fontsize=16)
forecast_ax.set_xlabel('Date', fontsize=12)
forecast_ax.set_ylabel('Close Price ($)', fontsize=12)
forecast_ax.legend()
forecast_ax.grid(True, linestyle='--', alpha=0.6)
forecast_fig.tight_layout()
forecast_fig.savefig('stock_price_forecast.png')
plt.close(forecast_fig)
print("Saved 'stock_price_forecast.png'")

print("\n--- All tasks complete. ---")

--- 2. Initial Data Exploration ---
Shape: (500, 3)

Head:
          Date  Close     Volume
0  2022-01-03  99.02  1736055.0
1  2022-01-04  97.56  2078428.0
2  2022-01-05  97.86  1412937.0
3  2022-01-06  97.40  4282819.0
4  2022-01-07  98.66  2483398.0

Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    500 non-null    object 
 1   Close   490 non-null    float64
 2   Volume  495 non-null    float64
dtypes: float64(2), object(1)
memory usage: 11.8+ KB

--- 3. Preprocessing Data ---

Missing values filled.
            Close        Volume
count  500.000000  5.000000e+02
mean   124.031360  3.018652e+06
std     20.624597  1.172165e+06
min     92.020000  1.024366e+06
25%    100.705000  1.958740e+06
50%    128.090000  3.024169e+06
75%    135.015000  3.998509e+06
max    162.310000  5.148268e+06

--- 4. Plotting Historical Trend ---
Saved 'stock_price

In [None]:
# Environment setup: upgrade pip for the interpreter that runs this kernel.
# The original line was a bare shell command, which is a SyntaxError inside a
# Python cell and hard-codes the Windows-only "python.exe" name.
# sys.executable targets the kernel's own environment on any platform; in
# IPython the equivalent magic is `%pip install --upgrade pip`.
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
# To install the modelling dependency the same way, run:
#   [sys.executable, "-m", "pip", "install", "statsmodels"]