1. Data Preprocessing and Exploration

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Load the raw price history. The path is relative, so it is resolved against
# the kernel's current working directory.
df = pd.read_csv('Meta_Dataset.csv')

# Convert 'Date' from text to real timestamps so it can serve as a time index.
df['Date'] = pd.to_datetime(df['Date'])

# Index by date and put the rows in chronological order. Later cells rely on
# row order (shift(-1) for the next-day target, index[-30:] for the most
# recent rows), so the order is enforced here instead of being assumed.
# Assignment is used rather than inplace=True so the statement is explicit
# about which frame downstream cells receive.
df = df.set_index('Date').sort_index()

# Display basic information about the dataset.
# DataFrame.info() writes its report to stdout itself and returns None;
# wrapping it in print() only appended a stray "None" line to the output.
df.info()
print(df.describe())

2. Exploratory Data Analysis (EDA)

In [None]:
# Line chart of the closing price across the whole date index
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df.index, df['Close'])
ax.set_title('Meta Stock Closing Price Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Closing Price')
# Tilt the date labels so neighbouring ticks do not overlap
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

# Histogram of closing prices with a kernel density estimate overlaid
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['Close'], kde=True, ax=ax)
ax.set_title('Distribution of Closing Prices')
ax.set_xlabel('Closing Price')
plt.show()

# Side-by-side box plots of the four daily price columns
price_columns = ['Open', 'High', 'Low', 'Close']
fig, ax = plt.subplots(figsize=(12, 6))
df[price_columns].boxplot(ax=ax)
ax.set_title('Box Plot of Price Data')
ax.set_ylabel('Price')
plt.show()

# Correlation heatmap
# Correlate numeric columns only. From pandas 2.0 onward DataFrame.corr() no
# longer drops non-numeric columns silently, so any text column in the CSV
# would make a bare df.corr() raise. select_dtypes() behaves the same on
# older and newer pandas versions.
correlations = df.select_dtypes(include='number').corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlations, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap of Features')
plt.show()

3. Time Series Analysis

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Split the closing-price series into trend, seasonal and residual parts
# using a multiplicative model with a period of 30 observations.
result = seasonal_decompose(df['Close'], model='multiplicative', period=30)

# One panel per component, stacked vertically in a fixed order
components = [
    ('Observed', result.observed),
    ('Trend', result.trend),
    ('Seasonal', result.seasonal),
    ('Residual', result.resid),
]
fig, axes = plt.subplots(len(components), 1, figsize=(12, 16))
for ax, (title, series) in zip(axes, components):
    series.plot(ax=ax)
    ax.set_title(title)
fig.tight_layout()
plt.show()

4. Machine Learning Model Implementation

# Prepare the data for ML model.
# Rows must be in chronological order for shift(-1) to mean "the next row's
# close" in time and for the chronological split below, so sort explicitly.
df = df.sort_index()

# Target = the following row's closing price.
df['Target'] = df['Close'].shift(-1)

# The final row has no following close, so its Target is NaN; dropna() removes
# it together with any other row that contains a missing value.
# NOTE: this cell reassigns df, so re-running it on its own trims one more
# trailing row each time. Re-run from the data-loading cell instead.
df = df.dropna()

feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
X = df[feature_columns]
y = df['Target']

# Split the data chronologically: the first 80% of rows train the model and
# the most recent 20% are held out. A shuffled split would scatter future rows
# into the training set, letting the model see prices from days surrounding
# each test row and overstating the scores. random_state is dropped because
# it has no effect when shuffle=False.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out (most recent) period
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")

5. Prediction and Evaluation

In [None]:
# Scatter of held-out targets against the model's predictions
fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(y_test, y_pred, alpha=0.5)

# Dashed red y = x reference line spanning the range of actual values;
# a perfect prediction would sit exactly on it.
value_range = [y_test.min(), y_test.max()]
ax.plot(value_range, value_range, 'r--', lw=2)

ax.set_xlabel('Actual Price')
ax.set_ylabel('Predicted Price')
ax.set_title('Actual vs Predicted Stock Prices')
fig.tight_layout()
plt.show()

# Plot the last 30 rows of actual vs predicted prices.
# The model was fitted on 'Target' (the following row's close, created with
# shift(-1) in the modelling cell), so each prediction is an estimate of the
# NEXT close. It must therefore be compared with 'Target'. Comparing it with
# the same row's 'Close' would offset the two curves by one trading day.
feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
last_30_days = df.index[-30:]
actual_prices = df.loc[last_30_days, 'Target']
predicted_prices = model.predict(df.loc[last_30_days, feature_columns])

plt.figure(figsize=(12, 6))
# The x-axis shows the date whose features were used; both curves show the
# close of the trading day that follows it.
plt.plot(last_30_days, actual_prices, label='Actual Next-Day Close')
plt.plot(last_30_days, predicted_prices, label='Predicted Next-Day Close')
plt.title('Actual vs Predicted Stock Prices (Last 30 Days)')
plt.xlabel('Date of Input Features')
plt.ylabel('Price')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

Analysis Summary:

1. The Meta stock price data spans from May 18, 2012, to April 15, 2024, providing a comprehensive history of the stock's performance.

2. The closing price shows an overall upward trend over the years, with significant volatility and some major price movements.

3. The stock price distribution is right-skewed, indicating more frequent lower prices with some high-value outliers.

4. There is a strong correlation between Open, High, Low, and Close prices, which is expected in stock price data.

5. The time series decomposition reveals a clear upward trend, with some seasonal patterns and residual noise.

6. The Linear Regression model provides a baseline for stock price prediction, but its performance may be limited due to the complexity of stock price movements.

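7. The R-squared score indicates how much of the stock price variance is explained by the model. A higher R-squared (closer to 1) suggests better predictive power. Note that consecutive closing prices are highly autocorrelated, so a high R-squared on price levels does not by itself demonstrate useful forecasting skill.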

8. The actual vs predicted price plot shows the model's accuracy, with points closer to the diagonal line indicating better predictions.

9. The last-30-days comparison gives an idea of the model's short-term predictive capabilities.

