In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load data from CSV file
data = pd.read_csv('webDev5.csv')

# Convert date column to datetime and create additional columns for year, month, and day of week
data['date'] = pd.to_datetime(data['DATE'])
data['year'] = data['date'].dt.year
data['month'] = data['date'].dt.month
data['dayofweek'] = data['date'].dt.dayofweek

# Split data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, shuffle=False)

# Create linear regression model
model = LinearRegression()

# Fit model to training data
X_train = train_data[['year', 'month', 'dayofweek']]
y_train = train_data['total_laid_off']
model.fit(X_train, y_train)

# Evaluate model performance on test data
X_test = test_data[['year', 'month', 'dayofweek']]
y_test = test_data['total_laid_off']
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f'MSE: {mse:.4f}, MAE: {mae:.4f}')

# Generate forecast for next 5 years
last_date = data['date'].max()
forecast_dates = pd.date_range(last_date, periods=5*12, freq='M')
forecast_data = pd.DataFrame({'date': forecast_dates})
forecast_data['year'] = forecast_data['date'].dt.year
forecast_data['month'] = forecast_data['date'].dt.month
forecast_data['dayofweek'] = forecast_data['date'].dt.dayofweek
forecast_data['total_laid_off'] = model.predict(forecast_data[['year', 'month', 'dayofweek']])
forecast_data.set_index('date', inplace=True)

# Plot forecasted values along with historical data
plt.figure(figsize=(12,8))
plt.plot(data['total_laid_off'], label='Actual')
plt.plot(forecast_data['total_laid_off'], label='Forecast')
plt.legend()
plt.show()


KeyError: 'DATE'