In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Load the dataset and preview the first rows.
covid = pd.read_csv('data/covid.csv')
# `head` must be called; the bare attribute only displays the bound-method repr.
covid.head()

In [None]:


# Convert date column to datetime format
covid["date"] = pd.to_datetime(covid["date"])

# Handling missing values: forward fill.
# `fillna(method='ffill')` is deprecated in recent pandas, so `ffill()` is
# called directly; reassigning instead of `inplace=True` gives the same frame.
# NOTE(review): the fill runs over the whole frame in row order, so a gap at
# the start of one location's rows is filled from the rows stored above it
# (possibly another location) -- confirm this is intended.
covid = covid.ffill()

# Basic EDA
print("Dataset Shape:", covid.shape)
print("Dataset Info:")
# `info()` prints its report itself and returns None, so it is not wrapped in
# print() (that would emit an extra "None" line).
covid.info()
print("Summary Statistics:")
print(covid.describe())

# Plot new cases over time, one line per location
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=covid, x="date", y="new_cases", hue="location", ax=ax)
ax.set_title("New COVID-19 Cases Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("New Cases")
plt.show()

# Columns used by the model. The scaler is fitted on features + target, in
# this order.
features = ['total_cases', 'total_deaths', 'total_tests', 'population']
target = 'new_cases'
model_cols = features + [target]
seq_length = 10   # number of consecutive rows in one input window
test_size = 0.2   # share of each location's most recent windows held out

# Order rows by location and date so that the rolling mean and the windows
# below always run over consecutive observations of a single location.
covid = covid.sort_values(['location', 'date']).reset_index(drop=True)

# Feature Engineering: 7-row rolling average of new cases, computed within
# each location so one location's values never enter another's average.
covid['new_cases_avg'] = covid.groupby('location')['new_cases'].transform(
    lambda cases: cases.rolling(window=7).mean()
)


def _train_window_count(n_rows):
    """Return how many of a location's leading windows go to the training split.

    A series of ``n_rows`` rows yields ``n_rows - seq_length`` windows (none if
    the series is too short); the last ``test_size`` share of them, rounded up,
    is held out for testing.
    """
    n_windows = max(n_rows - seq_length, 0)
    return n_windows - int(np.ceil(n_windows * test_size))


# Rows with a missing model input or location cannot be used: NaNs would
# propagate through the network and turn the loss into NaN.
# NOTE(review): dropping rows from the middle of a series leaves a gap inside
# the affected windows -- confirm such gaps are rare in this dataset.
usable_index = covid.dropna(subset=model_cols + ['location']).index

# Collect the rows touched by training windows. The last training window of a
# location starts at row n_train - 1 and its target sits at row
# n_train - 1 + seq_length, so the first n_train + seq_length rows are used.
train_rows = []
for _, group in covid.loc[usable_index].groupby('location', sort=False):
    n_train = _train_window_count(len(group))
    if n_train > 0:
        train_rows.extend(group.index[:n_train + seq_length])

if not train_rows:
    raise ValueError(
        f"No location has enough complete rows to build a training window "
        f"of length {seq_length}."
    )

# Normalize data for LSTM model. The scaler is fitted on training rows only so
# no statistics of the held-out period leak into training; held-out values may
# therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(covid.loc[train_rows, model_cols])
covid[model_cols] = scaler.transform(covid[model_cols])

# Prepare data for LSTM: sliding windows built per location, so no window
# mixes rows from two locations. Each window of `seq_length` feature rows is
# paired with the target of the row that follows it.
X, y, in_train = [], [], []
for _, group in covid.loc[usable_index].groupby('location', sort=False):
    data = group[features].values
    target_data = group[target].values.reshape(-1, 1)
    n_train = _train_window_count(len(group))
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(target_data[i+seq_length])
        in_train.append(i < n_train)

X, y = np.array(X), np.array(y)
in_train = np.array(in_train, dtype=bool)

# Splitting dataset chronologically within each location. A shuffled split
# would put heavily overlapping windows on both sides and inflate test scores.
X_train, X_test = X[in_train], X[~in_train]
y_train, y_test = y[in_train], y[~in_train]

# Seed the Python, NumPy and TensorFlow random generators so weight
# initialisation, dropout masks and batch shuffling repeat across
# Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Build LSTM Model: two stacked 50-unit LSTM layers, each followed by dropout,
# and a single linear output unit for the target.
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first layer of a Sequential is deprecated in Keras 3.
model = Sequential([
    tf.keras.Input(shape=(seq_length, len(features))),
    LSTM(50, activation='relu', return_sequences=True),
    Dropout(0.2),
    LSTM(50, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

# Train the model.
# NOTE(review): the test split doubles as validation data here. Nothing is
# tuned on it in this cell, but confirm no later step selects a model from
# these validation scores.
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Predictions on the held-out windows
y_pred = model.predict(X_test)

# Model evaluation. The target was min-max scaled before windowing, so these
# errors are in scaled units rather than raw case counts.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# RMSE is derived from MSE directly: the `squared=False` argument of
# mean_squared_error is deprecated and removed in recent scikit-learn releases.
rmse = np.sqrt(mse)

print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")

# Save the trained model
# NOTE(review): presumably the models/ directory must already exist -- confirm.
model.save('models/covid_lstm_model.h5')
