In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import LabelEncoder

# Load COVID-19 data from the owid-covid-data.csv file
data_url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
df = pd.read_csv(data_url)

# Select relevant features and target variables for binary classification
features = ['total_cases_per_million', 'new_cases_per_million', 'total_deaths_per_million', 'new_deaths_per_million']
target_cases = 'new_cases_smoothed_per_million'
target_deaths = 'new_deaths_smoothed_per_million'

# Filter the DataFrame to keep only the relevant columns and rows where 'location' is 'Canada'
df = df[df['location'] == 'Canada'][['location'] + features + [target_cases, target_deaths]].dropna()

# Encode categorical variable 'location' using Label Encoding
label_encoder = LabelEncoder()
df['location'] = label_encoder.fit_transform(df['location'])

# Create binary target variables for cases and deaths (1 if increase, 0 if not increase)
df['increase_cases'] = (df[target_cases].diff() > 0).astype(int)
df['increase_deaths'] = (df[target_deaths].diff() > 0).astype(int)

# Data Preprocessing for Cases
X_cases = df[features + ['location']]  # Features for cases
y_cases = df['increase_cases']          # Binary target variable for cases

# Data Preprocessing for Deaths
X_deaths = df[features + ['location']]  # Features for deaths
y_deaths = df['increase_deaths']        # Binary target variable for deaths

# Split the data into training and testing sets for both cases and deaths
X_cases_train, X_cases_test, y_cases_train, y_cases_test = train_test_split(X_cases, y_cases, test_size=0.2, random_state=42)
X_deaths_train, X_deaths_test, y_deaths_train, y_deaths_test = train_test_split(X_deaths, y_deaths, test_size=0.2, random_state=42)

# Logistic Regression Model for Cases
model_cases = LogisticRegression()
model_cases.fit(X_cases_train, y_cases_train)  # Train the model for cases
y_cases_pred = model_cases.predict(X_cases_test)  # Make predictions for cases

# Logistic Regression Model for Deaths
model_deaths = LogisticRegression()
model_deaths.fit(X_deaths_train, y_deaths_train)  # Train the model for deaths
y_deaths_pred = model_deaths.predict(X_deaths_test)  # Make predictions for deaths

# Calculate MAE, MSE, MAPE, and print for cases
mae_cases = mean_absolute_error(y_cases_test, y_cases_pred)
mse_cases = mean_squared_error(y_cases_test, y_cases_pred)
mape_cases = (100 * (abs(y_cases_test - y_cases_pred) / y_cases_test)).mean()

# Calculate MAE, MSE, MAPE, and print for deaths
mae_deaths = mean_absolute_error(y_deaths_test, y_deaths_pred)
mse_deaths = mean_squared_error(y_deaths_test, y_deaths_pred)
mape_deaths = (100 * (abs(y_deaths_test - y_deaths_pred) / y_deaths_test)).mean()

# Print results for the models
print("Logistic Regression Model for Cases:")
print(f"Mean Absolute Error: {mae_cases}")
print(f"Mean Squared Error: {mse_cases}")
print(f"Mean Absolute Percentage Error (MAPE): {mape_cases}%")

print("\nLogistic Regression Model for Deaths:")
print(f"Mean Absolute Error: {mae_deaths}")
print(f"Mean Squared Error: {mse_deaths}")
print(f"Mean Absolute Percentage Error (MAPE): {mape_deaths}%")

Logistic Regression Model for Cases:
Mean Absolute Error: 0.26277372262773724
Mean Squared Error: 0.26277372262773724
Mean Absolute Percentage Error (MAPE): inf%

Logistic Regression Model for Deaths:
Mean Absolute Error: 0.28832116788321166
Mean Squared Error: 0.28832116788321166
Mean Absolute Percentage Error (MAPE): inf%
