<a href="https://colab.research.google.com/github/MD03/DOAI/blob/main/covid_cases.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

# Load the dataset. The shortened URL is passed straight to pandas, so it is
# expected to resolve to a CSV resource.
url = "https://tinyurl.com/67a9dt5j"
covid_data = pd.read_csv(url)

# Rows must have every column used below. The source column of the target is
# listed on purpose: `NaN > threshold` evaluates to False, so a row with a
# missing smoothed case rate would otherwise be silently labelled 0
# ("not prone") instead of being excluded from the data.
required_columns = [
    'location',
    'total_cases',
    'total_deaths',
    'population',
    'new_cases_smoothed_per_million',
]
covid_data.dropna(subset=required_columns, inplace=True)

# Guard the per-capita divisions: a zero population would yield inf/NaN
# features, which scikit-learn estimators reject when fitting.
# `.copy()` gives an independent frame so the column assignments below do not
# trigger pandas' chained-assignment warning.
covid_data = covid_data[covid_data['population'] > 0].copy()

# Feature engineering: cumulative cases and deaths as a fraction of population
covid_data['cases_per_population'] = covid_data['total_cases'] / covid_data['population']
covid_data['deaths_per_population'] = covid_data['total_deaths'] / covid_data['population']

# Select relevant features for the model
features = ['cases_per_population', 'deaths_per_population']

# Threshold on `new_cases_smoothed_per_million` above which a row is labelled
# as prone to COVID. Kept as a named constant so it can be tuned in one place.
PRONE_THRESHOLD_PER_MILLION = 10

# Target variable: binary label (1 if the row exceeds the threshold, else 0)
covid_data['prone_to_covid'] = (
    covid_data['new_cases_smoothed_per_million'] > PRONE_THRESHOLD_PER_MILLION
).astype(int)

# Separate the model inputs from the label column before splitting.
feature_matrix = covid_data[features]
labels = covid_data['prone_to_covid']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    feature_matrix,
    labels,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree random forest and fit it on the training portion.
# `fit` returns the estimator itself, so construction and training are chained.
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = clf.predict(X_test)

# Report overall accuracy followed by the per-class metrics.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")