In [1]:
# Data Processing
import pandas as pd
import numpy as np

# Modelling
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint

# Tree Visualisation
from sklearn.tree import export_graphviz
from IPython.display import Image
import graphviz

In [2]:
df = pd.read_csv('./Downloads/Covvid.csv')

In [3]:
df.fillna(0)

Unnamed: 0.1,Unnamed: 0,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered
0,0,643,18,30,595,95,1,2
1,1,920,26,36,858,277,8,6
2,2,1406,42,39,1325,486,16,3
3,3,2075,56,49,1970,669,14,10
4,4,2877,82,58,2737,802,26,9
...,...,...,...,...,...,...,...,...
8814,8814,21209,121,11674,9414,678,5,569
8815,8815,15988,146,9959,5883,525,4,213
8816,8816,1691,483,833,375,10,4,36
8817,8817,4552,140,2815,1597,71,1,465


In [4]:
columns_to_drop=['Unnamed: 0']

In [5]:
df= df.drop(columns_to_drop, axis=1)

In [6]:
df.fillna(0)

Unnamed: 0,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered
0,643,18,30,595,95,1,2
1,920,26,36,858,277,8,6
2,1406,42,39,1325,486,16,3
3,2075,56,49,1970,669,14,10
4,2877,82,58,2737,802,26,9
...,...,...,...,...,...,...,...
8814,21209,121,11674,9414,678,5,569
8815,15988,146,9959,5883,525,4,213
8816,1691,483,833,375,10,4,36
8817,4552,140,2815,1597,71,1,465


In [7]:

X = df.drop('Deaths', axis=1)
y = df['Deaths']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

In [10]:
# rf_model.fit(X_train, y_train)

In [None]:
rf_model.fit(X_train, y_train)
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
conf_matrix = confusion_matrix(y_test, y_pred)

In [None]:
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Confusion Matrix:\n", conf_matrix)

In [None]:
for i in range(3):
    tree = rf_model.estimators_[i]
    dot_data = export_graphviz(tree,
                               feature_names=X_train.columns,
                               filled=True,
                               max_depth=2,
                               impurity=False,
                               proportion=True)
    graph = graphviz.Source(dot_data)
    display(graph)

In [None]:
param_dist = {'n_estimators': randint(50,500),
              'max_depth': randint(1,20)}
rf_model = RandomForestClassifier()
rand_search = RandomizedSearchCV(rf_model,
                                 param_distributions = param_dist,
                                 n_iter=5,
                                 cv=5)
rand_search.fit(X_train, y_train)

In [None]:
param_dist = {'n_estimators': randint(50,500),
              'max_depth': randint(1,20)}
rf_model = RandomForestClassifier()
rand_search = RandomizedSearchCV(rf_model,
                                 param_distributions = param_dist,
                                 n_iter=5,
                                 cv=5)
rand_search.fit(X_train, y_train)
best_rf = rand_search.best_estimator_

print('Best hyperparameters:',  rand_search.best_params_)

In [None]:

y_pred = best_rf.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
ConfusionMatrixDisplay(confusion_matrix=cm).plot();

In [None]:
y_pred = best_rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

In [None]:

feature_importances = pd.Series(best_rf.feature_importances_, index=X_train.columns).sort_values(ascending=False)

feature_importances.plot.bar();