Topic Name: COVID-19 Data Analysis

Cell 1: Importing Essential Libraries for Data Manipulation and Analysis

In [1]:
# Data Manipulation
import pandas as pd
import numpy as np

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits import mplot3d

import os
import warnings
# Hide FutureWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)
# Hide RuntimeWarning messages as well. The filter is installed globally:
# filters set inside `warnings.catch_warnings()` are discarded as soon as the
# `with` block exits, so a filter set there would never reach the later cells.
warnings.simplefilter(action='ignore', category=RuntimeWarning)

Cell 2: Loading and Exploring the Data


In [None]:
# Read the CSV file into a DataFrame
data_frame = pd.read_csv('/kaggle/input/corona-virus-report/country_wise_latest.csv')

# Preview the first ten rows. Only the last expression of a notebook cell is
# rendered automatically, so intermediate results are printed explicitly.
print(data_frame.head(10))

# A summary of not-null counts and datatypes of the data frame columns.
# DataFrame.info() writes its report directly to stdout and returns None,
# so its result is neither stored nor printed.
data_frame.info()

# A summary of statistics pertaining to the data frame columns
print('Data frame columns are summarized as- \n:')
print(data_frame.describe())

# Column-wise count of missing values
print(data_frame.isnull().sum())

# Copy of the data with every row that has a missing value removed
df_cleaned = data_frame.dropna()

Cell 3: Calculating Totals and Sorting Data

In [None]:
# Totals across all rows for confirmed, recovered and death counts
total_confirmed, total_recovered, total_death = (
    data_frame[column].sum() for column in ('Confirmed', 'Recovered', 'Deaths')
)

print("Total Confirmed case", total_confirmed)
print("Total Recovered case", total_recovered)
print('Total Death case', total_death)

# Each ranking below is the full table sorted in descending order by one
# metric; the first five rows are the highest and the last five the lowest.

# Confirmed cases: top five, then bottom five
sort_confirmed = data_frame.sort_values('Confirmed', ascending=False)
print(sort_confirmed.head(5), sort_confirmed.tail(5), sep='\n')

# Deaths: top five, then bottom five
sort_deaths = data_frame.sort_values('Deaths', ascending=False)
print(sort_deaths.head(5), sort_deaths.tail(5), sep='\n')

# Recovered cases: top five, then bottom five
sort_recovered = data_frame.sort_values('Recovered', ascending=False)
print(sort_recovered.head(5), sort_recovered.tail(5), sep='\n')

# Active cases: top five, then bottom five
sort_active = data_frame.sort_values('Active', ascending=False)
print(sort_active.head(5), sort_active.tail(5), sep='\n')


Cell 4: Plotting Confirmed Cases Country-wise

In [None]:
# Line chart of the Confirmed column against Country/Region on the dark theme
plt.style.use('dark_background')
fig, ax = plt.subplots(figsize=(15, 4))
ax.plot(data_frame['Country/Region'], data_frame['Confirmed'])
ax.set_xlabel("Country/Region")
ax.set_ylabel('Confirmed')
ax.set_title('Confirmed cases country-wise', fontsize=16, fontweight='bold', style='italic')
plt.show()


Cell 5: Plotting Various COVID-19 Trends

In [None]:
# Plotting multiple trends: Confirmed, Deaths, Recovered, Active
# The style is selected before the figure is created: figure-level rcParams
# (such as the face colour) are read when the figure is constructed, so
# styling afterwards would not fully apply if this cell is run on its own.
plt.style.use('dark_background')
plt.figure(figsize=(15, 6))

# Plotting Confirmed cases
plt.plot(data_frame['Country/Region'], data_frame['Confirmed'], linestyle='-', color='#051282', label='Confirmed', lw=3)

# Plotting Deaths
plt.plot(data_frame['Country/Region'], data_frame['Deaths'], linestyle='-.', color='#ed0231', label='Deaths')

# Plotting Recovered cases
plt.plot(data_frame['Country/Region'], data_frame['Recovered'], linestyle='--', color='#30c90e', label='Recovered', lw=2)

# Plotting active cases
plt.plot(data_frame['Country/Region'], data_frame['Active'], linestyle=':', color='w', label='Active', lw=2)

plt.xlabel('Country/Region')
plt.ylabel('No: of cases (in millions)')
plt.title('Global Trend of COVID-19 Cases')
plt.legend()
plt.show()


Cell 6: Plotting Relationships Between Various Metrics

In [None]:
# Plot Confirmed against Deaths, Recovered and Active, one figure per metric.
# Every figure gets a title in the same wording, so the three charts can be
# told apart once they are rendered one after another.
plt.style.use('dark_background')
for metric, label in (('Deaths', 'Death'), ('Recovered', 'Recovered'), ('Active', 'Active')):
    plt.figure(figsize=(15, 4))
    sns.lineplot(x=metric, y="Confirmed", data=data_frame)
    plt.title(f'Confirmed cases vs {label} cases')
    plt.show()


Cell 7: Plotting New Cases Trends

In [None]:
# Plotting New cases, Deaths, Recovered cases
# The style is selected before the figure is created: figure-level rcParams
# (such as the face colour) are read when the figure is constructed, so
# styling afterwards would not fully apply if this cell is run on its own.
plt.style.use('dark_background')
plt.figure(figsize=(15, 6))

# Plotting New cases
plt.plot(data_frame['Country/Region'], data_frame['New cases'], linestyle='-', color='#051282', label='New_cases', lw=3)

# Plotting New Death cases
plt.plot(data_frame['Country/Region'], data_frame['New deaths'], linestyle='-.', color='#ed0231', label='New_deaths')

# Plotting New Recovered cases
plt.plot(data_frame['Country/Region'], data_frame['New recovered'], linestyle='--', color='#30c90e', label='New_recovered', lw=2)

plt.xlabel('Country')
plt.ylabel('No: of cases')
plt.title('Global Trend of New COVID-19 Cases')
plt.legend()
plt.show()


Cell 8: Visualizing WHO Region-wise Data

In [None]:
# Confirmed cases WHO Region-wise: regional totals, sorted ascending
Who_regionscon = data_frame.groupby('WHO Region')['Confirmed'].sum().sort_values()
print(Who_regionscon)

# Bar chart of the regional totals. The aggregated series is plotted instead
# of the raw rows because sns.barplot reduces each category with the mean by
# default, which would show the average per row rather than the regional
# total that is printed above and used in the pie chart.
plt.figure(figsize=(9, 4))
sns.barplot(x='WHO Region', y='Confirmed', data=Who_regionscon.reset_index(), palette='pastel')

# Pie chart of each region's share of the total
plt.figure(figsize=(4, 4))
plt.pie(Who_regionscon, labels=Who_regionscon.index, autopct='%1.1f%%', startangle=140, colors=sns.color_palette('pastel'))
plt.title("Confirmed cases vs WHO Region")
plt.show()


Cell 9: Visualizing Deaths WHO Region-wise

In [None]:
# Death cases WHO Region-wise: regional totals, sorted ascending
Who_regionsdea = data_frame.groupby('WHO Region')['Deaths'].sum().sort_values()
print(Who_regionsdea)

# Bar chart of the regional totals. The aggregated series is plotted instead
# of the raw rows because sns.barplot reduces each category with the mean by
# default, which would show the average per row rather than the regional
# total that is printed above and used in the pie chart.
plt.figure(figsize=(8, 4))
sns.barplot(x='WHO Region', y='Deaths', data=Who_regionsdea.reset_index(), palette='magma')

# Pie chart of each region's share of the total
plt.figure(figsize=(4, 4))
plt.pie(Who_regionsdea, labels=Who_regionsdea.index, autopct='%1.1f%%', startangle=140, colors=sns.color_palette('magma'))
plt.title("Death cases vs WHO Region")
plt.show()


Cell 10: Visualizing Recovered Cases WHO Region-wise

In [None]:
# Recovered cases WHO Region-wise: regional totals, sorted ascending
Who_regionsrec = data_frame.groupby('WHO Region')['Recovered'].sum().sort_values()
print(Who_regionsrec)

# Bar chart of the regional totals. The aggregated series is plotted instead
# of the raw rows because sns.barplot reduces each category with the mean by
# default, which would show the average per row rather than the regional
# total that is printed above and used in the pie chart.
plt.figure(figsize=(8, 4))
sns.barplot(x='WHO Region', y='Recovered', data=Who_regionsrec.reset_index(), palette='cubehelix')

# Pie chart of each region's share of the total
plt.figure(figsize=(4, 4))
plt.pie(Who_regionsrec, labels=Who_regionsrec.index, autopct='%1.1f%%', startangle=140, colors=sns.color_palette('cubehelix'))
plt.title("Recovered cases vs WHO Region")
plt.show()


Cell 11: Visualizing Active Cases WHO Region-wise

In [None]:
# Active cases WHO Region-wise: regional totals, sorted ascending
Who_regionsact = data_frame.groupby('WHO Region')['Active'].sum().sort_values()
print(Who_regionsact)

# Bar chart of the regional totals. The aggregated series is plotted instead
# of the raw rows because sns.barplot reduces each category with the mean by
# default, which would show the average per row rather than the regional
# total that is printed above and used in the pie chart.
plt.figure(figsize=(9, 4))
sns.barplot(x='WHO Region', y='Active', data=Who_regionsact.reset_index(), palette='pastel')

# Pie chart of each region's share of the active cases. It must be fed the
# active-case totals computed in this cell; plotting the confirmed-case series
# here would put the wrong data under the "Active cases" title.
plt.figure(figsize=(4, 4))
plt.pie(Who_regionsact, labels=Who_regionsact.index, autopct='%1.1f%%', startangle=140, colors=sns.color_palette('pastel'))
plt.title("Active cases vs WHO Region")
plt.show()


Cell 12: Plotting 1 Week Percentage Increase

In [None]:
# Switch to the ggplot theme and draw '1 week % increase' against 'WHO Region'
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(15, 4))
sns.lineplot(data=data_frame, x='1 week % increase', y="WHO Region", ax=ax)
plt.show()


Cell 13: Loading and Visualizing Nepal's Data

In [None]:
# Select the row(s) whose Country/Region is Nepal
is_nepal = data_frame['Country/Region'].eq('Nepal')
nepal = data_frame.loc[is_nepal]
print(nepal)

# Horizontal bar chart built from the Nepal selection (wide-form input)
fig, ax = plt.subplots(figsize=(15, 4))
sns.barplot(data=nepal, orient='h', ax=ax)
ax.set_xlabel("Count")
ax.set_title('Nepal')
plt.show()


Cell 14: Decision Tree and Random Forest Models

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Prepare the data: column names chosen for the modelling step.
# NOTE(review): presumably these are the model input columns; the cell ends
# before they are used, so confirm against the intended target variable.
features = [
    'Deaths',
    'Recovered',
    'Active',
]


PROJECT ENDS. THANK YOU FOR READING. BY SANYAM, PARV GOEL, RAHUL SHARMA AND VANSHAJ