In [None]:
import pandas as pd
import plotly.express as px
# Load the raw dataset from the notebook's working directory.
df = pd.read_csv("covid_data.csv")

# A notebook cell renders only its final expression, so every earlier summary
# has to be shown explicitly or its output is silently thrown away.
df.info()  # prints its report directly to stdout
display(df.describe())  # `display` is supplied by the IPython/Jupyter kernel
df.head()


Data Cleaning

In [None]:
# Keep a column only when at least half of its rows are populated. `thresh` is
# documented as an integer count; ceil(len / 2) selects exactly the same
# columns as the fractional 0.5 * len(df).
min_non_null = (len(df) + 1) // 2

# Build the cleaned frame as one chain that returns new objects at every step.
# Filling a column selection in place (df[col].fillna(..., inplace=True)) is
# not guaranteed to write back into the frame, so results are assigned instead.
df = (
    df
    .dropna(thresh=min_non_null, axis=1)
    #Data restructuring
    .rename(columns={'location': 'country'})
    # Fix dtypes before imputing, so values coerced to NaN get filled as well.
    .assign(
        date=lambda frame: pd.to_datetime(frame['date']),
        new_cases=lambda frame: pd.to_numeric(frame['new_cases'], errors='coerce'),
    )
    # Forward-fill inside each country only; an ungrouped fill would carry the
    # last value of one country into the leading gaps of the next one.
    # NOTE(review): assumes rows of a country are already in date order - confirm.
    .assign(new_cases=lambda frame: frame.groupby('country')['new_cases'].ffill())
    .drop_duplicates()
    # Keep only rows that have a continent value.
    .loc[lambda frame: frame['continent'].notna()]
    # Explicit copy so later cells can add columns without SettingWithCopyWarning.
    .copy()
)


Feature Engineering

In [None]:
#Daily growth rate: new cases relative to the running total before them.
previous_total = df['total_cases'] - df['new_cases']
# A non-positive previous total has no meaningful rate (a zero would yield
# inf), so those denominators are masked to NaN before dividing.
df['growth_rate'] = df['new_cases'] / previous_total.where(previous_total > 0)


In [None]:
# Case fatality rate: total deaths divided by total cases, row by row.
deaths = df['total_deaths']
cases = df['total_cases']
df['fatality_rate'] = deaths.div(cases)

In [None]:
# Vaccination ratio: people vaccinated as a share of the population, per row.
df['vaccination_ratio'] = df['people_vaccinated'].div(df['population'])

Exploratory Data Analysis

In [None]:
#Distribution of daily new cases (only new_cases is plotted here)
ax = df['new_cases'].hist(bins=50)
# Label the figure so it can be read on its own; the trailing semicolon keeps
# the return value of ax.set() out of the cell output.
ax.set(title='Distribution of Daily New Cases',
       xlabel='New cases', ylabel='Number of records');


In [None]:
#Summary statistics by continent
# total_cases is treated as a running total elsewhere in this notebook, so
# summing every dated row would count each country many times over. Reduce
# each country to its peak total first, then add the countries up per continent.
country_totals = df.groupby(['continent', 'country'])['total_cases'].max()
country_totals.groupby(level='continent').sum().sort_values(ascending=False)

In [None]:
# Global daily cases: add up new_cases over all rows that share a date, then plot.
daily_cases = df.groupby('date')['new_cases'].sum()
daily_cases.plot(title='Global Daily Cases')

In [None]:
# Correlation analysis: pairwise correlation matrix of the selected columns.
corr_columns = ['total_cases', 'total_deaths', 'people_vaccinated']
df[corr_columns].corr()


In [None]:
# Compare countries: the ten countries with the highest peak total_cases.
top_countries = (
    df.groupby('country')['total_cases']
    .max()
    .sort_values(ascending=False)
    .head(10)
)
# An assignment renders nothing; end on the name so the ranking is displayed.
top_countries

In [None]:

#Geographical Visualization
# df holds many dated rows per country, so passing it whole hands plotly many
# conflicting total_cases values for each iso_code. Collapse to one row per
# country (its peak total) before mapping.
country_cases = df.groupby(['iso_code', 'country'], as_index=False)['total_cases'].max()
fig = px.choropleth(country_cases, locations="iso_code", color="total_cases",
                    hover_name="country", title="COVID-19 Total Cases by Country")
fig.show()


In [None]:
# Worldwide per-date sums of new cases and new deaths on one chart, used to
# look for waves that may correspond to lockdowns or vaccination campaigns.
trend_columns = ['new_cases', 'new_deaths']
rows_by_date = df.groupby('date')
global_trend = rows_by_date[trend_columns].sum()
global_trend.plot(title='Global Daily COVID-19 Cases and Deaths')
