<a href="https://colab.research.google.com/github/Przemo200/Covid19_Power_BI_dashboard/blob/main/covid_BI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

# Source: Our World in Data COVID-19 dataset (public CSV on GitHub).
url = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"

# Columns kept for the dashboard export.
cols = [
    'iso_code', 'continent', 'location', 'date',
    'new_cases', 'new_deaths', 'total_cases', 'total_deaths',
    'people_vaccinated', 'people_fully_vaccinated', 'population'
]

# Running totals: a missing value means "no new report that day", so the last
# known value is carried forward instead of being reset to 0.
CUMULATIVE_COLS = ['total_cases', 'total_deaths', 'people_vaccinated', 'people_fully_vaccinated']

# Window (in rows per location) used for the smoothed daily series.
ROLLING_WINDOW_DAYS = 7


def prepare_covid_frame(raw, columns=None):
    """Clean the raw OWID frame and add the derived dashboard columns.

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame containing at least the columns listed in ``columns``.
    columns : list of str, optional
        Columns to keep; defaults to the module-level ``cols``.

    Returns
    -------
    pandas.DataFrame
        New frame (``raw`` is not modified), sorted by location and date, with
        the extra columns ``year_month``, ``new_cases_7day``,
        ``new_deaths_7day`` and ``Percent_Vaccinated`` (a 0..1 fraction).
    """
    columns = cols if columns is None else list(columns)
    out = raw[columns].copy()

    # Filtering out non-country aggregates. `na=False` keeps the boolean mask
    # valid (no TypeError on `~`) if an iso_code is missing.
    out = out[~out['iso_code'].str.startswith('OWID', na=False)]
    out = out.dropna(subset=['continent'])

    out['date'] = pd.to_datetime(out['date'])
    out['year_month'] = out['date'].dt.to_period('M').astype(str)

    # Chronological order per location is required by both the forward fill
    # and the rolling windows below.
    out = out.sort_values(by=['location', 'date'])

    # Carry cumulative totals forward within each location; only the leading
    # gap (before the first report) is left for the zero fill below.
    cumulative = [c for c in CUMULATIVE_COLS if c in out.columns]
    if cumulative:
        out[cumulative] = out.groupby('location')[cumulative].ffill()

    # Remaining numeric gaps (daily counts, leading gaps) become 0.
    numeric_cols = out.select_dtypes(include='number').columns
    out[numeric_cols] = out[numeric_cols].fillna(0)

    # Rolling average of the daily series, computed independently per location.
    by_location = out.groupby('location')
    for daily_col in ('new_cases', 'new_deaths'):
        out[f'{daily_col}_7day'] = by_location[daily_col].transform(
            lambda s: s.rolling(window=ROLLING_WINDOW_DAYS, min_periods=1).mean()
        )

    # Fraction of the population that is fully vaccinated, capped at 1.0.
    # A non-positive population is treated as unknown so it yields 0 rather
    # than inf (which the cap would otherwise turn into a bogus 100%).
    valid_population = out['population'].where(out['population'] > 0)
    out['Percent_Vaccinated'] = (
        (out['people_fully_vaccinated'] / valid_population)
        .fillna(0)
        .clip(upper=1.0)
    )

    return out


# In a notebook kernel __name__ is "__main__", so this runs when the cell is
# executed; the guard only keeps the download from firing if the code is
# imported elsewhere.
if __name__ == "__main__":
    print("Downloading...")
    # usecols avoids loading the columns that are dropped immediately anyway.
    df = prepare_covid_frame(pd.read_csv(url, usecols=cols))

    print("\nChecking data:")

    print(f"\nShape: {df.shape}")

    print("\nNull count per column:")
    print(df.isnull().sum())

    print("\nFirst 5 rows")
    print(df.head())

    df.to_csv('covid_final.csv', index=False)
    print("\nDone")

Downloading...

Checking data:

Shape: (395311, 15)

Not null sum:
iso_code                   0
continent                  0
location                   0
date                       0
new_cases                  0
new_deaths                 0
total_cases                0
total_deaths               0
people_vaccinated          0
people_fully_vaccinated    0
population                 0
year_month                 0
new_cases_7day             0
new_deaths_7day            0
Percent_Vaccinated         0
dtype: int64

First 5 rows
  iso_code continent     location       date  new_cases  new_deaths  \
0      AFG      Asia  Afghanistan 2020-01-05        0.0         0.0   
1      AFG      Asia  Afghanistan 2020-01-06        0.0         0.0   
2      AFG      Asia  Afghanistan 2020-01-07        0.0         0.0   
3      AFG      Asia  Afghanistan 2020-01-08        0.0         0.0   
4      AFG      Asia  Afghanistan 2020-01-09        0.0         0.0   

   total_cases  total_deaths  people_vaccina