# Analyzing Covid Data

1- Read CSV File

In [None]:
# Import pandas and load `covid.csv`, using the file's first column as the row index.
import pandas as pd

df = pd.read_csv("covid.csv", index_col=0)

2- Select the correct shape

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape
#Shape: (166620, 25)

3- Select the correct datatype

In [None]:
# Print a summary of the frame: index type, column names, non-null counts and dtypes.
df.info()
#new_cases: float64
#index: int64
#continent: object

4- Find the minimum and maximum values

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()
#The minimum number of cases is 1.0
#The maximum number of cases is 99470198.0

5- Total cases in the COVID-19 dataset

In [None]:
# Sum of the `total_cases` column over every row of the frame.
# NOTE(review): if `total_cases` is a running (cumulative) count per location and date,
# this sum counts the same cases many times - confirm this is the intended quantity.
df["total_cases"].sum()
#The total number of cases is 253558957405.0

6- Find the mean cases per day

In [None]:
# Average of the `new_cases` column, rounded to two decimal places.
mean_new_cases = df["new_cases"].mean()
round(mean_new_cases, 2)
#The mean number of new cases per day is 3788.70

7- Select values from a dataframe using indexing

In [None]:
# Keep every row but only the `continent` and `location` columns.
df1 = df.loc[:, ["continent", "location"]]

8- Drop columns from the dataframe

In [None]:
# Columns removed from the frame in place.
columns_to_drop = [
    "iso_code",
    "new_cases_smoothed",
    "new_deaths_smoothed",
    "total_cases_per_million",
    "new_cases_per_million",
    "new_cases_smoothed_per_million",
    "total_deaths_per_million",
    "new_deaths_per_million",
    "new_deaths_smoothed_per_million",
]
df.drop(columns=columns_to_drop, inplace=True)

9- Add more rows to a dataframe

In [None]:
# Append one extra row to the frame.
new_data = {
    "continent": ["Africa"],
    "location": ["Zimbabwe"],
    "date": ["2022-12-07"],
    "total_cases": [259356.0],
    "new_cases": [192.0],
    "total_deaths": [5622.0],
    "new_deaths": [2.0],
    "population_density": [42.729],
    "median_age": [19.6],
    "aged_65_older": [2.822],
    "aged_70_older": [1.845],
    "gdp_per_capita": [1899.767],
    "cardiovasc_death_rate": [307.846],
    "diabetes_prevalence": [1.85],
    "life_expectancy": [61.55],
    "population": [16320539.0],
}
new_row = pd.DataFrame(new_data)
# ignore_index=True renumbers the rows of the combined frame from 0.
df = pd.concat([df, new_row], ignore_index=True)

10- Update a specific cell value in the COVID-19 dataset

In [None]:
# Overwrite a single cell, addressed by row label and column name.
df.at[166620, "total_cases"] = 259357.0

11- Update multiple cell values in the COVID-19 dataset

In [None]:
# Overwrite two cells of the same column at once; values pair with row labels in order.
df.loc[[166619, 166618], "total_cases"] = [259358.0, 259357.0]

12- Remove rows from the dataframe

In [None]:
# Remove the two rows with these index labels, modifying the frame in place.
df.drop(index=[166619, 166620], inplace=True)

13- Use .loc to select rows based on a condition

In [None]:
# Rows whose `total_cases` value is greater than one million.
over_one_million = df["total_cases"] > 1_000_000.0
df_1m = df.loc[over_one_million]

14- Select specific columns and rows

In [None]:
# Pick three rows by label and two columns by name in a single .loc lookup.
row_labels = [5168, 5172, 163703]
case_columns = ["total_cases", "total_deaths"]
df_cases_death = df.loc[row_labels, case_columns]

15- Sort COVID-19 data in ascending order

In [None]:
# New frame ordered by `total_cases`, smallest first; `df` itself is not modified.
df_sorted = df.sort_values("total_cases", ascending=True)

16- Sort COVID-19 data in descending order

In [None]:
# New frame ordered by `total_cases`, largest first; `df` itself is not modified.
df_sorted_desc = df.sort_values("total_cases", ascending=False)

17- Sort the COVID-19 data by multiple columns

In [None]:
# Order by `total_cases` descending, breaking ties by `total_deaths` ascending.
sort_columns = ["total_cases", "total_deaths"]
sort_ascending = [False, True]
df_sorted_multi = df.sort_values(by=sort_columns, ascending=sort_ascending)

18- Add new columns using arithmetic operations

In [None]:
# Element-wise ratio of total cases to population, stored as a new column.
# NOTE: at this point the value is cases per person; no per-million scaling
# is applied in this statement.
df["total_cases_per_million"] = df["total_cases"] / df["population"]

19- Using vectorized operations to update a column

In [None]:
# Scale the stored cases-per-person ratio up to cases per million people, matching
# the column name. The previous factor of 1000 produced cases per thousand instead.
df["total_cases_per_million"] = df["total_cases_per_million"] * 1_000_000

20- Remove columns using del statement

In [None]:
# Remove the column from the frame in place using the `del` statement.
del df["total_cases_per_million"]

21- Rename columns

In [None]:
# Map the old column names to their new display names and rename in place.
display_names = {"total_cases": "Total Cases", "total_deaths": "Total Deaths"}
df.rename(columns=display_names, inplace=True)

22- Filter COVID-19 data using boolean indexing

In [None]:
# Rows whose location is India or China.
in_india_or_china = df["location"].isin(["India", "China"])
df_india_china = df.loc[in_india_or_china]

# Rows with strictly more than 100,000 and strictly fewer than 200,000 new cases.
daily_new_cases = df["new_cases"]
df_range = df.loc[(daily_new_cases > 100000) & (daily_new_cases < 200000)]

# Rows with at least 10,000 new cases.
df_greater_new_cases = df.loc[daily_new_cases >= 10000]

23- Read the data from Covid-19 dataset for visualization

In [None]:
# Reload the raw file for plotting, parsing the `date` column as datetimes.
# index_col=0 matches the first load of this file, so the file's first column is
# used as the row index here too instead of being kept as an extra data column.
df_for_visualization = pd.read_csv("covid.csv", index_col=0, parse_dates=["date"])

24- Filter data by month

In [None]:
# Keep only the rows for India dated in March 2020.
dates = df_for_visualization["date"]
is_march_2020 = (dates.dt.year == 2020) & (dates.dt.month == 3)
is_india = df_for_visualization["location"] == "India"
df_for_plot = df_for_visualization.loc[is_march_2020 & is_india]

25- Create a line plot

In [None]:
# Line chart of new cases against date, with grid lines enabled.
df_for_plot.plot.line(x="date", y="new_cases", grid=True)

# The maximum number of new cases was reported on 2020-03-30.
# The minimum number of new cases was reported on 2020-03-16.

26- Create a bar plot

In [None]:
# Bar chart of total deaths against date, with grid lines enabled.
# kind="bar" is required: DataFrame.plot draws a line chart by default, so the
# previous call produced a line plot rather than the bar plot this step asks for.
df_for_plot.plot(x="date", y="total_deaths", kind="bar", grid=True)

# The maximum number of deaths was reported on 2020-03-31.
# The minimum number of deaths was reported on 2020-03-16.