In [69]:
'''Create a Series and DataFrame using the data given in the table below.

 COVID-19 Pandemic by Location

Location	        Cases	    Deaths	    Recovered
United States	    28,833,039	517,204	    No data
India	            11,079,979	156,938	    10,763,451
Brazil	            10,517,232	254,263	    9,386,440
Russia	            4,246,079	86,122	    3,811,797
United Kingdom	    4,170,519	122,705	    No data
France	            3,736,016	86,332	    No data
Spain	            3,188,553	69,142	    No data
Italy	            2,907,825	97,507	    2,398,352
Turkey	            2,693,164	28,503	    2,565,723
Germany	            2,444,169	70,601	    2,242,767
'''

import pandas as pd


# Table contents in column-oriented form: one list per column, one position per location.
# "Recovered" is kept as strings here because the source table contains "No data"
# placeholders next to the numeric entries.
data = {
    "Location": [
        "United States",
        "India",
        "Brazil",
        "Russia",
        "United Kingdom",
        "France",
        "Spain",
        "Italy",
        "Turkey",
        "Germany",
    ],
    "Cases": [
        28_833_039,
        11_079_979,
        10_517_232,
        4_246_079,
        4_170_519,
        3_736_016,
        3_188_553,
        2_907_825,
        2_693_164,
        2_444_169,
    ],
    "Deaths": [
        517_204,
        156_938,
        254_263,
        86_122,
        122_705,
        86_332,
        69_142,
        97_507,
        28_503,
        70_601,
    ],
    "Recovered": [
        "No data",
        "10763451",
        "9386440",
        "3811797",
        "No data",
        "No data",
        "No data",
        "2398352",
        "2565723",
        "2242767",
    ],
}

# The whole table as a DataFrame (default integer index).
df = pd.DataFrame(data)

# One named Series per column, built from the same lists. The dict preserves insertion
# order, so the unpacking order below matches Location, Cases, Deaths, Recovered.
location_series, cases_series, deaths_series, recovered_series = (
    pd.Series(values, name=column) for column, values in data.items()
)

df

Unnamed: 0,Location,Cases,Deaths,Recovered
0,United States,28833039,517204,No data
1,India,11079979,156938,10763451
2,Brazil,10517232,254263,9386440
3,Russia,4246079,86122,3811797
4,United Kingdom,4170519,122705,No data
5,France,3736016,86332,No data
6,Spain,3188553,69142,No data
7,Italy,2907825,97507,2398352
8,Turkey,2693164,28503,2565723
9,Germany,2444169,70601,2242767


In [70]:
''' 
    Create a new column (Status) in Qs 2 Dataframe. Analyze the “Cases” column, in case of 
    less than 3 million cases set the Status to low. In case of greater than 3 million and 
    less than 4 million cases Status is medium, otherwise high.
'''

def get_status(case, medium_from=3_000_000, high_from=4_000_000):
    """Classify a case count as "Low", "Medium" or "High".

    Parameters
    ----------
    case : int or float
        Case count to classify.
    medium_from : int or float, default 3_000_000
        Smallest case count classified as "Medium"; anything below it is "Low".
    high_from : int or float, default 4_000_000
        Smallest case count classified as "High".

    Returns
    -------
    str
        "Low" when ``case < medium_from``, "Medium" when
        ``medium_from <= case < high_from``, otherwise "High".

    Notes
    -----
    A NaN input fails both comparisons and therefore falls through to "High".
    """
    if case < medium_from:
        return "Low"
    # Reaching this point already implies case >= medium_from (or NaN), so only the
    # upper bound needs checking.
    if case < high_from:
        return "Medium"
    return "High"

# Label every location by its case count; get_status is called once per "Cases" value.
df["Status"] = df["Cases"].map(get_status)

df

Unnamed: 0,Location,Cases,Deaths,Recovered,Status
0,United States,28833039,517204,No data,High
1,India,11079979,156938,10763451,High
2,Brazil,10517232,254263,9386440,High
3,Russia,4246079,86122,3811797,High
4,United Kingdom,4170519,122705,No data,High
5,France,3736016,86332,No data,Medium
6,Spain,3188553,69142,No data,Medium
7,Italy,2907825,97507,2398352,Low
8,Turkey,2693164,28503,2565723,Low
9,Germany,2444169,70601,2242767,Low


In [71]:
''' 
    Rank the created Series and DataFrame in ascending order of deaths count.
'''

# Rank locations by death count, smallest first: the location with the fewest deaths
# gets rank 1, and method='min' gives tied locations the lowest rank of their group.
df['Deaths_Rank'] = df['Deaths'].rank(ascending=True, method='min')

# The same ranks as a stand-alone Series keyed by location name.
# (A bare `df` in the middle of a cell is not rendered -- only the last expression is --
# so the cell ends on the one object it displays.)
locations_ranked = df.set_index('Location')['Deaths_Rank'].rename("Location by Death")
locations_ranked

Location
United States     10.0
India              8.0
Brazil             9.0
Russia             4.0
United Kingdom     7.0
France             5.0
Spain              2.0
Italy              6.0
Turkey             1.0
Germany            3.0
Name: Location by Death, dtype: float64

In [72]:
''' 
    Compute the statistics of the created Series and DataFrame by using 
    summary stats related methods.
'''

# Summary statistics (count, mean, std, min, quartiles, max) for the DataFrame.
# With default arguments describe() summarises only the numeric columns -- here Cases,
# Deaths and Deaths_Rank; the text columns are left out.
df.describe()

Unnamed: 0,Cases,Deaths,Deaths_Rank
count,10.0,10.0,10.0
mean,7381658.0,148931.7,5.5
std,8172197.0,143366.103826,3.02765
min,2444169.0,28503.0,1.0
25%,2978007.0,74481.25,3.25
50%,3953268.0,91919.5,5.5
75%,8949444.0,148379.75,7.75
max,28833040.0,517204.0,10.0


In [73]:
# 90th percentile of the death counts: the value below which 90% of the data values fall.
qt = df["Deaths"].quantile(q=0.9)

In [74]:
# Location that reported the fewest cases.
# idxmin() yields the index label of the smallest "Cases" value; .at reads that single cell.
min_cases_index = df['Cases'].idxmin()
location_with_min_cases = df.at[min_cases_index, 'Location']

In [75]:
# Deaths as a percentage of cases, per location.
df["Death_Percentage"] = df["Deaths"].div(df["Cases"]).mul(100)

# Index label of the row with the highest death percentage.
max_death_percentage_idx = df["Death_Percentage"].idxmax()

# Two-field Series (Location, Death_Percentage) for that row.
loc = df.loc[max_death_percentage_idx, ["Location", "Death_Percentage"]]


In [76]:
# Spread and shape statistics of the "Deaths" column.
deaths_std = df["Deaths"].std()
deaths_skew = df["Deaths"].skew()
deaths_kurt = df["Deaths"].kurtosis()  # kurtosis() is the long-form alias of kurt()

In [77]:
# Report the statistics computed in the preceding cells.
# `loc` is a two-field Series (Location, Death_Percentage) for the row with the highest
# death percentage -- not the highest death count -- so the label says so, and the two
# scalars are pulled out instead of embedding the multi-line Series repr in the report.
print(f'''
Statistical Results:
      skewness of deaths = {deaths_skew}
      standard deviation of death = {deaths_std}
      Kurtosis of deaths = {deaths_kurt}
      90% values in death column lies below {qt}
      location with max death percentage = {loc["Location"]} ({loc["Death_Percentage"]:.2f}%)
      ''')


Statistal Results:
      skewness of deaths = 2.249841686549378
      standard deviatin of death = 143366.1038259509
      Kurtosis of deaths = 5.39601411977261
      90% values in death column lies below 280557.0999999999
      location with max deaths = Location               Italy
Death_Percentage    3.353262
Name: 7, dtype: object
      


In [78]:
''' 
    Fill the missing data of Table 2.14 with the mean and median value of a Series by 
    using fillna function.
'''
import numpy as np
# Turn the table's "No data" placeholders into real missing values (NaN) so that fillna
# can target them; every other entry is left untouched.
df["Recovered"] = df["Recovered"].replace({"No data": np.nan})
df

Unnamed: 0,Location,Cases,Deaths,Recovered,Status,Deaths_Rank,Death_Percentage
0,United States,28833039,517204,,High,10.0,1.793789
1,India,11079979,156938,10763451.0,High,8.0,1.416411
2,Brazil,10517232,254263,9386440.0,High,9.0,2.417585
3,Russia,4246079,86122,3811797.0,High,4.0,2.028271
4,United Kingdom,4170519,122705,,High,7.0,2.9422
5,France,3736016,86332,,Medium,5.0,2.310804
6,Spain,3188553,69142,,Medium,2.0,2.168444
7,Italy,2907825,97507,2398352.0,Low,6.0,3.353262
8,Turkey,2693164,28503,2565723.0,Low,1.0,1.058346
9,Germany,2444169,70601,2242767.0,Low,3.0,2.888548


In [79]:
# Make the column numeric first; anything that cannot be parsed becomes NaN.
df["Recovered"] = pd.to_numeric(df["Recovered"], errors="coerce")

# Both candidate fill values, computed from the known (non-missing) entries only.
mean_recovered, median_recovered = df["Recovered"].agg(["mean", "median"])

# Fill the gaps with the median. To fill with the mean instead, pass mean_recovered here.
df["Recovered"] = df["Recovered"].fillna(value=median_recovered)
df

Unnamed: 0,Location,Cases,Deaths,Recovered,Status,Deaths_Rank,Death_Percentage
0,United States,28833039,517204,3188760.0,High,10.0,1.793789
1,India,11079979,156938,10763451.0,High,8.0,1.416411
2,Brazil,10517232,254263,9386440.0,High,9.0,2.417585
3,Russia,4246079,86122,3811797.0,High,4.0,2.028271
4,United Kingdom,4170519,122705,3188760.0,High,7.0,2.9422
5,France,3736016,86332,3188760.0,Medium,5.0,2.310804
6,Spain,3188553,69142,3188760.0,Medium,2.0,2.168444
7,Italy,2907825,97507,2398352.0,Low,6.0,3.353262
8,Turkey,2693164,28503,2565723.0,Low,1.0,1.058346
9,Germany,2444169,70601,2242767.0,Low,3.0,2.888548
